Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IKKBZ Join ordering #1330

Open
wants to merge 51 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
a7e0bcc
JoinTree Imp.
Goblin80 Apr 21, 2024
a0784c3
IKKBZ Imp.
Goblin80 Apr 21, 2024
c07eb3f
IKKBZ sanity tests
Goblin80 Apr 21, 2024
a598baa
.clang-format
Goblin80 Apr 21, 2024
75d2194
split this up to a .h and .cpp
Goblin80 Apr 27, 2024
88eed17
std::span
Goblin80 Apr 27, 2024
407ee68
allow discard add_relation
Goblin80 Apr 27, 2024
9cc7139
.clang-format
Goblin80 Apr 27, 2024
963ed97
move out rank and test it separately
Goblin80 Apr 30, 2024
fd8fecd
make linker happy
Goblin80 Apr 30, 2024
29ae853
[skip ci] rank table with CostASI
Goblin80 Apr 30, 2024
653cc84
cleanup costfn
Goblin80 Apr 30, 2024
06e873e
link util
Goblin80 May 1, 2024
785361a
link engine
Goblin80 May 1, 2024
4319d58
[skip ci] argmin of all relations
Goblin80 May 1, 2024
2c34528
[skip ci] ASI comments
Goblin80 May 1, 2024
c17a3d5
[skip ci] IKKBZ par_unseq
Goblin80 May 1, 2024
a2ffca2
[skip ci] sprinkle const
Goblin80 May 1, 2024
788b7cf
[skip ci] iter relay
Goblin80 May 2, 2024
03055de
[skip ci] partial_sort
Goblin80 May 2, 2024
2d5354b
C span seq
Goblin80 May 3, 2024
0c353dd
.clang-format
Goblin80 May 3, 2024
94b8fa7
gcc11 compatible
Goblin80 May 3, 2024
c1861e1
linker stuff
Goblin80 May 4, 2024
a49414b
rm CostASITest.cpp
Goblin80 May 4, 2024
c837874
include .cpp
Goblin80 May 4, 2024
ba9498e
mac being annoying per uge
Goblin80 May 4, 2024
41b07c0
Merge branch 'ad-freiburg:master' into joinorder-ikkbz
Goblin80 May 5, 2024
3158e74
decouple cost fn and memorize rank
Goblin80 May 5, 2024
6f4ff38
ty abseil
Goblin80 May 6, 2024
1f5eabf
unpack paired rxs
Goblin80 May 7, 2024
b467956
inv default undirected
Goblin80 May 9, 2024
91ffc4a
init JoinTree
Goblin80 Oct 27, 2024
e9b0881
auto?
Goblin80 Oct 27, 2024
803fda7
JoinNode, JoinTree
Goblin80 Oct 27, 2024
ebc77b4
Cout
Goblin80 Oct 27, 2024
1e512e5
fixup! auto?
Goblin80 Oct 27, 2024
3316dca
init LinearizedDP
Goblin80 Oct 27, 2024
4eeb722
headers
Goblin80 Oct 27, 2024
9917300
TreeCostTest
Goblin80 Oct 27, 2024
40af54d
Merge branch 'ad-freiburg:master' into joinorder-ikkbz
Goblin80 Oct 27, 2024
9117b88
fmt
Goblin80 Oct 27, 2024
5f1492f
fixup! fmt
Goblin80 Oct 27, 2024
c89a33a
fix expanded macro
Goblin80 Oct 27, 2024
4c8ec01
codecov
Goblin80 Oct 27, 2024
25145da
unused
Goblin80 Oct 27, 2024
ff97031
empty JoinNode
Goblin80 Oct 27, 2024
1b69d07
fixup! empty JoinNode
Goblin80 Oct 27, 2024
c3040d5
fixup! unused
Goblin80 Oct 27, 2024
1f0f366
fixup! codecov
Goblin80 Oct 28, 2024
27f3269
codeconv stuck
Goblin80 Oct 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Process the CMakeLists.txt of each component subdirectory.
add_subdirectory(sparqlExpressions)
add_subdirectory(joinOrdering)
# Separate library target built from SortPerformanceEstimator.cpp.
add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp)
# Project helper; presumably attaches the common link dependencies -- see its
# definition for details.
qlever_target_link_libraries(SortPerformanceEstimator)

add_library(engine
Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp
IndexScan.cpp Join.cpp Sort.cpp
Expand Down
10 changes: 10 additions & 0 deletions src/engine/joinOrdering/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Library with the join-ordering sources: query graph, join tree/node,
# the IKKBZ and LinearizedDP algorithms, and the cost functions.
add_library(joinOrdering
    CostCout.cpp
    CostIKKBZ.cpp
    EdgeInfo.cpp
    IKKBZ.cpp
    JoinNode.cpp
    JoinTree.cpp
    LinearizedDP.cpp
    QueryGraph.cpp
    RelationBasic.cpp
)
qlever_target_link_libraries(joinOrdering)
140 changes: 140 additions & 0 deletions src/engine/joinOrdering/CostCout.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author:
// Mahmoud Khalaf (2024-, [email protected])

#include "CostCout.h"

namespace JoinOrdering::Cost {

/**
 * Combined selectivity between the relations below `x` and the relations
 * below `y`.
 *
 * For two leaves the value is looked up as
 * `selectivities[label(x)][label(y)]`; a pair that is not present in the map
 * contributes 1.0. For inner nodes the selectivities of all leaf pairs are
 * multiplied (independence of the predicates is assumed).
 *
 * @tparam N type that satisfies RelationAble concept
 * @param x first subtree, may be null
 * @param y second subtree, may be null
 * @param selectivities map of selectivity for each pair of relations
 * @return product of the pairwise selectivities, 1.0 if either side is null
 */
template <typename N>
requires RelationAble<N> float selectivity(
    std::shared_ptr<JoinNode<N>> x, std::shared_ptr<JoinNode<N>> y,
    const std::map<std::string, std::map<std::string, float>>& selectivities) {
  // a missing subtree does not restrict the result
  if (!x || !y) return 1.0f;

  const bool xIsLeaf = x->isLeaf();
  const bool yIsLeaf = y->isLeaf();

  if (xIsLeaf && yIsLeaf) {
    // Look both labels up without copying the inner map. An unknown label on
    // either level means "no predicate between the two relations".
    const auto outer = selectivities.find(x->relation.getLabel());
    if (outer == selectivities.end()) return 1.0f;
    const auto inner = outer->second.find(y->relation.getLabel());
    return inner == outer->second.end() ? 1.0f : inner->second;
  }

  if (xIsLeaf)
    return selectivity(x, y->left, selectivities) *
           selectivity(x, y->right, selectivities);

  // the leaf is passed first here, so the lookup ends up as
  // selectivities[label(y)][label(...)]
  if (yIsLeaf)
    return selectivity(y, x->left, selectivities) *
           selectivity(y, x->right, selectivities);

  // both sides are inner nodes: combine all four child pairs
  return selectivity(x->left, y->left, selectivities) *
         selectivity(x->left, y->right, selectivities) *
         selectivity(x->right, y->left, selectivities) *
         selectivity(x->right, y->right, selectivities);
}

/**
 * Estimated cardinality of the subtree rooted at `n`.
 *
 * Assumes independence of the predicates.
 * ref: 77/637
 *
 * @tparam N type that satisfies RelationAble concept
 * @param n JoinNode to evaluate, may be null
 * @param cardinalities map of cardinality of each relation in the tree
 * @param selectivities map of selectivity for each pair of relations
 * @return 1 for a null node, the stored cardinality for a leaf, otherwise the
 * product of both child cardinalities and their combined selectivity
 */
template <typename N>
requires RelationAble<N> unsigned long long cardinality(
    std::shared_ptr<JoinNode<N>> n,
    const std::map<std::string, unsigned long long>& cardinalities,
    const std::map<std::string, std::map<std::string, float>>& selectivities) {
  if (!n) return 1;

  // TODO: log missing relation cardinality (`at` throws for unknown labels)
  if (n->isLeaf()) return cardinalities.at(n->relation.getLabel());

  const auto lhs = n->left;
  const auto rhs = n->right;

  if (lhs && rhs) {
    return cardinality(lhs, cardinalities, selectivities) *
           cardinality(rhs, cardinalities, selectivities) *
           selectivity(lhs, rhs, selectivities);
  }

  // Inner node with a single child: forward to whichever side exists.
  // NOTE: Codecov reported the right-child-only path as not covered by tests.
  const auto& onlyChild = lhs ? lhs : rhs;
  return cardinality(onlyChild, cardinalities, selectivities);
}

/**
 * Cout cost of the JoinTree `t`, with the statistics taken from the
 * QueryGraph `q`.
 *
 * The cardinality of every relation that is a key of `q.edges_` and the
 * weight of every non-hidden edge (stored for both directions) are collected
 * into label-keyed maps, which are then handed to the node-based overload.
 *
 * @tparam N type that satisfies RelationAble concept
 * @param t JoinTree to evaluate
 * @param q QueryGraph that supplies cardinalities and edge weights
 * @return Cost Evaluation for given JoinTree
 */
template <typename N>
requires RelationAble<N>
double Cout(const JoinTree<N>& t, const QueryGraph<N>& q) {
  std::map<std::string, std::map<std::string, float>> qselecm;
  std::map<std::string, unsigned long long> qcards;

  // q.edges_ is expected to be a std::map<N, std::map<N, EdgeInfo>>
  for (const auto& [rel, neighbours] : q.edges_) {
    // the label is needed for every edge of `rel`, so compute it only once
    const auto relLabel = rel.getLabel();
    qcards[relLabel] = rel.getCardinality();

    for (const auto& [other, edge] : neighbours) {
      if (edge.hidden) continue;
      const auto otherLabel = other.getLabel();
      // selectivity is direction-less: register it for both lookup orders
      qselecm[relLabel][otherLabel] = edge.weight;
      qselecm[otherLabel][relLabel] = edge.weight;
    }
  }

  return Cout(t.root, qcards, qselecm);
}

/**
 * Cout cost of a whole JoinTree, which is the cost of its root node.
 *
 * @tparam N type that satisfies RelationAble concept
 * @param t JoinTree to evaluate
 * @param cardinalities map of cardinality of each relation in the tree
 * @param selectivities map of selectivity for each pair of relations
 * @return Cost Evaluation for given JoinTree
 */
template <typename N>
requires RelationAble<N> double Cout(
    const JoinTree<N>& t,
    const std::map<std::string, unsigned long long>& cardinalities,
    const std::map<std::string, std::map<std::string, float>>& selectivities) {
  const auto& rootNode = t.root;
  return Cout(rootNode, cardinalities, selectivities);
}

/**
 * Cout cost of the subtree rooted at `n`: the cardinality of every join below
 * (and including) `n`, summed up.
 *
 * ref: 79/637
 *
 * @tparam N type that satisfies RelationAble concept
 * @param n JoinNode to evaluate, may be null
 * @param cardinalities map of cardinality of each relation in the tree
 * @param selectivities map of selectivity for each pair of relations
 * @return 0 for a null node or a leaf, otherwise the accumulated cost
 */
template <typename N>
requires RelationAble<N> double Cout(
    std::shared_ptr<JoinNode<N>> n,
    const std::map<std::string, unsigned long long>& cardinalities,
    const std::map<std::string, std::map<std::string, float>>& selectivities) {
  // An empty join tree (e.g. an unset DP table entry) and a plain relation
  // both cost nothing.
  if (!n || n->isLeaf()) return 0;

  const auto lhs = n->left;
  const auto rhs = n->right;

  if (lhs && rhs) {
    return cardinality(n, cardinalities, selectivities) +
           Cout(lhs, cardinalities, selectivities) +
           Cout(rhs, cardinalities, selectivities);
  }

  // Inner node with a single child: its cost is the cost of that child.
  if (lhs) {
    return Cout(lhs, cardinalities, selectivities);
  }
  // NOTE: Codecov reported this right-child-only path as not covered by
  // tests.
  return Cout(rhs, cardinalities, selectivities);
}

// Explicit instantiations of the three Cout overloads for RelationBasic. The
// template definitions live in this .cpp file, so these make the
// RelationBasic versions available to other translation units.

// Cout(JoinTree, QueryGraph)
template double Cout(const JoinTree<RelationBasic>& t,
const QueryGraph<RelationBasic>& q);

// Cout(JoinTree, cardinalities, selectivities)
template double Cout(
const JoinTree<RelationBasic>& t,
const std::map<std::string, unsigned long long>& cardinalities,
const std::map<std::string, std::map<std::string, float>>& selectivities);

// Cout(JoinNode, cardinalities, selectivities)
template double Cout(
std::shared_ptr<JoinNode<RelationBasic>> n,
const std::map<std::string, unsigned long long>& cardinalities,
const std::map<std::string, std::map<std::string, float>>& selectivities);

} // namespace JoinOrdering::Cost
74 changes: 74 additions & 0 deletions src/engine/joinOrdering/CostCout.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author:
// Mahmoud Khalaf (2024-, [email protected])

#pragma once

#include <map>
#include <memory>

#include "JoinNode.h"
#include "JoinTree.h"
#include "QueryGraph.h"

namespace JoinOrdering::Cost {

/**
 * Cout cost of a JoinTree, with the statistics taken from a QueryGraph
 * instead of explicit maps.
 *
 * @tparam N type that satisfies RelationAble concept
 * @param t JoinTree to evaluate
 * @param q QueryGraph that supplies the cardinality of each relation and the
 * weight of each non-hidden edge (used as selectivity)
 * @return Cost Evaluation for given JoinTree
 */
template <typename N>
requires RelationAble<N>
double Cout(const JoinTree<N>& t, const QueryGraph<N>& q);

/**
 * Basic cost function that returns an estimate of how expensive it is to
 * evaluate a given JoinTree. A low cost implies a cheap execution plan.
 *
 * ref: 79/637
 *
 * // TODO: can be inferred by RelationAble::getCardinality
 * // TODO: better use some sort of map of unordered pairs since selectivity is
 * direction-less
 * // TODO: default to 1.0 when the selectivity between 2 relations is not
 * defined.
 *
 * @tparam N type that satisfies RelationAble concept
 * @param t Linear JoinTree (left-deep, right-deep, zigzag, ...)
 * @param cardinalities map from relation label to the cardinality of that
 * relation
 * @param selectivities nested map from a pair of relation labels to the
 * selectivity between the two relations
 * @return Cost Evaluation for given JoinTree
 */
template <typename N>
requires RelationAble<N> double Cout(
const JoinTree<N>& t,
const std::map<std::string, unsigned long long>& cardinalities,
const std::map<std::string, std::map<std::string, float>>& selectivities);

/**
 * Cost of the subtree rooted at a single JoinNode. This is the overload the
 * JoinTree variants delegate to.
 *
 * @tparam N type that satisfies RelationAble concept
 * @param r JoinNode that can be inner (join operators) or leaf node (relations)
 * @param cardinalities map from relation label to the cardinality of that
 * relation
 * @param selectivities nested map from a pair of relation labels to the
 * selectivity between the two relations
 * @return Cost Evaluation for given JoinNode
 */
template <typename N>
requires RelationAble<N> double Cout(
std::shared_ptr<JoinNode<N>> r,
const std::map<std::string, unsigned long long>& cardinalities,
const std::map<std::string, std::map<std::string, float>>& selectivities);

// template <typename N>
// requires RelationAble<N> unsigned long long cardinality(
// std::shared_ptr<JoinNode<N>> r,
// const std::map<std::string, int>& cardinalities,
// const std::map<std::string, std::map<std::string, float>>&
// selectivities);
//
// template <typename N>
// requires RelationAble<N> float selectivity(
// std::shared_ptr<JoinNode<N>> x, std::shared_ptr<JoinNode<N>> y,
// const std::map<std::string, std::map<std::string, float>>&
// selectivities);

} // namespace JoinOrdering::Cost
67 changes: 67 additions & 0 deletions src/engine/joinOrdering/CostIKKBZ.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author:
// Mahmoud Khalaf (2024-, [email protected])

#include "CostIKKBZ.h"

#include "RelationBasic.h"

namespace JoinOrdering {

/**
 * Cost of a sequence of relations:
 *
 *   C(eps)     = 0
 *   C(S_1 S_2) = C(S_1) + T(S_1) * C(S_2)
 *
 * The recurrence is evaluated from the back of the sequence to the front.
 * This performs exactly the same operations as the recursive definition, but
 * needs neither one stack frame nor one relation copy per element.
 *
 * @param g precedence tree
 * @param seq sequence of relations (may include compound relations)
 * @return C(seq), 0 for an empty sequence
 */
template <typename N>
requires RelationAble<N>
float CostIKKBZ<N>::C(const QueryGraph<N>& g, std::span<N> seq) {
  float cost = 0.0f;  // C(eps)
  for (auto it = seq.rbegin(); it != seq.rend(); ++it) {
    const N& rel = *it;
    cost = C(g, rel) + T(g, rel) * cost;  // TODO: might overflow
  }
  return cost;
}

/**
 * Cost of a single (possibly compound) relation.
 *
 * ref: 113/637
 *
 * @param g precedence tree
 * @param n Relation
 * @return 0 for the root, T(n) for a regular relation, and
 * C(S_1) + T(S_1) * C(S_2) for a compound relation made of S_1 and S_2
 */
template <typename N>
requires RelationAble<N>
float CostIKKBZ<N>::C(const QueryGraph<N>& g, const N& n) {
  // the root does not contribute any cost
  if (g.root == n) return 0;

  if (g.is_compound_relation(n)) {
    // look up the two parts this compound relation was built from
    auto const& [first, second] = g.hist.at(n).value();
    return C(g, first) + T(g, first) * C(g, second);  // TODO: might overflow
  }

  // regular relation
  return T(g, n);
}

/**
 * T(n) = s_n * |n|, i.e. the selectivity stored for `n` in the graph times
 * the cardinality of `n`.
 *
 * ref: 113/637
 *
 * @param g precedence tree
 * @param n Relation
 * @return 1 if `n` is the root of `g`, otherwise selectivity * cardinality
 */
template <typename N>
requires RelationAble<N>
float CostIKKBZ<N>::T(const QueryGraph<N>& g, const N& n) {
  // T of the root is 1, so it does not scale the cost of what follows
  const bool isRoot = (g.root == n);
  return isRoot ? 1.0f
                : g.selectivity.at(n) * static_cast<float>(n.getCardinality());
}

/**
 * rank(n) = (T(n) - 1) / C(n), memoized per relation.
 *
 * Cost, T and rank are cached in C_m, T_m and rank_m to avoid recomputing
 * them for long sequences. Nothing is cached when C(n) is 0.
 *
 * @param g precedence tree
 * @param n Relation
 * @return 0 if C(n) is 0, otherwise the (cached) rank of `n`
 */
template <typename N>
requires RelationAble<N>
float CostIKKBZ<N>::rank(const QueryGraph<N>& g, const N& n) {
  // a single lookup per cache instead of contains() followed by operator[]
  if (const auto hit = rank_m.find(n); hit != rank_m.end()) return hit->second;

  const auto cachedC = C_m.find(n);
  const float c = cachedC != C_m.end() ? cachedC->second : C(g, n);
  const auto cachedT = T_m.find(n);
  const float t = cachedT != T_m.end() ? cachedT->second : T(g, n);

  if (c == 0) return 0;
  const float r = (t - 1) / c;
  // NOTE(review): r is negative whenever t < 1 (with c > 0), which this check
  // rejects -- confirm that such relations cannot reach this point.
  AD_CONTRACT_CHECK(r >= 0 && r <= 1);

  // entries that already exist hold exactly these values, so try_emplace is
  // equivalent to overwriting them
  rank_m.try_emplace(n, r);
  C_m.try_emplace(n, c);
  T_m.try_emplace(n, t);
  return r;
}

// Explicit instantiations for RelationBasic of the member templates defined
// in this .cpp file: the sequence overload of C ...
template float CostIKKBZ<RelationBasic>::C(const QueryGraph<RelationBasic>& g,
std::span<RelationBasic> seq);

// ... and rank.
template float CostIKKBZ<RelationBasic>::rank(
const QueryGraph<RelationBasic>& g, const RelationBasic& n);
} // namespace JoinOrdering
72 changes: 72 additions & 0 deletions src/engine/joinOrdering/CostIKKBZ.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author:
// Mahmoud Khalaf (2024-, [email protected])

#pragma once

#include <span>

#include "ICostASI.h"
// #include "util/HashMap.h"
#include <map>

namespace JoinOrdering {

/**
 * Cost function used by IKKBZ (derives from ICostASI).
 *
 * Provides the cost C, the factor T and the rank of a relation, and caches
 * the values computed by rank() per relation.
 *
 * @tparam N type that satisfies RelationAble concept
 */
template <typename N>
requires RelationAble<N> class CostIKKBZ : public ICostASI<N> {
public:
// ad_utility::HashMap<N, float> rank_m;
// ad_utility::HashMap<N, float> C_m;
// ad_utility::HashMap<N, float> T_m;

// Caches filled by rank(): rank, C and T of each relation seen so far.
std::map<N, float> rank_m;
std::map<N, float> C_m;
std::map<N, float> T_m;

/**
 *
 * calculate the rank (T(n) - 1) / C(n) of a relation. Results are cached in
 * rank_m, C_m and T_m.
 *
 * @param g precedence tree
 * @param n Relation
 * @return rank(n), 0 if C(n) is 0
 */
float rank(const QueryGraph<N>& g, const N& n);

/**
 *
 * calculate T for a relation: s_i * n_i
 * (selectivity * cardinality)
 *
 * T of the root is 1.
 *
 * @param g precedence tree
 * @param n Relation
 * @return T(n)
 */
float T(const QueryGraph<N>& g, const N& n);
/**
 *
 * calculate the cost of a single relation (0 for the root).
 *
 * a join is called increasing if cost > 1
 * a join is called decreasing if cost < 1
 *
 * ref: 113/637
 *
 * @param g precedence tree
 * @param n Relation
 * @return C(n)
 */
float C(const QueryGraph<N>& g, const N& n);

/**
 *
 * calculate cost for a sequence of relations
 *
 *
 * C(eps) = 0
 * C(R) = 0 (if R is root)
 * C(R) = h_i * (n_i)
 * C(S_1 S_2) = C(S1) + T(S1) * C(S2)
 *
 * ref: 113/637
 *
 * @param g precedence tree
 * @param seq sequence of relations (may include compound relations)
 * @return C(S_1 S_2)
 */
float C(const QueryGraph<N>& g, std::span<N> seq);
};

} // namespace JoinOrdering
14 changes: 14 additions & 0 deletions src/engine/joinOrdering/EdgeInfo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author:
// Mahmoud Khalaf (2024-, [email protected])

#include "EdgeInfo.h"

namespace JoinOrdering {

// Defaulted constructor: how the members are initialized is determined by
// their declarations in EdgeInfo.h.
EdgeInfo::EdgeInfo() = default;
// EdgeInfo::EdgeInfo(Direction dir) : direction(dir) {}
// Edge with an explicit direction and weight. The weight is what the Cout
// cost function reads as the selectivity between the two connected relations.
EdgeInfo::EdgeInfo(Direction dir, float weight)
: direction(dir), weight(weight) {}
} // namespace JoinOrdering
Loading
Loading