Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

An optimization for LANGMATCHES #1623

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/engine/sparqlExpressions/SparqlExpressionPimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class SparqlExpressionPimpl {
// The result type of `getLanguageFilterExpression()`: describes a language
// filter on a single variable that was recognized inside an expression.
struct LangFilterData {
// The variable to which the language filter applies.
Variable variable_;
// The language of the filter, stored as a plain string.
std::string language_;
// True if the filter stems from a `LANGMATCHES` expression (set by
// `LangMatches::getLanguageFilterExpression`). Defaults to `false`.
bool isLangmatches_ = false;
};
std::optional<LangFilterData> getLanguageFilterExpression() const;

Expand Down
21 changes: 20 additions & 1 deletion src/engine/sparqlExpressions/StringExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,9 +479,28 @@ using EncodeForUriExpression =
}
};

using LangMatches =
using LangMatchesImpl =
StringExpressionImpl<2, decltype(langMatching), StringValueGetter>;

// The `LANGMATCHES` expression. The evaluation is inherited from
// `LangMatchesImpl`. Additionally, this class reports when the expression has
// the simple shape `LANGMATCHES(<variable>, <string literal>)`, so that the
// caller can try to handle the filter more efficiently.
class LangMatches : public LangMatchesImpl {
 public:
  using LangMatchesImpl::LangMatchesImpl;

  // Return the variable and the language range (with `isLangmatches_` set to
  // `true`) if the first child is a `VariableExpression` and the second child
  // is a `StringLiteralExpression`. Return `std::nullopt` if the expression
  // has a different shape or if the language range is the wildcard `*`.
  std::optional<LangFilterData> getLanguageFilterExpression() const override {
    AD_CORRECTNESS_CHECK(children().size() == 2);
    auto* var = dynamic_cast<const VariableExpression*>(children()[0].get());
    auto* str =
        dynamic_cast<const StringLiteralExpression*>(children()[1].get());
    if (!(var && str)) {
      return std::nullopt;
    }
    // TODO<joka921> We need to check whether the literal is plain. (no language
    // tag or something else).
    std::string languageRange(asStringViewUnsafe(str->value().getContent()));
    // The range `*` matches every non-empty language tag, so it cannot be
    // expressed via the predicate of one single language. Report "no language
    // filter" such that the expression is evaluated as an ordinary filter.
    if (languageRange == "*") {
      return std::nullopt;
    }
    return LangFilterData{var->value(), std::move(languageRange), true};
  }
};

// STRING WITH LANGUAGE TAG
[[maybe_unused]] inline auto strLangTag =
[](std::optional<std::string> input,
Expand Down
8 changes: 6 additions & 2 deletions src/index/IndexBuilderTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,15 +243,15 @@ auto getIdMapLambdas(
// the allocation and deallocation of these hash maps (that are newly
// created for each batch) much cheaper (see `CachingMemoryResource.h` and
// `IndexImpl.cpp`).
itemArray[j]->map_.map_.reserve(5 * maxNumberOfTriples / NumThreads);
itemArray[j]->map_.map_.reserve(6 * maxNumberOfTriples / NumThreads);
// The LANGUAGE_PREDICATE gets the first ID in each map. TODO<joka921>
// This is not necessary for the actual QLever code, but certain unit tests
// currently fail without it.
itemArray[j]->getId(TripleComponent{
ad_utility::triple_component::Iri::fromIriref(LANGUAGE_PREDICATE)});
}
using OptionalIds =
std::array<std::optional<std::array<Id, NumColumnsIndexBuilding>>, 3>;
std::array<std::optional<std::array<Id, NumColumnsIndexBuilding>>, 4>;

/* given an index idx, returns a lambda that
* - Takes a triple and a language tag
Expand All @@ -278,6 +278,8 @@ auto getIdMapLambdas(
.iriOrLiteral_.getIri();
auto langTaggedPredId = map.getId(TripleComponent{
ad_utility::convertToLanguageTaggedPredicate(iri, lt.langtag_)});
auto langMatchesTaggedPredId = map.getId(TripleComponent{
ad_utility::convertToLangmatchesTaggedPredicate(iri, lt.langtag_)});
auto& spoIds = *res[0]; // ids of original triple
// TODO replace the std::array by an explicit IdTriple class,
// then the emplace calls don't need the explicit type.
Expand All @@ -299,6 +301,8 @@ auto getIdMapLambdas(
ad_utility::triple_component::Iri::fromIriref(
LANGUAGE_PREDICATE)}),
langTagId, tripleGraphId});
res[3].emplace(
Arr{spoIds[0], langMatchesTaggedPredId, spoIds[2], tripleGraphId});
}
return res;
};
Expand Down
2 changes: 1 addition & 1 deletion src/index/IndexFormatVersion.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,5 @@ struct IndexFormatVersion {
// The actual index version. Change it once the binary format of the index
// changes.
inline const IndexFormatVersion& indexFormatVersion{
1572, DateYearOrDuration{Date{2024, 10, 22}}};
1623, DateYearOrDuration{Date{2024, 11, 20}}};
} // namespace qlever
5 changes: 3 additions & 2 deletions src/parser/GraphPattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ class GraphPattern {

// Modify query to take care of language filter. `variable` is the variable,
// `languageInQuotes` is the language.
void addLanguageFilter(const Variable& variable,
const std::string& languageInQuotes);
// If `isLangmatches` is true, the filter stems from a `LANGMATCHES`
// expression. Returns `true` if the filter was handled by this function.
// Returns `false` for a `LANGMATCHES` filter whose language contains a `-`, or
// for which no suitable triple was found; the caller then has to keep the
// original filter.
[[nodiscard]] bool addLanguageFilter(const Variable& variable,
const std::string& languageInQuotes,
bool isLangmatches = false);

bool _optional;

Expand Down
20 changes: 16 additions & 4 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,9 @@ void ParsedQuery::registerVariableVisibleInQueryBody(const Variable& variable) {
ParsedQuery::GraphPattern::GraphPattern() : _optional(false) {}

// __________________________________________________________________________
void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,
const std::string& langTag) {
bool ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,
const std::string& langTag,
bool isLangmatches) {
// Find all triples where the object is the `variable` and the predicate is
// a simple `IRIREF` (neither a variable nor a complex property path).
// Search in all the basic graph patterns, as filters have the complete
Expand All @@ -275,6 +276,10 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,
// Subqueries etc.
// TODO<joka921> Also support property paths (^rdfs:label,
// skos:altLabel|rdfs:label, ...)

if (isLangmatches && langTag.find('-') != std::string::npos) {
return false;
}
std::vector<SparqlTriple*> matchingTriples;
using BasicPattern = parsedQuery::BasicGraphPattern;
namespace ad = ad_utility;
Expand All @@ -295,14 +300,20 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,

// Replace all the matching triples.
for (auto* triplePtr : matchingTriples) {
triplePtr->p_._iri = ad_utility::convertToLanguageTaggedPredicate(
triplePtr->p_._iri, langTag);
triplePtr->p_._iri = isLangmatches
? ad_utility::convertToLangmatchesTaggedPredicate(
triplePtr->p_._iri, langTag)
: ad_utility::convertToLanguageTaggedPredicate(
triplePtr->p_._iri, langTag);
}

// Handle the case, that no suitable triple (see above) was found. In this
// case a triple `?variable ql:langtag "language"` is added at the end of
// the graph pattern.
if (matchingTriples.empty()) {
if (isLangmatches) {
return false;
}
LOG(DEBUG) << "language filter variable " + variable.name() +
" did not appear as object in any suitable "
"triple. "
Expand All @@ -326,6 +337,7 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable,
langEntity);
t.push_back(std::move(triple));
}
return true;
}

// ____________________________________________________________________________
Expand Down
8 changes: 6 additions & 2 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -703,8 +703,12 @@ GraphPattern Visitor::visit(Parser::GroupGraphPatternContext* ctx) {
if (auto langFilterData =
filter.expression_.getLanguageFilterExpression();
langFilterData.has_value()) {
const auto& [variable, language] = langFilterData.value();
pattern.addLanguageFilter(variable, language);
const auto& [variable, language, isLangmatches] =
langFilterData.value();
if (!pattern.addLanguageFilter(variable, language, isLangmatches)) {
// TODO<joka921> Code duplication.
pattern._filters.push_back(std::move(filter));
}
} else {
pattern._filters.push_back(std::move(filter));
}
Expand Down
26 changes: 22 additions & 4 deletions src/util/Conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,39 @@
namespace ad_utility {

// _________________________________________________________
triple_component::Iri convertLangtagToEntityUri(const string& tag) {
triple_component::Iri convertLangtagToEntityUri(std::string_view tag) {
return triple_component::Iri::fromIriref(makeQleverInternalIri("@", tag));
}

// _________________________________________________________
std::string convertToLanguageTaggedPredicate(const string& pred,
const string& langtag) {
std::string convertToLanguageTaggedPredicate(std::string_view pred,
std::string_view langtag) {
return absl::StrCat("@", langtag, "@", pred);
}

// Return the part of `language` that precedes the first `-`. If `language`
// contains no `-`, it is returned unchanged.
static std::string_view getPrimaryLanguage(std::string_view language) {
  const auto dashPosition = language.find('-');
  if (dashPosition == std::string_view::npos) {
    return language;
  }
  return language.substr(0, dashPosition);
}

// _________________________________________________________
triple_component::Iri convertToLanguageTaggedPredicate(
const triple_component::Iri& pred, const std::string& langtag) {
const triple_component::Iri& pred, std::string_view langtag) {
return triple_component::Iri::fromIriref(absl::StrCat(
"@", langtag, "@<", asStringViewUnsafe(pred.getContent()), ">"));
}

// _________________________________________________________
std::string convertToLangmatchesTaggedPredicate(std::string_view pred,
std::string_view langtag) {
return absl::StrCat("@@", getPrimaryLanguage(langtag), "@@", pred);
}

// _________________________________________________________
triple_component::Iri convertToLangmatchesTaggedPredicate(
    const triple_component::Iri& pred, std::string_view langtag) {
  // Only the part of the language tag before the first `-` is encoded. The
  // content of `pred` is wrapped in angle brackets again.
  const std::string_view primaryLanguage = getPrimaryLanguage(langtag);
  return triple_component::Iri::fromIriref(
      absl::StrCat("@@", primaryLanguage, "@@<",
                   asStringViewUnsafe(pred.getContent()), ">"));
}

} // namespace ad_utility
14 changes: 10 additions & 4 deletions src/util/Conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,15 @@ constexpr std::string_view languageTaggedPredicatePrefix = "@";
// TODO<joka921> The overload that takes and returns `std::string` can be
// removed as soon as we also store strongly-typed IRIs in the predicates of the
// `SparqlTriple` class.
triple_component::Iri convertLangtagToEntityUri(const std::string& tag);
std::string convertToLanguageTaggedPredicate(const std::string& pred,
const std::string& langtag);
triple_component::Iri convertLangtagToEntityUri(std::string_view tag);
std::string convertToLanguageTaggedPredicate(std::string_view pred,
std::string_view langtag);
triple_component::Iri convertToLanguageTaggedPredicate(
const triple_component::Iri& pred, const std::string& langtag);
const triple_component::Iri& pred, std::string_view langtag);

// Return the predicate that is used for the `LANGMATCHES` optimization: the
// result has the form `@@<primary>@@<pred>`, where `<primary>` is the part of
// `langtag` before the first `-` (or all of `langtag` if it contains no `-`).
// The overload that takes an `Iri` encloses the content of `pred` in angle
// brackets and returns the result as an `Iri`.
std::string convertToLangmatchesTaggedPredicate(std::string_view pred,
std::string_view langtag);
triple_component::Iri convertToLangmatchesTaggedPredicate(
const triple_component::Iri& pred, std::string_view langtag);
} // namespace ad_utility
Loading