diff --git a/src/engine/sparqlExpressions/SparqlExpressionPimpl.h b/src/engine/sparqlExpressions/SparqlExpressionPimpl.h index 0b4b7b8f48..aa239e9caf 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionPimpl.h +++ b/src/engine/sparqlExpressions/SparqlExpressionPimpl.h @@ -99,6 +99,7 @@ class SparqlExpressionPimpl { struct LangFilterData { Variable variable_; std::string language_; + bool isLangmatches_ = false; }; std::optional getLanguageFilterExpression() const; diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 2392ec2b18..5467c96b5c 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -479,9 +479,28 @@ using EncodeForUriExpression = } }; -using LangMatches = +using LangMatchesImpl = StringExpressionImpl<2, decltype(langMatching), StringValueGetter>; +class LangMatches : public LangMatchesImpl { + public: + using LangMatchesImpl::LangMatchesImpl; + std::optional getLanguageFilterExpression() const override { + AD_CORRECTNESS_CHECK(children().size() == 2); + auto* var = dynamic_cast(children()[0].get()); + auto* str = + dynamic_cast(children()[1].get()); + if (!(var && str)) { + return std::nullopt; + } + // Only the literal's content is used. TODO Check that the literal is + // plain (no language tag or something else). + return LangFilterData{ + var->value(), + std::string(asStringViewUnsafe(str->value().getContent())), true}; + } +}; + // STRING WITH LANGUAGE TAG [[maybe_unused]] inline auto strLangTag = [](std::optional input, diff --git a/src/index/IndexBuilderTypes.h b/src/index/IndexBuilderTypes.h index 173e323095..eb57ac407e 100644 --- a/src/index/IndexBuilderTypes.h +++ b/src/index/IndexBuilderTypes.h @@ -243,7 +243,7 @@ auto getIdMapLambdas( // the allocation and deallocation of these hash maps (that are newly // created for each batch) much cheaper (see `CachingMemoryResource.h` and // `IndexImpl.cpp`). 
- itemArray[j]->map_.map_.reserve(5 * maxNumberOfTriples / NumThreads); + itemArray[j]->map_.map_.reserve(6 * maxNumberOfTriples / NumThreads); // The LANGUAGE_PREDICATE gets the first ID in each map. TODO // This is not necessary for the actual QLever code, but certain unit tests // currently fail without it. @@ -251,7 +251,7 @@ auto getIdMapLambdas( ad_utility::triple_component::Iri::fromIriref(LANGUAGE_PREDICATE)}); } using OptionalIds = - std::array>, 3>; + std::array>, 4>; /* given an index idx, returns a lambda that * - Takes a triple and a language tag @@ -278,6 +278,8 @@ auto getIdMapLambdas( .iriOrLiteral_.getIri(); auto langTaggedPredId = map.getId(TripleComponent{ ad_utility::convertToLanguageTaggedPredicate(iri, lt.langtag_)}); + auto langMatchesTaggedPredId = map.getId(TripleComponent{ + ad_utility::convertToLangmatchesTaggedPredicate(iri, lt.langtag_)}); auto& spoIds = *res[0]; // ids of original triple // TODO replace the std::array by an explicit IdTriple class, // then the emplace calls don't need the explicit type. @@ -299,6 +301,8 @@ auto getIdMapLambdas( ad_utility::triple_component::Iri::fromIriref( LANGUAGE_PREDICATE)}), langTagId, tripleGraphId}); + res[3].emplace( + Arr{spoIds[0], langMatchesTaggedPredId, spoIds[2], tripleGraphId}); } return res; }; diff --git a/src/index/IndexFormatVersion.h b/src/index/IndexFormatVersion.h index d669952cf1..5d9e68eec1 100644 --- a/src/index/IndexFormatVersion.h +++ b/src/index/IndexFormatVersion.h @@ -36,5 +36,5 @@ struct IndexFormatVersion { // The actual index version. Change it once the binary format of the index // changes. 
inline const IndexFormatVersion& indexFormatVersion{ - 1572, DateYearOrDuration{Date{2024, 10, 22}}}; + 1623, DateYearOrDuration{Date{2024, 11, 20}}}; } // namespace qlever diff --git a/src/parser/GraphPattern.h b/src/parser/GraphPattern.h index 48bd0c73c9..da58278526 100644 --- a/src/parser/GraphPattern.h +++ b/src/parser/GraphPattern.h @@ -43,8 +43,9 @@ class GraphPattern { // Modify query to take care of language filter. `variable` is the variable, // `languageInQuotes` is the language. - void addLanguageFilter(const Variable& variable, - const std::string& languageInQuotes); + [[nodiscard]] bool addLanguageFilter(const Variable& variable, + const std::string& languageInQuotes, + bool isLangmatches = false); bool _optional; diff --git a/src/parser/ParsedQuery.cpp b/src/parser/ParsedQuery.cpp index a08fc09f30..3fe038358e 100644 --- a/src/parser/ParsedQuery.cpp +++ b/src/parser/ParsedQuery.cpp @@ -265,8 +265,9 @@ void ParsedQuery::registerVariableVisibleInQueryBody(const Variable& variable) { ParsedQuery::GraphPattern::GraphPattern() : _optional(false) {} // __________________________________________________________________________ -void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable, - const std::string& langTag) { +bool ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable, + const std::string& langTag, + bool isLangmatches) { // Find all triples where the object is the `variable` and the predicate is // a simple `IRIREF` (neither a variable nor a complex property path). // Search in all the basic graph patterns, as filters have the complete @@ -275,6 +276,10 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable, // Subqueries etc. // TODO Also support property paths (^rdfs:label, // skos:altLabel|rdfs:label, ...) 
+ // `@@`-predicates exist per primary language only: no subtags, no `*`. + if (isLangmatches && langTag.find_first_of("-*") != std::string::npos) { + return false; + } std::vector matchingTriples; using BasicPattern = parsedQuery::BasicGraphPattern; namespace ad = ad_utility; @@ -295,14 +300,20 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable, // Replace all the matching triples. for (auto* triplePtr : matchingTriples) { - triplePtr->p_._iri = ad_utility::convertToLanguageTaggedPredicate( - triplePtr->p_._iri, langTag); + triplePtr->p_._iri = isLangmatches + ? ad_utility::convertToLangmatchesTaggedPredicate( + triplePtr->p_._iri, langTag) + : ad_utility::convertToLanguageTaggedPredicate( + triplePtr->p_._iri, langTag); } // Handle the case, that no suitable triple (see above) was found. In this // case a triple `?variable ql:langtag "language"` is added at the end of // the graph pattern. if (matchingTriples.empty()) { + if (isLangmatches) { + return false; + } LOG(DEBUG) << "language filter variable " + variable.name() + " did not appear as object in any suitable " "triple. 
" @@ -326,6 +337,7 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable, langEntity); t.push_back(std::move(triple)); } + return true; } // ____________________________________________________________________________ diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 19c5ea6d55..75cb1fb650 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -703,8 +703,12 @@ GraphPattern Visitor::visit(Parser::GroupGraphPatternContext* ctx) { if (auto langFilterData = filter.expression_.getLanguageFilterExpression(); langFilterData.has_value()) { - const auto& [variable, language] = langFilterData.value(); - pattern.addLanguageFilter(variable, language); + const auto& [variable, language, isLangmatches] = + langFilterData.value(); + if (!pattern.addLanguageFilter(variable, language, isLangmatches)) { + // TODO Code duplication. + pattern._filters.push_back(std::move(filter)); + } } else { pattern._filters.push_back(std::move(filter)); } diff --git a/src/util/Conversions.cpp b/src/util/Conversions.cpp index 8c2fe16006..605ba6b2f0 100644 --- a/src/util/Conversions.cpp +++ b/src/util/Conversions.cpp @@ -22,21 +22,39 @@ namespace ad_utility { // _________________________________________________________ -triple_component::Iri convertLangtagToEntityUri(const string& tag) { +triple_component::Iri convertLangtagToEntityUri(std::string_view tag) { return triple_component::Iri::fromIriref(makeQleverInternalIri("@", tag)); } // _________________________________________________________ -std::string convertToLanguageTaggedPredicate(const string& pred, - const string& langtag) { +std::string convertToLanguageTaggedPredicate(std::string_view pred, + std::string_view langtag) { return absl::StrCat("@", langtag, "@", pred); } +static std::string_view getPrimaryLanguage(std::string_view language) { + return language.substr(0, 
language.find('-')); +} + // _________________________________________________________ triple_component::Iri convertToLanguageTaggedPredicate( - const triple_component::Iri& pred, const std::string& langtag) { + const triple_component::Iri& pred, std::string_view langtag) { return triple_component::Iri::fromIriref(absl::StrCat( "@", langtag, "@<", asStringViewUnsafe(pred.getContent()), ">")); } +// _________________________________________________________ +std::string convertToLangmatchesTaggedPredicate(std::string_view pred, + std::string_view langtag) { + return absl::StrCat("@@", getPrimaryLanguage(langtag), "@@", pred); +} + +// _________________________________________________________ +triple_component::Iri convertToLangmatchesTaggedPredicate( + const triple_component::Iri& pred, std::string_view langtag) { + return triple_component::Iri::fromIriref( + absl::StrCat("@@", getPrimaryLanguage(langtag), "@@<", + asStringViewUnsafe(pred.getContent()), ">")); +} + } // namespace ad_utility diff --git a/src/util/Conversions.h b/src/util/Conversions.h index bbd4e901f2..ee6b9417c0 100644 --- a/src/util/Conversions.h +++ b/src/util/Conversions.h @@ -17,9 +17,15 @@ constexpr std::string_view languageTaggedPredicatePrefix = "@"; // TODO The overload that takes and returns `std::string` can be // removed as soon as we also store strongly-typed IRIs in the predicates of the // `SparqlTriple` class. -triple_component::Iri convertLangtagToEntityUri(const std::string& tag); -std::string convertToLanguageTaggedPredicate(const std::string& pred, - const std::string& langtag); +triple_component::Iri convertLangtagToEntityUri(std::string_view tag); +std::string convertToLanguageTaggedPredicate(std::string_view pred, + std::string_view langtag); triple_component::Iri convertToLanguageTaggedPredicate( - const triple_component::Iri& pred, std::string_view langtag); + const triple_component::Iri& pred, std::string_view langtag); + +// Variants for `LANGMATCHES`: `@@`-delimited, langtag cut at the first `-`. 
+std::string convertToLangmatchesTaggedPredicate(std::string_view pred, + std::string_view langtag); +triple_component::Iri convertToLangmatchesTaggedPredicate( + const triple_component::Iri& pred, std::string_view langtag); } // namespace ad_utility