From 47feb7161a6e97fbdc91576d0e4a9703a189b3b2 Mon Sep 17 00:00:00 2001 From: DuDaAG Date: Thu, 24 Oct 2024 11:18:23 +0200 Subject: [PATCH 01/30] =?UTF-8?q?STRLEN=20f=C3=BCr=20UTF=208=20angepasst?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/engine/sparqlExpressions/StringExpressions.cpp | 6 ++++-- test/SparqlExpressionTest.cpp | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 93ba543005..8e4debfbb8 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -125,9 +125,11 @@ using IriOrUriExpression = NARY<1, FV>; // STRLEN [[maybe_unused]] auto strlen = [](std::string_view s) { - return Id::makeFromInt(static_cast(s.size())); + // Counts UTF-8 characters by skipping continuation bytes (those starting with "10"). + auto utf8Len = std::count_if(s.begin(), s.end(), + [](char c) { return (static_cast(c) & 0xC0) != 0x80; } ); + return Id::makeFromInt(static_cast(utf8Len)); }; - using StrlenExpression = StringExpressionImpl<1, LiftStringFunction>; diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index ab5b3add68..977866d863 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -587,8 +587,8 @@ TEST(SparqlExpression, stringOperators) { // Test the different (optimized) behavior depending on whether the STR() // function was applied to the argument. - checkStrlen(IdOrLiteralOrIriVec{lit("one"), I(1), D(3.6), lit("")}, - Ids{I(3), U, U, I(0)}); + checkStrlen(IdOrLiteralOrIriVec{lit("one"), lit("tschüss"), I(1), D(3.6), lit("")}, + Ids{I(3),I(6), U, U, I(0)}); checkStrlenWithStrChild( IdOrLiteralOrIriVec{lit("one"), I(1), D(3.6), lit("")}, Ids{I(3), I(1), I(3), I(0)}); From 4877e8f33ea3e0d872336fbf05e6a58910c933c1 Mon Sep 17 00:00:00 2001 From: DuDaAG Date: Thu, 24 Oct 2024 19:25:08 +0200 Subject: [PATCH 02/30] Test --- src/engine/sparqlExpressions/StringExpressions.cpp | 2 +- test/SparqlExpressionTest.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 8e4debfbb8..6acaf1350d 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -125,7 +125,7 @@ using IriOrUriExpression = NARY<1, FV>; // STRLEN [[maybe_unused]] auto strlen = [](std::string_view s) { - // Counts UTF-8 characters by skipping continuation bytes (those starting with "10"). + // Counts UTF-8 characters by skipping continuation bytes (those starting with "10").test auto utf8Len = std::count_if(s.begin(), s.end(), [](char c) { return (static_cast(c) & 0xC0) != 0x80; } ); return Id::makeFromInt(static_cast(utf8Len)); diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index 977866d863..aa4ee9a284 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -586,7 +586,7 @@ TEST(SparqlExpression, stringOperators) { Ids{I(3), I(3), I(5), I(0)}); // Test the different (optimized) behavior depending on whether the STR() - // function was applied to the argument. + // function was applied to the argument. test checkStrlen(IdOrLiteralOrIriVec{lit("one"), lit("tschüss"), I(1), D(3.6), lit("")}, Ids{I(3),I(6), U, U, I(0)}); checkStrlenWithStrChild( From aefd8898071a8878511153794abc45083df0830f Mon Sep 17 00:00:00 2001 From: DuDaAG Date: Thu, 24 Oct 2024 19:59:24 +0200 Subject: [PATCH 03/30] =?UTF-8?q?test=20r=C3=BCckg=C3=A4ning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/engine/sparqlExpressions/StringExpressions.cpp | 2 +- test/SparqlExpressionTest.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 6acaf1350d..8e4debfbb8 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -125,7 +125,7 @@ using IriOrUriExpression = NARY<1, FV>; // STRLEN [[maybe_unused]] auto strlen = [](std::string_view s) { - // Counts UTF-8 characters by skipping continuation bytes (those starting with "10").test + // Counts UTF-8 characters by skipping continuation bytes (those starting with "10"). auto utf8Len = std::count_if(s.begin(), s.end(), [](char c) { return (static_cast(c) & 0xC0) != 0x80; } ); return Id::makeFromInt(static_cast(utf8Len)); diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index aa4ee9a284..977866d863 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -586,7 +586,7 @@ TEST(SparqlExpression, stringOperators) { Ids{I(3), I(3), I(5), I(0)}); // Test the different (optimized) behavior depending on whether the STR() - // function was applied to the argument. test + // function was applied to the argument. checkStrlen(IdOrLiteralOrIriVec{lit("one"), lit("tschüss"), I(1), D(3.6), lit("")}, Ids{I(3),I(6), U, U, I(0)}); checkStrlenWithStrChild( From b2cb8c6c0dea8be00bbf51a7be3d42548a7b9cc2 Mon Sep 17 00:00:00 2001 From: DuDaAG Date: Fri, 25 Oct 2024 09:29:34 +0200 Subject: [PATCH 04/30] find pull-request --- src/engine/sparqlExpressions/StringExpressions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 8e4debfbb8..1d57e14851 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -125,7 +125,7 @@ using IriOrUriExpression = NARY<1, FV>; // STRLEN [[maybe_unused]] auto strlen = [](std::string_view s) { - // Counts UTF-8 characters by skipping continuation bytes (those starting with "10"). + // Count UTF-8 characters by skipping continuation bytes (those starting with "10"). auto utf8Len = std::count_if(s.begin(), s.end(), [](char c) { return (static_cast(c) & 0xC0) != 0x80; } ); return Id::makeFromInt(static_cast(utf8Len)); From 96b1959d1e179fc13910d5c273b592a78a27e9f6 Mon Sep 17 00:00:00 2001 From: DuDaAG Date: Fri, 25 Oct 2024 11:13:04 +0200 Subject: [PATCH 05/30] Fix test --- src/engine/sparqlExpressions/StringExpressions.cpp | 8 +++++--- test/SparqlExpressionTest.cpp | 5 +++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 1d57e14851..85c9b754f0 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -125,9 +125,11 @@ using IriOrUriExpression = NARY<1, FV>; // STRLEN [[maybe_unused]] auto strlen = [](std::string_view s) { - // Count UTF-8 characters by skipping continuation bytes (those starting with "10"). - auto utf8Len = std::count_if(s.begin(), s.end(), - [](char c) { return (static_cast(c) & 0xC0) != 0x80; } ); + // Count UTF-8 characters by skipping continuation bytes (those starting with + // "10"). + auto utf8Len = std::count_if(s.begin(), s.end(), [](char c) { + return (static_cast(c) & 0xC0) != 0x80; + }); return Id::makeFromInt(static_cast(utf8Len)); }; using StrlenExpression = diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index 977866d863..707086ef70 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -587,8 +587,9 @@ TEST(SparqlExpression, stringOperators) { // Test the different (optimized) behavior depending on whether the STR() // function was applied to the argument. - checkStrlen(IdOrLiteralOrIriVec{lit("one"), lit("tschüss"), I(1), D(3.6), lit("")}, - Ids{I(3),I(6), U, U, I(0)}); + checkStrlen( + IdOrLiteralOrIriVec{lit("one"), lit("tschüss"), I(1), D(3.6), lit("")}, + Ids{I(3), I(7), U, U, I(0)}); checkStrlenWithStrChild( IdOrLiteralOrIriVec{lit("one"), I(1), D(3.6), lit("")}, Ids{I(3), I(1), I(3), I(0)}); From b7806b899031bc1561b765ad72d15146dd239a0c Mon Sep 17 00:00:00 2001 From: DuDaAG <152475267+DuDaAG@users.noreply.github.com> Date: Thu, 31 Oct 2024 08:51:12 +0100 Subject: [PATCH 06/30] Update src/engine/sparqlExpressions/StringExpressions.cpp Co-authored-by: Johannes Kalmbach --- src/engine/sparqlExpressions/StringExpressions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 85c9b754f0..0f68d6ad98 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -127,7 +127,7 @@ using IriOrUriExpression = NARY<1, FV>; [[maybe_unused]] auto strlen = [](std::string_view s) { // Count UTF-8 characters by skipping continuation bytes (those starting with // "10"). - auto utf8Len = std::count_if(s.begin(), s.end(), [](char c) { + auto utf8Len = std::ranges::count_if(s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); return Id::makeFromInt(static_cast(utf8Len)); From a83d74b51114a97dce246be584f2d213c86893fa Mon Sep 17 00:00:00 2001 From: DuDaAG <152475267+DuDaAG@users.noreply.github.com> Date: Thu, 31 Oct 2024 10:03:21 +0100 Subject: [PATCH 07/30] Update src/engine/sparqlExpressions/StringExpressions.cpp Co-authored-by: Johannes Kalmbach --- src/engine/sparqlExpressions/StringExpressions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 0f68d6ad98..c1423e0ca6 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -130,7 +130,7 @@ using IriOrUriExpression = NARY<1, FV>; auto utf8Len = std::ranges::count_if(s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); - return Id::makeFromInt(static_cast(utf8Len)); + return Id::makeFromInt(utf8Len); }; using StrlenExpression = StringExpressionImpl<1, LiftStringFunction>; From e73d1abaf53f1f16b9a36e872500dd30ad89e6c5 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Fri, 1 Nov 2024 12:33:56 +0100 Subject: [PATCH 08/30] Format --- src/engine/sparqlExpressions/StringExpressions.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index c1423e0ca6..47ee97bd23 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -127,9 +127,8 @@ using IriOrUriExpression = NARY<1, FV>; [[maybe_unused]] auto strlen = [](std::string_view s) { // Count UTF-8 characters by skipping continuation bytes (those starting with // "10"). - auto utf8Len = std::ranges::count_if(s, [](char c) { - return (static_cast(c) & 0xC0) != 0x80; - }); + auto utf8Len = std::ranges::count_if( + s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); return Id::makeFromInt(utf8Len); }; using StrlenExpression = From 0ed79df75eec90f31cefdca653ac22e94d5ebd99 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Wed, 13 Nov 2024 10:26:42 +0100 Subject: [PATCH 09/30] new LiteralOrIriValueGetter --- src/engine/ExportQueryExecutionTrees.cpp | 76 ++++++++++++++++++- src/engine/ExportQueryExecutionTrees.h | 10 +++ .../SparqlExpressionValueGetters.cpp | 13 ++++ .../SparqlExpressionValueGetters.h | 14 ++++ .../sparqlExpressions/StringExpressions.cpp | 2 + test/CMakeLists.txt | 4 +- test/ExportQueryExecutionTreesTest.cpp | 58 ++++++++++++++ 7 files changed, 174 insertions(+), 3 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 873bd21ae7..4ee2c6fc4e 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -12,6 +12,8 @@ #include "util/ConstexprUtils.h" #include "util/http/MediaTypes.h" +using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; + // __________________________________________________________________________ cppcoro::generator ExportQueryExecutionTrees::getIdTables( const Result& result) { @@ -222,11 +224,58 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) { } } +// _____________________________________________________________________________ +std::optional +ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue(Id id) { + using enum Datatype; + auto fromIri = TripleComponent::Iri::fromIrirefWithoutBrackets; + switch (id.getDatatype()) { + case Undefined: + return std::nullopt; + case Double: + // We use the immediately invoked lambda here because putting this block + // in braces confuses the test coverage tool. + return [id]() -> std::optional { + // Format as integer if fractional part is zero, let C++ decide + // otherwise. + std::stringstream ss; + double d = id.getDouble(); + double dIntPart; + if (std::modf(d, &dIntPart) == 0.0) { + ss << std::fixed << std::setprecision(0) << id.getDouble(); + } else { + ss << d; + } + return LiteralOrIri::literalWithoutQuotes( + std::move(ss).str(), + TripleComponent::Iri::fromIrirefWithoutBrackets(XSD_DOUBLE_TYPE)); + }(); + case Bool: + return id.getBool() ? LiteralOrIri::literalWithoutQuotes( + "true", fromIri(XSD_BOOLEAN_TYPE)) + : LiteralOrIri::literalWithoutQuotes( + "false", fromIri(XSD_BOOLEAN_TYPE)); + case Int: + return LiteralOrIri::literalWithoutQuotes(std::to_string(id.getInt()), + fromIri(XSD_INT_TYPE)); + case Date: + return LiteralOrIri::literalWithoutQuotes( + id.getDate().toStringAndType().first, fromIri(XSD_DATE_TYPE)); + case GeoPoint: + return LiteralOrIri::literalWithoutQuotes( + id.getGeoPoint().toStringAndType().first, fromIri(GEO_WKT_LITERAL)); + case BlankNodeIndex: + return LiteralOrIri::literalWithoutQuotes( + absl::StrCat("_:bn", id.getBlankNodeIndex().get())); + default: + AD_FAIL(); + } +} + // _____________________________________________________________________________ ad_utility::triple_component::LiteralOrIri ExportQueryExecutionTrees::getLiteralOrIriFromVocabIndex( const Index& index, Id id, const LocalVocab& localVocab) { - using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; switch (id.getDatatype()) { case Datatype::LocalVocabIndex: return localVocab.getWord(id.getLocalVocabIndex()).asLiteralOrIri(); @@ -287,6 +336,31 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, return idToStringAndTypeForEncodedValue(id); } } + +// _____________________________________________________________________________ +std::optional ExportQueryExecutionTrees::idToLiteralOrIri( + const Index& index, Id id, const LocalVocab& localVocab) { + using enum Datatype; + auto handleIriOrLiteral = + [](const LiteralOrIri& word) -> std::optional { + return word; + }; + switch (id.getDatatype()) { + case WordVocabIndex: + // TODO + return std::nullopt; + case VocabIndex: + case LocalVocabIndex: + return handleIriOrLiteral( + getLiteralOrIriFromVocabIndex(index, id, localVocab)); + case TextRecordIndex: + // TODO + return std::nullopt; + default: + return idToLiteralOrIriForEncodedValue(id); + } +} + // ___________________________________________________________________________ template std::optional> ExportQueryExecutionTrees::idToStringAndType( diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 339e7b2cf5..6871026db8 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -24,6 +24,7 @@ class ExportQueryExecutionTrees { public: using MediaType = ad_utility::MediaType; using CancellationHandle = ad_utility::SharedCancellationHandle; + using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; // Compute the result of the given `parsedQuery` (created by the // `SparqlParser`) for which the `QueryExecutionTree` has been previously @@ -84,6 +85,15 @@ class ExportQueryExecutionTrees { static std::optional> idToStringAndTypeForEncodedValue(Id id); + // Same as the 'idToStringAndType' above but returning a LiteralOrIri instead + // of a std::pair + static std::optional idToLiteralOrIri( + const Index& index, Id id, const LocalVocab& localVocab); + // Same as the previous function, but only handles the datatypes for which the + // value is encoded directly in the ID. For other datatypes an exception is + // thrown. + static std::optional idToLiteralOrIriForEncodedValue(Id id); + // Acts as a helper to retrieve an LiteralOrIri object // from an Id, where the Id is of type `VocabIndex` or `LocalVocabIndex`. // This function should only be called with suitable `Datatype` Id's, diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index 3ed18e7b99..399ccc3d6e 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -90,6 +90,19 @@ std::optional StringValueGetter::operator()( } } +// ____________________________________________________________________________ +std::optional LiteralOrIriValueGetter::operator()( + Id id, const EvaluationContext* context) const { + auto optionalLiteralOrIriAndType = + ExportQueryExecutionTrees::idToLiteralOrIri(context->_qec.getIndex(), id, + context->_localVocab); + if (optionalLiteralOrIriAndType.has_value()) { + return std::move(optionalLiteralOrIriAndType.value()); + } else { + return std::nullopt; + } +} + // ____________________________________________________________________________ template Id IsSomethingValueGetter::operator()( diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index 6e7cd310ec..3536c99f76 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -141,6 +141,20 @@ struct StringValueGetter : Mixin { } }; +// This class can be used as the `ValueGetter` argument of Expression +// templates. It produces a LiteralOrIri. +struct LiteralOrIriValueGetter : Mixin { + using Mixin::operator(); + + std::optional operator()(ValueId, + const EvaluationContext*) const; + + std::optional operator()(const LiteralOrIri& s, + const EvaluationContext*) const { + return s; + } +}; + // Value getter for `isBlank`. struct IsBlankNodeValueGetter : Mixin { using Mixin::operator(); diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 47ee97bd23..e81f1b3d18 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -181,6 +181,8 @@ class SubstrImpl { }; public: + // TODO Statt s vom Typ std::optional, s vom Typ + // std::optional IdOrLiteralOrIri operator()(std::optional s, NumericValue start, NumericValue length) const { if (!s.has_value() || std::holds_alternative(start) || diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index eaf4b037de..aff3997072 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -33,7 +33,7 @@ endfunction() # required e.g. if several tests cases write to the same file. function(linkAndDiscoverTestSerial basename) linkTest(${basename} ${ARGN}) - gtest_discover_tests(${basename} ${basename} PROPERTIES RUN_SERIAL + gtest_discover_tests(${basename} ${basename} DISCOVERY_TIMEOUT 600 PROPERTIES RUN_SERIAL TRUE) endfunction() @@ -41,7 +41,7 @@ if (SINGLE_TEST_BINARY) message(STATUS "All tests are linked into a single executable `QLeverAllUnitTestsMain`") add_executable(QLeverAllUnitTestsMain) qlever_target_link_libraries(QLeverAllUnitTestsMain gtest gmock_main testUtil ${CMAKE_THREAD_LIBS_INIT}) - gtest_discover_tests(QLeverAllUnitTestsMain QLeverAllUnitTestsMain PROPERTIES RUN_SERIAL + gtest_discover_tests(QLeverAllUnitTestsMain QLeverAllUnitTestsMain DISCOVERY_TIMEOUT 600 PROPERTIES RUN_SERIAL TRUE) else () message(STATUS "The tests are split over multiple binaries") diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index ed8482d66c..1961da0c8f 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1482,3 +1482,61 @@ TEST(ExportQueryExecutionTrees, convertGeneratorForChunkedTransfer) { AllOf(HasSubstr("!!!!>># An error has occurred"), HasSubstr("A very strange"))); } + +TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality1) { + std::string kg = "

31 . 42"; + auto qec = ad_utility::testing::getQec(kg); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + using enum Datatype; + + // Case VocabIndex + { + Id id = getId(""); + ASSERT_EQ(id.getDatatype(), VocabIndex); + auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), ""); + } + + // Case Int + { + Id id = ad_utility::testing::IntId(1); + auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), + "\"1\"^^"); + } + + // Case Double + { + Id id = ad_utility::testing::DoubleId(1.2); + auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), + "\"1.2\"^^"); + } + { + // Case Bool + Id id = ad_utility::testing::BoolId(true); + auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), + "\"true\"^^"); + } + { + // Case Date + Id id = ad_utility::testing::DateId(DateYearOrDuration::parseXsdDate, + "2024-11-07"); + auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), + "\"2024-11-07\"^^"); + } + // Case Undefined + { + Id id = ad_utility::testing::UndefId(); + auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral, std::nullopt); + } +} From 7078f6023373dd3e168dfb7bf47fb32f945494bd Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Fri, 22 Nov 2024 10:47:50 +0100 Subject: [PATCH 10/30] idToLiteralAndIri with specifiactions --- src/engine/ExportQueryExecutionTrees.cpp | 116 +++++++++------- src/engine/ExportQueryExecutionTrees.h | 21 ++- .../sparqlExpressions/StringExpressions.cpp | 37 ++++- test/ExportQueryExecutionTreesTest.cpp | 126 ++++++++++++------ 4 files changed, 201 insertions(+), 99 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 5a9a52fdcf..2c8651378e 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -14,7 +14,6 @@ #include "util/ConstexprUtils.h" #include "util/http/MediaTypes.h" - using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; // Return true iff the `result` is nonempty. @@ -352,50 +351,14 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) { // _____________________________________________________________________________ std::optional -ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue(Id id) { - using enum Datatype; - auto fromIri = TripleComponent::Iri::fromIrirefWithoutBrackets; - switch (id.getDatatype()) { - case Undefined: - return std::nullopt; - case Double: - // We use the immediately invoked lambda here because putting this block - // in braces confuses the test coverage tool. - return [id]() -> std::optional { - // Format as integer if fractional part is zero, let C++ decide - // otherwise. - std::stringstream ss; - double d = id.getDouble(); - double dIntPart; - if (std::modf(d, &dIntPart) == 0.0) { - ss << std::fixed << std::setprecision(0) << id.getDouble(); - } else { - ss << d; - } - return LiteralOrIri::literalWithoutQuotes( - std::move(ss).str(), - TripleComponent::Iri::fromIrirefWithoutBrackets(XSD_DOUBLE_TYPE)); - }(); - case Bool: - return id.getBool() ? LiteralOrIri::literalWithoutQuotes( - "true", fromIri(XSD_BOOLEAN_TYPE)) - : LiteralOrIri::literalWithoutQuotes( - "false", fromIri(XSD_BOOLEAN_TYPE)); - case Int: - return LiteralOrIri::literalWithoutQuotes(std::to_string(id.getInt()), - fromIri(XSD_INT_TYPE)); - case Date: - return LiteralOrIri::literalWithoutQuotes( - id.getDate().toStringAndType().first, fromIri(XSD_DATE_TYPE)); - case GeoPoint: - return LiteralOrIri::literalWithoutQuotes( - id.getGeoPoint().toStringAndType().first, fromIri(GEO_WKT_LITERAL)); - case BlankNodeIndex: - return LiteralOrIri::literalWithoutQuotes( - absl::StrCat("_:bn", id.getBlankNodeIndex().get())); - default: - AD_FAIL(); +ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue( + Id id, bool onlyReturnLiteralsWithXsdString) { + auto optionalStringAndType = idToStringAndTypeForEncodedValue(id); + if (!optionalStringAndType || onlyReturnLiteralsWithXsdString) { + return std::nullopt; } + + return LiteralOrIri::literalWithoutQuotes(optionalStringAndType->first); } // _____________________________________________________________________________ @@ -464,17 +427,57 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, } // _____________________________________________________________________________ +template std::optional ExportQueryExecutionTrees::idToLiteralOrIri( - const Index& index, Id id, const LocalVocab& localVocab) { + const Index& index, Id id, const LocalVocab& localVocab, + bool onlyReturnLiteralsWithXsdString) { using enum Datatype; + auto datatype = id.getDatatype(); + if constexpr (onlyReturnLiterals) { + if (!(datatype == VocabIndex || datatype == LocalVocabIndex)) { + return std::nullopt; + } + } auto handleIriOrLiteral = - [](const LiteralOrIri& word) -> std::optional { + [onlyReturnLiteralsWithXsdString]( + const LiteralOrIri& word) -> std::optional { + if constexpr (onlyReturnLiterals) { + if (!word.isLiteral()) { + return std::nullopt; + } + } + // Return only literals without datatype or literals with xsd:string + // datatype + if (onlyReturnLiteralsWithXsdString) { + if (word.isLiteral()) { + if (!word.hasDatatype() || + (word.hasDatatype() && + std::string_view( + reinterpret_cast(word.getDatatype().data()), + word.getDatatype().size()) == XSD_STRING)) { + return word; + } + } + return std::nullopt; + } + + // If the literal has a datatype that is not xsd:string, remove the datatype + if (word.isLiteral()) { + if (word.hasDatatype() && + std::string_view( + reinterpret_cast(word.getDatatype().data()), + word.getDatatype().size()) != XSD_STRING) { + return LiteralOrIri{ + ad_utility::triple_component::Literal::literalWithNormalizedContent( + word.getContent())}; + } + } return word; }; - switch (id.getDatatype()) { + switch (datatype) { case WordVocabIndex: - // TODO - return std::nullopt; + return LiteralOrIri::literalWithoutQuotes( + index.indexToString(id.getWordVocabIndex())); case VocabIndex: case LocalVocabIndex: return handleIriOrLiteral( @@ -483,7 +486,8 @@ std::optional ExportQueryExecutionTrees::idToLiteralOrIri( // TODO return std::nullopt; default: - return idToLiteralOrIriForEncodedValue(id); + return idToLiteralOrIriForEncodedValue(id, + onlyReturnLiteralsWithXsdString); } } @@ -508,6 +512,18 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, const LocalVocab& localVocab, std::identity&& escapeFunction); +// ___________________________________________________________________________ +template std::optional +ExportQueryExecutionTrees::idToLiteralOrIri( + const Index& index, Id id, const LocalVocab& localVocab, + bool onlyReturnLiteralsWithXsdString); + +// ___________________________________________________________________________ +template std::optional +ExportQueryExecutionTrees::idToLiteralOrIri( + const Index& index, Id id, const LocalVocab& localVocab, + bool onlyReturnLiteralsWithXsdString); + // Convert a stringvalue and optional type to JSON binding. static nlohmann::json stringAndTypeToBinding(std::string_view entitystr, const char* xsdType) { diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 19d493f95a..20af6830b5 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -70,14 +70,27 @@ class ExportQueryExecutionTrees { static std::optional> idToStringAndTypeForEncodedValue(Id id); - // Same as the 'idToStringAndType' above but returning a LiteralOrIri instead - // of a std::pair + // Converts an Id to a LiteralOrIri based on its type and value. + // For VocabIndex or LocalVocabIndex: Return Literal or Iri. If + // `onlyReturnLiteralsWithXsdString` is true, return only literals (no IRIs) + // with no datatype or datatype `xsd:string`; otherwise, return any literal, + // but strip datatypes other than `xsd:string`. For Double, Int, Bool, Date, + // or GeoPoint: Return the literal without the datatype. If + // `onlyReturnLiteralsWithXsdString` is true return `std::nullopt`. For + // Undefined Id: Always return `std::nullopt` + template static std::optional idToLiteralOrIri( - const Index& index, Id id, const LocalVocab& localVocab); + const Index& index, Id id, const LocalVocab& localVocab, + bool onlyReturnLiteralsWithXsdString = false); + // Same as the previous function, but only handles the datatypes for which the // value is encoded directly in the ID. For other datatypes an exception is // thrown. - static std::optional idToLiteralOrIriForEncodedValue(Id id); + // If `onlyReturnLiteralsWithXsdString` is `true`, returns `std::nullopt`. + // If `onlyReturnLiteralsWithXsdString` is `false`, removes datatypes from + // literals (e.g., `42^^xsd:integer` becomes `"42"`). + static std::optional idToLiteralOrIriForEncodedValue( + Id id, bool onlyReturnLiteralsWithXsdString = false); // Acts as a helper to retrieve an LiteralOrIri object // from an Id, where the Id is of type `VocabIndex` or `LocalVocabIndex`. diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index e81f1b3d18..9d167966d6 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -21,6 +21,14 @@ constexpr auto toLiteral = [](std::string_view normalizedContent) { asNormalizedStringViewUnsafe(normalizedContent))}; }; +constexpr auto toLiteralWithDescriptor = + [](std::string_view normalizedContent, + std::optional> descriptor) { + return LiteralOrIri{ + ad_utility::triple_component::Literal::literalWithNormalizedContent( + asNormalizedStringViewUnsafe(normalizedContent), descriptor)}; + }; + // String functions. [[maybe_unused]] auto strImpl = [](std::optional s) -> IdOrLiteralOrIri { @@ -183,7 +191,7 @@ class SubstrImpl { public: // TODO Statt s vom Typ std::optional, s vom Typ // std::optional - IdOrLiteralOrIri operator()(std::optional s, NumericValue start, + IdOrLiteralOrIri operator()(std::optional s, NumericValue start, NumericValue length) const { if (!s.has_value() || std::holds_alternative(start) || std::holds_alternative(length)) { @@ -204,7 +212,21 @@ class SubstrImpl { lengthInt += startInt; } - const auto& str = s.value(); + const auto& str = asStringViewUnsafe(s.value().getContent()); + std::optional> descriptor; + + if (s->isLiteral()) { + if (s->hasLanguageTag()) { + descriptor = std::string(asStringViewUnsafe(s->getLanguageTag())); + } else if (s->hasDatatype()) { + descriptor = + ad_utility::triple_component::Iri::fromIrirefWithoutBrackets( + asStringViewUnsafe(s->getDatatype())); + } + } else { + descriptor = std::nullopt; + } + // Clamp the number such that it is in `[0, str.size()]`. That way we end up // with valid arguments for the `getUTF8Substring` method below for both // starting position and length since all the other corner cases have been @@ -219,13 +241,16 @@ class SubstrImpl { return static_cast(n); }; - return toLiteral( - ad_utility::getUTF8Substring(str, clamp(startInt), clamp(lengthInt))); + return toLiteralWithDescriptor( + ad_utility::getUTF8Substring(str, clamp(startInt), clamp(lengthInt)), + descriptor); } }; -using SubstrExpression = - StringExpressionImpl<3, SubstrImpl, NumericValueGetter, NumericValueGetter>; +using SubstrExpression = NARY<3, FV>; +// using SubstrExpression = +// StringExpressionImpl<3, SubstrImpl, NumericValueGetter, NumericValueGetter>; // STRSTARTS [[maybe_unused]] auto strStartsImpl = [](std::string_view text, diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 8524a3ce69..71db376555 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -330,6 +330,7 @@ std::chrono::milliseconds toChrono(std::string_view string) { } } // namespace +/* // ____________________________________________________________________________ TEST(ExportQueryExecutionTrees, Integers) { std::string kg = @@ -338,13 +339,16 @@ TEST(ExportQueryExecutionTrees, Integers) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - -42019234865781 + -42019234865781 - 42 + 42 - 4012934858173560 + 4012934858173560 )" + xmlTrailer; TestCaseSelectQuery testCase{ kg, query, 3, @@ -404,10 +408,12 @@ TEST(ExportQueryExecutionTrees, Bool) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - false + false - true + true )" + xmlTrailer; TestCaseSelectQuery testCase{ kg, query, 2, @@ -496,13 +502,16 @@ TEST(ExportQueryExecutionTrees, Floats) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - -42019234865780982022144 + -42019234865780982022144 - 4.01293e-12 + 4.01293e-12 - 42.2 + 42.2 )" + xmlTrailer; TestCaseSelectQuery testCaseFloat{ kg, query, 3, @@ -563,7 +572,8 @@ TEST(ExportQueryExecutionTrees, Dates) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - 1950-01-01T00:00:00 + 1950-01-01T00:00:00 )" + xmlTrailer; TestCaseSelectQuery testCase{ kg, query, 1, @@ -571,7 +581,8 @@ TEST(ExportQueryExecutionTrees, Dates) { "?o\n" "1950-01-01T00:00:00\n", // should be - // "\"1950-01-01T00:00:00\"^^\n", + // +"\"1950-01-01T00:00:00\"^^\n", // but that is a bug in the TSV export for another PR. Note: the duplicate // quotes are due to the escaping for CSV. "o\n" @@ -592,7 +603,8 @@ TEST(ExportQueryExecutionTrees, Dates) { // TSV "\t

\t\"1950-01-01T00:00:00\"^^\n", // missing - // "^^\n", + // +"^^\n", // CSV // TODO This format is wrong, but this is is due to the way that // CONSTRUCT queries are currently exported. This has to be fixed in a @@ -624,7 +636,8 @@ TEST(ExportQueryExecutionTrees, GeoPoints) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - POINT(50.000000 50.000000) + POINT(50.000000 50.000000) )" + xmlTrailer; TestCaseSelectQuery testCase{ kg, query, 1, @@ -632,7 +645,8 @@ TEST(ExportQueryExecutionTrees, GeoPoints) { "?o\n" "POINT(50.000000 50.000000)\n", // should be - // "\"POINT(50.000000 50.000000)\"^^\n", + // +"\"POINT(50.000000 50.000000)\"^^\n", // but that is a bug in the TSV export for another PR. Note: the duplicate // quotes are due to the escaping for CSV. "o\n" @@ -751,7 +765,8 @@ TEST(ExportQueryExecutionTrees, LiteralWithDatatype) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - something + something )" + xmlTrailer; TestCaseSelectQuery testCase{ kg, query, 1, @@ -1637,56 +1652,89 @@ TEST(ExportQueryExecutionTrees, convertGeneratorForChunkedTransfer) { AllOf(HasSubstr("!!!!>># An error has occurred"), HasSubstr("A very strange"))); } - -TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality1) { - std::string kg = "

31 . 42"; +*/ +TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { + std::string kg = + "

\"something\" .

1.

" + "\"some^^\" ."; auto qec = ad_utility::testing::getQec(kg); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); using enum Datatype; - // Case VocabIndex + // Case Literal With no Datatype { - Id id = getId(""); - ASSERT_EQ(id.getDatatype(), VocabIndex); + Id id = getId("\"something\""); auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), ""); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"something\""); + // Case onlyReturnLiterals + resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"something\""); + // Case onlyReturnLiteralsWithXsdString + resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}, true); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"something\""); } - // Case Int + // Case Literal With Datatype String { - Id id = ad_utility::testing::IntId(1); + Id id = getId("\"some^^\""); auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( qec->getIndex(), id, LocalVocab{}); EXPECT_EQ(resultLiteral.value().toStringRepresentation(), - "\"1\"^^"); + "\"some^^\""); + // TODO: Problem: The Literal has no Datatype + EXPECT_EQ(resultLiteral.value().hasDatatype(), false); + // Case onlyReturnLiterals + resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), + "\"some^^\""); + // Case onlyReturnLiteralsWithXsdString + resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}, true); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), + "\"some^^\""); } - // Case Double + // TODO: Case Literal With Datatype not equal String { - Id id = ad_utility::testing::DoubleId(1.2); - auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), - "\"1.2\"^^"); + } + + // Case Iri { - // Case Bool - Id id = ad_utility::testing::BoolId(true); + Id id = getId(""); auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), - "\"true\"^^"); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), ""); + // Case onlyReturnLiterals + resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral, std::nullopt); + // Case onlyReturnLiteralsWithXsdString + resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}, true); + EXPECT_EQ(resultLiteral, std::nullopt); } + + // Case Datatype Int { - // Case Date - Id id = ad_utility::testing::DateId(DateYearOrDuration::parseXsdDate, - "2024-11-07"); + Id id = ad_utility::testing::IntId(1); auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), - "\"2024-11-07\"^^"); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"1\""); + // Case onlyReturnLiterals + resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral, std::nullopt); + // Case onlyReturnLiteralsWithXsdString + resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}, true); + EXPECT_EQ(resultLiteral, std::nullopt); } + // Case Undefined { Id id = ad_utility::testing::UndefId(); From 2581f4cca096d95d146ad3ae2766d16b1b5e82e5 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Fri, 22 Nov 2024 11:53:57 +0100 Subject: [PATCH 11/30] some fixes --- .../SparqlExpressionValueGetters.cpp | 11 ++--- .../sparqlExpressions/StringExpressions.cpp | 2 - test/ExportQueryExecutionTreesTest.cpp | 47 +++++++------------ test/SparqlExpressionTest.cpp | 12 ++++- 4 files changed, 29 insertions(+), 43 deletions(-) diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index 399ccc3d6e..822387e3ee 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -93,14 +93,9 @@ std::optional StringValueGetter::operator()( // ____________________________________________________________________________ std::optional LiteralOrIriValueGetter::operator()( Id id, const EvaluationContext* context) const { - auto optionalLiteralOrIriAndType = - ExportQueryExecutionTrees::idToLiteralOrIri(context->_qec.getIndex(), id, - context->_localVocab); - if (optionalLiteralOrIriAndType.has_value()) { - return std::move(optionalLiteralOrIriAndType.value()); - } else { - return std::nullopt; - } + //true means that immediately returns nullopt for everything that is not a literal + return ExportQueryExecutionTrees::idToLiteralOrIri( + context->_qec.getIndex(), id, context->_localVocab); } // ____________________________________________________________________________ diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 9d167966d6..29553996d0 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -189,8 +189,6 @@ class SubstrImpl { }; public: - // TODO Statt s vom Typ std::optional, s vom Typ - // std::optional IdOrLiteralOrIri operator()(std::optional s, NumericValue start, NumericValue length) const { if (!s.has_value() || std::holds_alternative(start) || diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 71db376555..647cb3560e 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -330,7 +330,6 @@ std::chrono::milliseconds toChrono(std::string_view string) { } } // namespace -/* // ____________________________________________________________________________ TEST(ExportQueryExecutionTrees, Integers) { std::string kg = @@ -339,16 +338,13 @@ TEST(ExportQueryExecutionTrees, Integers) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - -42019234865781 + -42019234865781 - 42 + 42 - 4012934858173560 + 4012934858173560 )" + xmlTrailer; TestCaseSelectQuery testCase{ kg, query, 3, @@ -408,12 +404,10 @@ TEST(ExportQueryExecutionTrees, Bool) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - false + false - true + true )" + xmlTrailer; TestCaseSelectQuery testCase{ kg, query, 2, @@ -502,16 +496,13 @@ TEST(ExportQueryExecutionTrees, Floats) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - -42019234865780982022144 + -42019234865780982022144 - 4.01293e-12 + 4.01293e-12 - 42.2 + 42.2 )" + xmlTrailer; TestCaseSelectQuery testCaseFloat{ kg, query, 3, @@ -572,8 +563,7 @@ TEST(ExportQueryExecutionTrees, Dates) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - 1950-01-01T00:00:00 + 1950-01-01T00:00:00 )" + xmlTrailer; TestCaseSelectQuery testCase{ kg, query, 1, @@ -581,8 +571,7 @@ datatype="http://www.w3.org/2001/XMLSchema#dateTime">1950-01-01T00:00:00\n", + // "\"1950-01-01T00:00:00\"^^\n", // but that is a bug in the TSV export for another PR. Note: the duplicate // quotes are due to the escaping for CSV. "o\n" @@ -603,8 +592,7 @@ datatype="http://www.w3.org/2001/XMLSchema#dateTime">1950-01-01T00:00:00\t

\t\"1950-01-01T00:00:00\"^^\n", // missing - // -"^^\n", + // "^^\n", // CSV // TODO This format is wrong, but this is is due to the way that // CONSTRUCT queries are currently exported. This has to be fixed in a @@ -636,8 +624,7 @@ TEST(ExportQueryExecutionTrees, GeoPoints) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - POINT(50.000000 50.000000) + POINT(50.000000 50.000000) )" + xmlTrailer; TestCaseSelectQuery testCase{ kg, query, 1, @@ -645,8 +632,7 @@ datatype="http://www.opengis.net/ont/geosparql#wktLiteral">POINT(50.000000 50.00 "?o\n" "POINT(50.000000 50.000000)\n", // should be - // -"\"POINT(50.000000 50.000000)\"^^\n", + // "\"POINT(50.000000 50.000000)\"^^\n", // but that is a bug in the TSV export for another PR. Note: the duplicate // quotes are due to the escaping for CSV. "o\n" @@ -765,8 +751,7 @@ TEST(ExportQueryExecutionTrees, LiteralWithDatatype) { std::string expectedXml = makeXMLHeader({"o"}) + R"( - something + something )" + xmlTrailer; TestCaseSelectQuery testCase{ kg, query, 1, @@ -1652,7 +1637,7 @@ TEST(ExportQueryExecutionTrees, convertGeneratorForChunkedTransfer) { AllOf(HasSubstr("!!!!>># An error has occurred"), HasSubstr("A very strange"))); } -*/ + TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { std::string kg = "

\"something\" .

1.

" @@ -1742,4 +1727,4 @@ TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { qec->getIndex(), id, LocalVocab{}); EXPECT_EQ(resultLiteral, std::nullopt); } -} +} \ No newline at end of file diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index f55d8ed0d1..43984157e6 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -775,9 +775,9 @@ TEST(SparqlExpression, substr) { D(-3.8)); // Invalid datatypes - // First must be string. + // First must be LiteralOrIri auto Ux = IdOrLiteralOrIri{U}; - checkSubstr(Ux, I(3), I(4), I(7)); + //checkSubstr(Ux, I(3), I(4), I(7)); checkSubstr(Ux, U, I(4), I(7)); checkSubstr(Ux, Ux, I(4), I(7)); // Second and third must be numeric; @@ -787,6 +787,14 @@ TEST(SparqlExpression, substr) { checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), U); checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), IdOrLiteralOrIri{lit("bye")}); + + //WithDataType + checkSubstr(IdOrLiteralOrIriVec{lit("Hel", + "^^")}, + IdOrLiteralOrIriVec{lit("Hello", "^^")}, I(1), I(3)); + + //WithLanguageTag + checkSubstr(IdOrLiteralOrIriVec{lit("cha", "@en")}, IdOrLiteralOrIriVec{lit("chat", "@en")}, I(1), I(3)); } // _____________________________________________________________________________________ From 6d7a2b2c9dfadfe48126b1703cb36976d0483701 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sun, 24 Nov 2024 11:58:25 +0100 Subject: [PATCH 12/30] Add Test IdToLiteralOrIri and some formatting --- src/engine/ExportQueryExecutionTrees.cpp | 70 +++++++++---------- src/engine/ExportQueryExecutionTrees.h | 5 ++ .../sparqlExpressions/StringExpressions.cpp | 4 +- test/ExportQueryExecutionTreesTest.cpp | 32 ++++++--- 4 files changed, 60 insertions(+), 51 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 2c8651378e..33a8d2c747 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -361,6 +361,35 @@ ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue( return LiteralOrIri::literalWithoutQuotes(optionalStringAndType->first); } +// _____________________________________________________________________________ +std::optional ExportQueryExecutionTrees::handleIriOrLiteral( + const LiteralOrIri& word, bool onlyReturnLiterals, + bool onlyReturnLiteralsWithXsdString) { + auto datatypeIsXSDString = [](const LiteralOrIri& word) -> bool { + return word.hasDatatype() && + std::string_view( + reinterpret_cast(word.getDatatype().data()), + word.getDatatype().size()) == XSD_STRING; + }; + + if (onlyReturnLiterals && !word.isLiteral()) { + return std::nullopt; + } + if (onlyReturnLiteralsWithXsdString) { + if (word.isLiteral() && + (!word.hasDatatype() || datatypeIsXSDString(word))) { + return word; + } + return std::nullopt; + } + if (word.isLiteral() && word.hasDatatype() && !datatypeIsXSDString(word)) { + return LiteralOrIri{ + ad_utility::triple_component::Literal::literalWithNormalizedContent( + word.getContent())}; + } + return word; +} + // _____________________________________________________________________________ ad_utility::triple_component::LiteralOrIri ExportQueryExecutionTrees::getLiteralOrIriFromVocabIndex( @@ -433,47 +462,13 @@ std::optional ExportQueryExecutionTrees::idToLiteralOrIri( bool onlyReturnLiteralsWithXsdString) { using enum Datatype; auto datatype = id.getDatatype(); + if constexpr (onlyReturnLiterals) { if (!(datatype == VocabIndex || datatype == LocalVocabIndex)) { return std::nullopt; } } - auto handleIriOrLiteral = - [onlyReturnLiteralsWithXsdString]( - const LiteralOrIri& word) -> std::optional { - if constexpr (onlyReturnLiterals) { - if (!word.isLiteral()) { - return std::nullopt; - } - } - // Return only literals without datatype or literals with xsd:string - // datatype - if (onlyReturnLiteralsWithXsdString) { - if (word.isLiteral()) { - if (!word.hasDatatype() || - (word.hasDatatype() && - std::string_view( - reinterpret_cast(word.getDatatype().data()), - word.getDatatype().size()) == XSD_STRING)) { - return word; - } - } - return std::nullopt; - } - // If the literal has a datatype that is not xsd:string, remove the datatype - if (word.isLiteral()) { - if (word.hasDatatype() && - std::string_view( - reinterpret_cast(word.getDatatype().data()), - word.getDatatype().size()) != XSD_STRING) { - return LiteralOrIri{ - ad_utility::triple_component::Literal::literalWithNormalizedContent( - word.getContent())}; - } - } - return word; - }; switch (datatype) { case WordVocabIndex: return LiteralOrIri::literalWithoutQuotes( @@ -481,9 +476,10 @@ std::optional ExportQueryExecutionTrees::idToLiteralOrIri( case VocabIndex: case LocalVocabIndex: return handleIriOrLiteral( - getLiteralOrIriFromVocabIndex(index, id, localVocab)); + getLiteralOrIriFromVocabIndex(index, id, localVocab), + onlyReturnLiterals, onlyReturnLiteralsWithXsdString); case TextRecordIndex: - // TODO + // TODO: Handle TextRecordIndex if needed return std::nullopt; default: return idToLiteralOrIriForEncodedValue(id, diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 20af6830b5..d9f7c67f9f 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -92,6 +92,11 @@ class ExportQueryExecutionTrees { static std::optional idToLiteralOrIriForEncodedValue( Id id, bool onlyReturnLiteralsWithXsdString = false); + // Checks and processes a LiteralOrIri based on the given flags. + static std::optional handleIriOrLiteral( + const LiteralOrIri& word, bool onlyReturnLiterals, + bool onlyReturnLiteralsWithXsdString); + // Acts as a helper to retrieve an LiteralOrIri object // from an Id, where the Id is of type `VocabIndex` or `LocalVocabIndex`. // This function should only be called with suitable `Datatype` Id's, diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 29553996d0..2d52ba17f4 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -23,7 +23,7 @@ constexpr auto toLiteral = [](std::string_view normalizedContent) { constexpr auto toLiteralWithDescriptor = [](std::string_view normalizedContent, - std::optional> descriptor) { + const std::optional> descriptor) { return LiteralOrIri{ ad_utility::triple_component::Literal::literalWithNormalizedContent( asNormalizedStringViewUnsafe(normalizedContent), descriptor)}; @@ -247,8 +247,6 @@ class SubstrImpl { using SubstrExpression = NARY<3, FV>; -// using SubstrExpression = -// StringExpressionImpl<3, SubstrImpl, NumericValueGetter, NumericValueGetter>; // STRSTARTS [[maybe_unused]] auto strStartsImpl = [](std::string_view text, diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 647cb3560e..0e9e319868 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1640,8 +1640,9 @@ TEST(ExportQueryExecutionTrees, convertGeneratorForChunkedTransfer) { TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { std::string kg = - "

\"something\" .

1.

" - "\"some^^\" ."; + "

\"something\" .

1 .

" + "\"some\"^^ .

" + "\"dadudeldu\"^^ ."; auto qec = ad_utility::testing::getQec(kg); auto getId = ad_utility::testing::makeGetId(qec->getIndex()); using enum Datatype; @@ -1664,28 +1665,37 @@ TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { // Case Literal With Datatype String { - Id id = getId("\"some^^\""); + Id id = getId("\"some\"^^"); auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( qec->getIndex(), id, LocalVocab{}); EXPECT_EQ(resultLiteral.value().toStringRepresentation(), - "\"some^^\""); - // TODO: Problem: The Literal has no Datatype - EXPECT_EQ(resultLiteral.value().hasDatatype(), false); + "\"some\"^^"); // Case onlyReturnLiterals resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( qec->getIndex(), id, LocalVocab{}); EXPECT_EQ(resultLiteral.value().toStringRepresentation(), - "\"some^^\""); + "\"some\"^^"); // Case onlyReturnLiteralsWithXsdString resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( qec->getIndex(), id, LocalVocab{}, true); EXPECT_EQ(resultLiteral.value().toStringRepresentation(), - "\"some^^\""); + "\"some\"^^"); } - // TODO: Case Literal With Datatype not equal String + // Case Literal With Datatype not equal String { - + Id id = getId("\"dadudeldu\"^^"); + auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"dadudeldu\""); + // Case onlyReturnLiterals + resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}); + EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"dadudeldu\""); + // Case onlyReturnLiteralsWithXsdString + resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}, true); + EXPECT_EQ(resultLiteral, std::nullopt); } // Case Iri @@ -1727,4 +1737,4 @@ TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { qec->getIndex(), id, LocalVocab{}); EXPECT_EQ(resultLiteral, std::nullopt); } -} \ No newline at end of file +} From 5948dcbc115745f2c5b90a1d67b15019a4f5b122 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sun, 24 Nov 2024 12:07:20 +0100 Subject: [PATCH 13/30] formatting --- .../SparqlExpressionValueGetters.cpp | 3 ++- .../SparqlExpressionValueGetters.h | 6 +++--- test/SparqlExpressionTest.cpp | 21 +++++++++++-------- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index 822387e3ee..f6e7097984 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -93,7 +93,8 @@ std::optional StringValueGetter::operator()( // ____________________________________________________________________________ std::optional LiteralOrIriValueGetter::operator()( Id id, const EvaluationContext* context) const { - //true means that immediately returns nullopt for everything that is not a literal + // true means that immediately returns nullopt for everything that is not a + // literal return ExportQueryExecutionTrees::idToLiteralOrIri( context->_qec.getIndex(), id, context->_localVocab); } diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index b9fdc8f96b..195e8e3a0b 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -154,8 +154,8 @@ struct LiteralOrIriValueGetter : Mixin { return s; } }; - - // Value getter for `isBlank`. + +// Value getter for `isBlank`. struct IsBlankNodeValueGetter : Mixin { using Mixin::operator(); Id operator()(ValueId id, const EvaluationContext*) const { @@ -167,7 +167,7 @@ struct IsBlankNodeValueGetter : Mixin { } }; - // Boolean value getter that checks whether the given `Id` is a `ValueId` of the +// Boolean value getter that checks whether the given `Id` is a `ValueId` of the // given `datatype`. template struct IsValueIdValueGetter : Mixin> { diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index 43984157e6..439261053a 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -777,7 +777,7 @@ TEST(SparqlExpression, substr) { // Invalid datatypes // First must be LiteralOrIri auto Ux = IdOrLiteralOrIri{U}; - //checkSubstr(Ux, I(3), I(4), I(7)); + // checkSubstr(Ux, I(3), I(4), I(7)); checkSubstr(Ux, U, I(4), I(7)); checkSubstr(Ux, Ux, I(4), I(7)); // Second and third must be numeric; @@ -787,14 +787,17 @@ TEST(SparqlExpression, substr) { checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), U); checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), IdOrLiteralOrIri{lit("bye")}); - - //WithDataType - checkSubstr(IdOrLiteralOrIriVec{lit("Hel", - "^^")}, - IdOrLiteralOrIriVec{lit("Hello", "^^")}, I(1), I(3)); - - //WithLanguageTag - checkSubstr(IdOrLiteralOrIriVec{lit("cha", "@en")}, IdOrLiteralOrIriVec{lit("chat", "@en")}, I(1), I(3)); + + // WithDataType + checkSubstr(IdOrLiteralOrIriVec{lit( + "Hel", "^^")}, + IdOrLiteralOrIriVec{ + lit("Hello", "^^")}, + I(1), I(3)); + + // WithLanguageTag + checkSubstr(IdOrLiteralOrIriVec{lit("cha", "@en")}, + IdOrLiteralOrIriVec{lit("chat", "@en")}, I(1), I(3)); } // _____________________________________________________________________________________ From 52ef1f59404472318302664cdde0a3b45e97d91f Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sun, 24 Nov 2024 16:50:50 +0100 Subject: [PATCH 14/30] Correction for sonar --- src/engine/ExportQueryExecutionTrees.cpp | 3 +-- src/engine/sparqlExpressions/StringExpressions.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 33a8d2c747..4bbe363241 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -365,7 +365,7 @@ ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue( std::optional ExportQueryExecutionTrees::handleIriOrLiteral( const LiteralOrIri& word, bool onlyReturnLiterals, bool onlyReturnLiteralsWithXsdString) { - auto datatypeIsXSDString = [](const LiteralOrIri& word) -> bool { + auto datatypeIsXSDString = [](const LiteralOrIri& word) { return word.hasDatatype() && std::string_view( reinterpret_cast(word.getDatatype().data()), @@ -479,7 +479,6 @@ std::optional ExportQueryExecutionTrees::idToLiteralOrIri( getLiteralOrIriFromVocabIndex(index, id, localVocab), onlyReturnLiterals, onlyReturnLiteralsWithXsdString); case TextRecordIndex: - // TODO: Handle TextRecordIndex if needed return std::nullopt; default: return idToLiteralOrIriForEncodedValue(id, diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 2d52ba17f4..d7f7313a60 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -23,7 +23,7 @@ constexpr auto toLiteral = [](std::string_view normalizedContent) { constexpr auto toLiteralWithDescriptor = [](std::string_view normalizedContent, - const std::optional> descriptor) { + const std::optional>& descriptor) { return LiteralOrIri{ ad_utility::triple_component::Literal::literalWithNormalizedContent( asNormalizedStringViewUnsafe(normalizedContent), descriptor)}; From f15bf946b411fa9d84544e11eee05ac45eb8897a Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Fri, 29 Nov 2024 11:24:34 +0100 Subject: [PATCH 15/30] SubStr improvements --- src/engine/ExportQueryExecutionTrees.cpp | 27 ++++---- src/engine/ExportQueryExecutionTrees.h | 18 +++--- .../SparqlExpressionValueGetters.cpp | 24 ++++++- .../SparqlExpressionValueGetters.h | 11 ++++ .../sparqlExpressions/StringExpressions.cpp | 62 +++++++++++++------ src/parser/Literal.cpp | 9 +++ src/parser/Literal.h | 3 + src/parser/LiteralOrIri.cpp | 10 +++ src/parser/LiteralOrIri.h | 4 ++ 9 files changed, 124 insertions(+), 44 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 4bbe363241..a0d4f77917 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -353,8 +353,11 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) { std::optional ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue( Id id, bool onlyReturnLiteralsWithXsdString) { + if(onlyReturnLiteralsWithXsdString){ + return std::nullopt; + } auto optionalStringAndType = idToStringAndTypeForEncodedValue(id); - if (!optionalStringAndType || onlyReturnLiteralsWithXsdString) { + if (!optionalStringAndType) { return std::nullopt; } @@ -363,29 +366,31 @@ ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue( // _____________________________________________________________________________ std::optional ExportQueryExecutionTrees::handleIriOrLiteral( - const LiteralOrIri& word, bool onlyReturnLiterals, + LiteralOrIri word, bool onlyReturnLiterals, bool onlyReturnLiteralsWithXsdString) { auto datatypeIsXSDString = [](const LiteralOrIri& word) { return word.hasDatatype() && - std::string_view( - reinterpret_cast(word.getDatatype().data()), - word.getDatatype().size()) == XSD_STRING; + asStringViewUnsafe(word.getDatatype()) == XSD_STRING; }; - if (onlyReturnLiterals && !word.isLiteral()) { - return std::nullopt; + if (!word.isLiteral()) { + if(onlyReturnLiterals || onlyReturnLiteralsWithXsdString){ + return std::nullopt; + } + return word; } + if (onlyReturnLiteralsWithXsdString) { - if (word.isLiteral() && - (!word.hasDatatype() || datatypeIsXSDString(word))) { + if (!word.hasDatatype() || datatypeIsXSDString(word)) { return word; } return std::nullopt; } - if (word.isLiteral() && word.hasDatatype() && !datatypeIsXSDString(word)) { + + if (word.hasDatatype() && !datatypeIsXSDString(word)) { return LiteralOrIri{ ad_utility::triple_component::Literal::literalWithNormalizedContent( - word.getContent())}; + std::move(word.getContent()))}; } return word; } diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index d9f7c67f9f..ba4c9c30e4 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -70,14 +70,10 @@ class ExportQueryExecutionTrees { static std::optional> idToStringAndTypeForEncodedValue(Id id); - // Converts an Id to a LiteralOrIri based on its type and value. - // For VocabIndex or LocalVocabIndex: Return Literal or Iri. If - // `onlyReturnLiteralsWithXsdString` is true, return only literals (no IRIs) - // with no datatype or datatype `xsd:string`; otherwise, return any literal, - // but strip datatypes other than `xsd:string`. For Double, Int, Bool, Date, - // or GeoPoint: Return the literal without the datatype. If - // `onlyReturnLiteralsWithXsdString` is true return `std::nullopt`. For - // Undefined Id: Always return `std::nullopt` + // Convert the `id` to a 'LiteralOrIri'.Datatypes are always stripped unless they are 'xsd:string', + // so for literals with non-'xsd:string' datatypes (this includes IDs that directly store their value, like Doubles) the datatypes are always empty. + // If 'onlyReturnLiteralsWithXsdString' is true, all IRIs and literals with non-'xsd:string' datatypes (including encoded IDs) return std::nullopt. + // These semantics are useful for the string expressions in StringExpressions.cpp. template static std::optional idToLiteralOrIri( const Index& index, Id id, const LocalVocab& localVocab, @@ -88,13 +84,13 @@ class ExportQueryExecutionTrees { // thrown. // If `onlyReturnLiteralsWithXsdString` is `true`, returns `std::nullopt`. // If `onlyReturnLiteralsWithXsdString` is `false`, removes datatypes from - // literals (e.g., `42^^xsd:integer` becomes `"42"`). + // literals (e.g. the integer `42` is converted to the plain literal `"42"`). static std::optional idToLiteralOrIriForEncodedValue( Id id, bool onlyReturnLiteralsWithXsdString = false); - // Checks and processes a LiteralOrIri based on the given flags. + // A helper function for the `idToLiteralOrIri` function. Checks and processes a LiteralOrIri based on the given parameters. static std::optional handleIriOrLiteral( - const LiteralOrIri& word, bool onlyReturnLiterals, + LiteralOrIri word, bool onlyReturnLiterals, bool onlyReturnLiteralsWithXsdString); // Acts as a helper to retrieve an LiteralOrIri object diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index f6e7097984..e99e76aaa2 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -93,12 +93,30 @@ std::optional StringValueGetter::operator()( // ____________________________________________________________________________ std::optional LiteralOrIriValueGetter::operator()( Id id, const EvaluationContext* context) const { - // true means that immediately returns nullopt for everything that is not a - // literal - return ExportQueryExecutionTrees::idToLiteralOrIri( + return ExportQueryExecutionTrees::idToLiteralOrIri( context->_qec.getIndex(), id, context->_localVocab); } +// ____________________________________________________________________________ +std::optional LiteralOrIriValueGetterWithXsdStringFilter::operator()( + Id id, const EvaluationContext* context) const { + return ExportQueryExecutionTrees::idToLiteralOrIri( + context->_qec.getIndex(), id, context->_localVocab, true); +} + +// ____________________________________________________________________________ +std::optional LiteralOrIriValueGetterWithXsdStringFilter::operator()( + const LiteralOrIri& s, const EvaluationContext*) const { + auto datatypeIsXSDString = [](const LiteralOrIri& word) { + return word.hasDatatype() && + asStringViewUnsafe(word.getDatatype()) == XSD_STRING; + }; + if (!s.hasDatatype() || datatypeIsXSDString(s)) { + return s; + } + return std::nullopt; +} + // ____________________________________________________________________________ template Id IsSomethingValueGetter::operator()( diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index 195e8e3a0b..ba4233a1e0 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -155,6 +155,17 @@ struct LiteralOrIriValueGetter : Mixin { } }; +// Same as above but only literals with 'xsd:string' datatype or no datatype are returned. So only literals w +struct LiteralOrIriValueGetterWithXsdStringFilter : Mixin { + using Mixin::operator(); + + std::optional operator()(ValueId, + const EvaluationContext*) const; + + std::optional operator()(const LiteralOrIri& s, + const EvaluationContext*) const; +}; + // Value getter for `isBlank`. struct IsBlankNodeValueGetter : Mixin { using Mixin::operator(); diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index d7f7313a60..9d91a049b8 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -211,20 +211,6 @@ class SubstrImpl { } const auto& str = asStringViewUnsafe(s.value().getContent()); - std::optional> descriptor; - - if (s->isLiteral()) { - if (s->hasLanguageTag()) { - descriptor = std::string(asStringViewUnsafe(s->getLanguageTag())); - } else if (s->hasDatatype()) { - descriptor = - ad_utility::triple_component::Iri::fromIrirefWithoutBrackets( - asStringViewUnsafe(s->getDatatype())); - } - } else { - descriptor = std::nullopt; - } - // Clamp the number such that it is in `[0, str.size()]`. That way we end up // with valid arguments for the `getUTF8Substring` method below for both // starting position and length since all the other corner cases have been @@ -239,14 +225,52 @@ class SubstrImpl { return static_cast(n); }; - return toLiteralWithDescriptor( - ad_utility::getUTF8Substring(str, clamp(startInt), clamp(lengthInt)), - descriptor); + s.value().getLiteral().setSubstr(clamp(startInt), clamp(lengthInt)); + return s.value(); + } +}; + +// Implementation of the `SUBSTR` SPARQL function. It dynamically +// selects the appropriate value getter for the first argument based on whether +// it is a `STR()` expression (using `LiteralOrIriValueGetterWithXsdStringFilter`) +// or another type (using `LiteralOrIriValueGetter`). +class SubstrExpressionImpl : public SparqlExpression { + private: + using ExpressionWithStr = NARY<3, FV>; + using ExpressionWithoutStr = NARY<3, FV>; + + SparqlExpression::Ptr impl_; + + public: + explicit SubstrExpressionImpl( + SparqlExpression::Ptr child, + std::same_as auto... children) + requires(sizeof...(children) + 1 == 3) { + AD_CORRECTNESS_CHECK(child != nullptr); + + if (child->isStrExpression()) { + auto childrenOfStr = std::move(*child).moveChildrenOut(); + AD_CORRECTNESS_CHECK(childrenOfStr.size() == 1); + impl_ = std::make_unique( + std::move(childrenOfStr.at(0)), std::move(children)...); + } else { + impl_ = std::make_unique(std::move(child), + std::move(children)...); + } + } + + ExpressionResult evaluate(EvaluationContext* context) const override { + return impl_->evaluate(context); + } + + std::string getCacheKey(const VariableToColumnMap& varColMap) const override { + return impl_->getCacheKey(varColMap); } }; -using SubstrExpression = NARY<3, FV>; +using SubstrExpression = SubstrExpressionImpl; // STRSTARTS [[maybe_unused]] auto strStartsImpl = [](std::string_view text, diff --git a/src/parser/Literal.cpp b/src/parser/Literal.cpp index a9fb4364e6..bdf5bc76ea 100644 --- a/src/parser/Literal.cpp +++ b/src/parser/Literal.cpp @@ -135,4 +135,13 @@ Literal Literal::fromStringRepresentation(std::string internal) { return Literal{std::move(internal), endIdx + 1}; } +// __________________________________________ +void Literal::setSubstr(std::size_t start, std::size_t length){ + auto contentWithoutParentesis = content_.substr(1, beginOfSuffix_ - 2); + auto shortenedContent = ad_utility::getUTF8Substring(contentWithoutParentesis, start, length); + auto suffix = content_.substr(beginOfSuffix_); + content_ = absl::StrCat("\"", shortenedContent, "\"", suffix); + beginOfSuffix_ = content_.size() - suffix.size(); +} + } // namespace ad_utility::triple_component diff --git a/src/parser/Literal.h b/src/parser/Literal.h index 5367c261ae..13a564b044 100644 --- a/src/parser/Literal.h +++ b/src/parser/Literal.h @@ -90,5 +90,8 @@ class Literal { static Literal literalWithoutQuotes( std::string_view rdfContentWithoutQuotes, std::optional> descriptor = std::nullopt); + + // Set the substring of the current literal directly, based on start and length. + void setSubstr(std::size_t start, std::size_t length); }; } // namespace ad_utility::triple_component diff --git a/src/parser/LiteralOrIri.cpp b/src/parser/LiteralOrIri.cpp index 077b189c26..d9282be5d7 100644 --- a/src/parser/LiteralOrIri.cpp +++ b/src/parser/LiteralOrIri.cpp @@ -49,6 +49,16 @@ const Literal& LiteralOrIri::getLiteral() const { return std::get(data_); } +// __________________________________________ +Literal& LiteralOrIri::getLiteral(){ + if (!isLiteral()) { + AD_THROW( + "LiteralOrIri object does not contain an Literal object and " + "thus cannot return it"); + } + return std::get(data_); +} + // __________________________________________ bool LiteralOrIri::hasLanguageTag() const { return getLiteral().hasLanguageTag(); diff --git a/src/parser/LiteralOrIri.h b/src/parser/LiteralOrIri.h index 3f4bfa191e..d284ad8771 100644 --- a/src/parser/LiteralOrIri.h +++ b/src/parser/LiteralOrIri.h @@ -31,6 +31,10 @@ class alignas(16) LiteralOrIri { // otherwise const Literal& getLiteral() const; + // Return a modifiable reference to the contained Literal object if available, throw exception + // otherwise. Allows the caller to modify the Literal object e.g. for SubStr in StringExpressions.cpp + Literal& getLiteral(); + // Create a new LiteralOrIri based on a Literal object explicit LiteralOrIri(Literal literal); From 774d52ba196a3a9e70c2d246ffeb6e19553e71ae Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Fri, 29 Nov 2024 12:29:09 +0100 Subject: [PATCH 16/30] fix --- src/engine/sparqlExpressions/StringExpressions.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 9d91a049b8..e47c0d6b23 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -268,6 +268,11 @@ class SubstrExpressionImpl : public SparqlExpression { std::string getCacheKey(const VariableToColumnMap& varColMap) const override { return impl_->getCacheKey(varColMap); } + + private: + std::span childrenImpl() override { + return impl_->children(); + } }; using SubstrExpression = SubstrExpressionImpl; From d4b49c04fcaa59cb42b43cd7d4714ca3276a9154 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sat, 30 Nov 2024 13:23:47 +0100 Subject: [PATCH 17/30] little changes --- src/engine/ExportQueryExecutionTrees.cpp | 2 +- test/SparqlExpressionTest.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index a0d4f77917..fd3255f5b5 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -390,7 +390,7 @@ std::optional ExportQueryExecutionTrees::handleIriOrLiteral( if (word.hasDatatype() && !datatypeIsXSDString(word)) { return LiteralOrIri{ ad_utility::triple_component::Literal::literalWithNormalizedContent( - std::move(word.getContent()))}; + word.getContent())}; } return word; } diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index 439261053a..c6d009ce3e 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -788,7 +788,7 @@ TEST(SparqlExpression, substr) { checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), IdOrLiteralOrIri{lit("bye")}); - // WithDataType + // WithDataType xsd:string checkSubstr(IdOrLiteralOrIriVec{lit( "Hel", "^^")}, IdOrLiteralOrIriVec{ From 72aaa006e1504711b4864be3351b7ad2ea0d6f56 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sat, 7 Dec 2024 11:27:47 +0100 Subject: [PATCH 18/30] Feedback implemented --- src/engine/ExportQueryExecutionTrees.cpp | 19 +- src/engine/ExportQueryExecutionTrees.h | 18 +- .../SparqlExpressionValueGetters.cpp | 16 +- .../SparqlExpressionValueGetters.h | 6 +- .../sparqlExpressions/StringExpressions.cpp | 25 +-- src/parser/Literal.cpp | 17 +- src/parser/Literal.h | 6 +- src/parser/LiteralOrIri.cpp | 18 +- src/parser/LiteralOrIri.h | 5 +- test/ExportQueryExecutionTreesTest.cpp | 165 ++++++++---------- test/SparqlExpressionTest.cpp | 13 +- test/parser/LiteralOrIriTest.cpp | 32 ++++ 12 files changed, 192 insertions(+), 148 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index fd3255f5b5..f11e9b6a0e 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -353,7 +353,7 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) { std::optional ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue( Id id, bool onlyReturnLiteralsWithXsdString) { - if(onlyReturnLiteralsWithXsdString){ + if (onlyReturnLiteralsWithXsdString) { return std::nullopt; } auto optionalStringAndType = idToStringAndTypeForEncodedValue(id); @@ -364,6 +364,13 @@ ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue( return LiteralOrIri::literalWithoutQuotes(optionalStringAndType->first); } +// _____________________________________________________________________________ +bool ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString( + const LiteralOrIri& word) { + return !word.hasDatatype() || + asStringViewUnsafe(word.getDatatype()) == XSD_STRING; +} + // _____________________________________________________________________________ std::optional ExportQueryExecutionTrees::handleIriOrLiteral( LiteralOrIri word, bool onlyReturnLiterals, @@ -374,23 +381,21 @@ std::optional ExportQueryExecutionTrees::handleIriOrLiteral( }; if (!word.isLiteral()) { - if(onlyReturnLiterals || onlyReturnLiteralsWithXsdString){ + if (onlyReturnLiterals || onlyReturnLiteralsWithXsdString) { return std::nullopt; } return word; } if (onlyReturnLiteralsWithXsdString) { - if (!word.hasDatatype() || datatypeIsXSDString(word)) { + if (isPlainLiteralOrLiteralWithXsdString(word)) { return word; } return std::nullopt; } - if (word.hasDatatype() && !datatypeIsXSDString(word)) { - return LiteralOrIri{ - ad_utility::triple_component::Literal::literalWithNormalizedContent( - word.getContent())}; + if (word.hasDatatype() && !isPlainLiteralOrLiteralWithXsdString(word)) { + word.getLiteral().removeDatatype(); } return word; } diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index ba4c9c30e4..eae20f697f 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -70,10 +70,13 @@ class ExportQueryExecutionTrees { static std::optional> idToStringAndTypeForEncodedValue(Id id); - // Convert the `id` to a 'LiteralOrIri'.Datatypes are always stripped unless they are 'xsd:string', - // so for literals with non-'xsd:string' datatypes (this includes IDs that directly store their value, like Doubles) the datatypes are always empty. - // If 'onlyReturnLiteralsWithXsdString' is true, all IRIs and literals with non-'xsd:string' datatypes (including encoded IDs) return std::nullopt. - // These semantics are useful for the string expressions in StringExpressions.cpp. + // Convert the `id` to a 'LiteralOrIri. Datatypes are always stripped unless + // they are 'xsd:string', so for literals with non-'xsd:string' datatypes + // (this includes IDs that directly store their value, like Doubles) the + // datatype is always empty. If 'onlyReturnLiteralsWithXsdString' is true, all + // IRIs and literals with non'-xsd:string' datatypes (including encoded IDs) + // return 'std::nullopt'. These semantics are useful for the string + // expressions in StringExpressions.cpp. template static std::optional idToLiteralOrIri( const Index& index, Id id, const LocalVocab& localVocab, @@ -88,11 +91,16 @@ class ExportQueryExecutionTrees { static std::optional idToLiteralOrIriForEncodedValue( Id id, bool onlyReturnLiteralsWithXsdString = false); - // A helper function for the `idToLiteralOrIri` function. Checks and processes a LiteralOrIri based on the given parameters. + // A helper function for the `idToLiteralOrIri` function. Checks and processes + // a LiteralOrIri based on the given parameters. static std::optional handleIriOrLiteral( LiteralOrIri word, bool onlyReturnLiterals, bool onlyReturnLiteralsWithXsdString); + // Checks if a LiteralOrIri is either a plain literal (without datatype) + // or a literal with the `xsd:string` datatype. + static bool isPlainLiteralOrLiteralWithXsdString(const LiteralOrIri& word); + // Acts as a helper to retrieve an LiteralOrIri object // from an Id, where the Id is of type `VocabIndex` or `LocalVocabIndex`. // This function should only be called with suitable `Datatype` Id's, diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index e99e76aaa2..3dbd3e6a4f 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -93,25 +93,23 @@ std::optional StringValueGetter::operator()( // ____________________________________________________________________________ std::optional LiteralOrIriValueGetter::operator()( Id id, const EvaluationContext* context) const { - return ExportQueryExecutionTrees::idToLiteralOrIri( - context->_qec.getIndex(), id, context->_localVocab); + return ExportQueryExecutionTrees::idToLiteralOrIri(context->_qec.getIndex(), + id, context->_localVocab); } // ____________________________________________________________________________ -std::optional LiteralOrIriValueGetterWithXsdStringFilter::operator()( +std::optional +LiteralOrIriValueGetterWithXsdStringFilter::operator()( Id id, const EvaluationContext* context) const { return ExportQueryExecutionTrees::idToLiteralOrIri( context->_qec.getIndex(), id, context->_localVocab, true); } // ____________________________________________________________________________ -std::optional LiteralOrIriValueGetterWithXsdStringFilter::operator()( +std::optional +LiteralOrIriValueGetterWithXsdStringFilter::operator()( const LiteralOrIri& s, const EvaluationContext*) const { - auto datatypeIsXSDString = [](const LiteralOrIri& word) { - return word.hasDatatype() && - asStringViewUnsafe(word.getDatatype()) == XSD_STRING; - }; - if (!s.hasDatatype() || datatypeIsXSDString(s)) { + if (ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(s)) { return s; } return std::nullopt; diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index ba4233a1e0..159040eedd 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -155,8 +155,10 @@ struct LiteralOrIriValueGetter : Mixin { } }; -// Same as above but only literals with 'xsd:string' datatype or no datatype are returned. So only literals w -struct LiteralOrIriValueGetterWithXsdStringFilter : Mixin { +// Same as above but only literals with 'xsd:string' datatype or no datatype are +// returned. +struct LiteralOrIriValueGetterWithXsdStringFilter + : Mixin { using Mixin::operator(); std::optional operator()(ValueId, diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index e47c0d6b23..6a884ced03 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -21,7 +21,7 @@ constexpr auto toLiteral = [](std::string_view normalizedContent) { asNormalizedStringViewUnsafe(normalizedContent))}; }; -constexpr auto toLiteralWithDescriptor = +static constexpr auto toLiteralWithDescriptor = [](std::string_view normalizedContent, const std::optional>& descriptor) { return LiteralOrIri{ @@ -212,7 +212,7 @@ class SubstrImpl { const auto& str = asStringViewUnsafe(s.value().getContent()); // Clamp the number such that it is in `[0, str.size()]`. That way we end up - // with valid arguments for the `getUTF8Substring` method below for both + // with valid arguments for the `setSubstr` method below for both // starting position and length since all the other corner cases have been // dealt with above. auto clamp = [sz = str.size()](int64_t n) -> std::size_t { @@ -226,20 +226,23 @@ class SubstrImpl { }; s.value().getLiteral().setSubstr(clamp(startInt), clamp(lengthInt)); - return s.value(); + return std::move(s.value()); } }; -// Implementation of the `SUBSTR` SPARQL function. It dynamically -// selects the appropriate value getter for the first argument based on whether -// it is a `STR()` expression (using `LiteralOrIriValueGetterWithXsdStringFilter`) -// or another type (using `LiteralOrIriValueGetter`). +// Implementation of the `SUBSTR` SPARQL function. It dynamically +// selects the appropriate value getter for the first argument based on whether +// it is a `STR()` expression (using +// `LiteralOrIriValueGetterWithXsdStringFilter`) or another type (using +// `LiteralOrIriValueGetter`). class SubstrExpressionImpl : public SparqlExpression { private: - using ExpressionWithStr = NARY<3, FV>; - using ExpressionWithoutStr = NARY<3, FV>; + using ExpressionWithStr = + NARY<3, FV>; + using ExpressionWithoutStr = + NARY<3, FV>; SparqlExpression::Ptr impl_; diff --git a/src/parser/Literal.cpp b/src/parser/Literal.cpp index bdf5bc76ea..baeba75d71 100644 --- a/src/parser/Literal.cpp +++ b/src/parser/Literal.cpp @@ -136,12 +136,17 @@ Literal Literal::fromStringRepresentation(std::string internal) { } // __________________________________________ -void Literal::setSubstr(std::size_t start, std::size_t length){ - auto contentWithoutParentesis = content_.substr(1, beginOfSuffix_ - 2); - auto shortenedContent = ad_utility::getUTF8Substring(contentWithoutParentesis, start, length); - auto suffix = content_.substr(beginOfSuffix_); - content_ = absl::StrCat("\"", shortenedContent, "\"", suffix); - beginOfSuffix_ = content_.size() - suffix.size(); +void Literal::setSubstr(std::size_t start, std::size_t length) { + std::size_t contentLength = beginOfSuffix_ - 2; + content_.erase(1 + start + length, contentLength - start - length); + content_.erase(1, start); + beginOfSuffix_ = beginOfSuffix_ - (contentLength - length); +} + +// __________________________________________ +void Literal::removeDatatype() { + content_.erase(beginOfSuffix_); + beginOfSuffix_ = content_.size(); } } // namespace ad_utility::triple_component diff --git a/src/parser/Literal.h b/src/parser/Literal.h index 13a564b044..6750e57aa5 100644 --- a/src/parser/Literal.h +++ b/src/parser/Literal.h @@ -91,7 +91,11 @@ class Literal { std::string_view rdfContentWithoutQuotes, std::optional> descriptor = std::nullopt); - // Set the substring of the current literal directly, based on start and length. + // Set the substring of the literal by erasing the part between the + // end of the prefix and the trailing " from content_. void setSubstr(std::size_t start, std::size_t length); + + // Remove the datatype suffix from the Literal. + void removeDatatype(); }; } // namespace ad_utility::triple_component diff --git a/src/parser/LiteralOrIri.cpp b/src/parser/LiteralOrIri.cpp index d9282be5d7..3ecdeb82a3 100644 --- a/src/parser/LiteralOrIri.cpp +++ b/src/parser/LiteralOrIri.cpp @@ -41,21 +41,17 @@ bool LiteralOrIri::isLiteral() const { // __________________________________________ const Literal& LiteralOrIri::getLiteral() const { - if (!isLiteral()) { - AD_THROW( - "LiteralOrIri object does not contain an Literal object and " - "thus cannot return it"); - } + AD_CONTRACT_CHECK(isLiteral(), + "LiteralOrIri object does not contain a Literal object and " + "thus cannot return it"); return std::get(data_); } // __________________________________________ -Literal& LiteralOrIri::getLiteral(){ - if (!isLiteral()) { - AD_THROW( - "LiteralOrIri object does not contain an Literal object and " - "thus cannot return it"); - } +Literal& LiteralOrIri::getLiteral() { + AD_CONTRACT_CHECK(isLiteral(), + "LiteralOrIri object does not contain a Literal object and " + "thus cannot return it"); return std::get(data_); } diff --git a/src/parser/LiteralOrIri.h b/src/parser/LiteralOrIri.h index d284ad8771..ec6a850a7a 100644 --- a/src/parser/LiteralOrIri.h +++ b/src/parser/LiteralOrIri.h @@ -31,8 +31,9 @@ class alignas(16) LiteralOrIri { // otherwise const Literal& getLiteral() const; - // Return a modifiable reference to the contained Literal object if available, throw exception - // otherwise. Allows the caller to modify the Literal object e.g. for SubStr in StringExpressions.cpp + // Return a modifiable reference to the contained Literal object if available, + // throw exception otherwise. Allows the caller to modify the Literal object + // e.g. for SubStr in StringExpressions.cpp Literal& getLiteral(); // Create a new LiteralOrIri based on a Literal object diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 0e9e319868..2813c07cab 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1647,94 +1647,83 @@ TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { auto getId = ad_utility::testing::makeGetId(qec->getIndex()); using enum Datatype; - // Case Literal With no Datatype - { - Id id = getId("\"something\""); - auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"something\""); - // Case onlyReturnLiterals - resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"something\""); - // Case onlyReturnLiteralsWithXsdString - resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}, true); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"something\""); - } - - // Case Literal With Datatype String - { - Id id = getId("\"some\"^^"); - auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), - "\"some\"^^"); - // Case onlyReturnLiterals - resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), - "\"some\"^^"); - // Case onlyReturnLiteralsWithXsdString - resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}, true); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), - "\"some\"^^"); - } - - // Case Literal With Datatype not equal String - { - Id id = getId("\"dadudeldu\"^^"); - auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"dadudeldu\""); - // Case onlyReturnLiterals - resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"dadudeldu\""); - // Case onlyReturnLiteralsWithXsdString - resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}, true); - EXPECT_EQ(resultLiteral, std::nullopt); - } - - // Case Iri - { - Id id = getId(""); - auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), ""); - // Case onlyReturnLiterals - resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral, std::nullopt); - // Case onlyReturnLiteralsWithXsdString - resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}, true); - EXPECT_EQ(resultLiteral, std::nullopt); - } - - // Case Datatype Int - { - Id id = ad_utility::testing::IntId(1); - auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral.value().toStringRepresentation(), "\"1\""); - // Case onlyReturnLiterals - resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral, std::nullopt); - // Case onlyReturnLiteralsWithXsdString - resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}, true); - EXPECT_EQ(resultLiteral, std::nullopt); - } + auto callIdToLiteralOrIri = [&](Id id, bool onlyLiterals, + bool onlyLiteralsWithXsdString = false) { + if (onlyLiterals) { + return ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}, onlyLiteralsWithXsdString); + } else { + return ExportQueryExecutionTrees::idToLiteralOrIri( + qec->getIndex(), id, LocalVocab{}, onlyLiteralsWithXsdString); + } + }; - // Case Undefined - { - Id id = ad_utility::testing::UndefId(); - auto resultLiteral = ExportQueryExecutionTrees::idToLiteralOrIri( - qec->getIndex(), id, LocalVocab{}); - EXPECT_EQ(resultLiteral, std::nullopt); + auto checkIdToLiteralOrIri = + [&](Id id, + const std::vector>>& + cases) { + for (const auto& [onlyLiterals, onlyLiteralsWithXsdString, expected] : + cases) { + auto result = + callIdToLiteralOrIri(id, onlyLiterals, onlyLiteralsWithXsdString); + if (expected) { + EXPECT_THAT(result, + ::testing::Optional(::testing::ResultOf( + [](const auto& literalOrIri) { + return literalOrIri.toStringRepresentation(); + }, + ::testing::StrEq(*expected)))); + } else { + EXPECT_EQ(result, std::nullopt); + } + } + }; + + // Test cases: Each tuple describes one test case. + // The first element is the ID of the element to test. + // The second element is a list of 3 configurations: + // 1. no restrictions 2.only literals are considered + // 3.only literals with `xsd:string` or no datatype are considered + std::vector>>>> + testCases = { + // Case: Literal without datatype + {getId("\"something\""), + {{false, false, "\"something\""}, + {true, false, "\"something\""}, + {false, true, "\"something\""}}}, + + // Case: Literal with datatype `xsd:string` + {getId("\"some\"^^"), + {{false, false, + "\"some\"^^"}, + {true, false, + "\"some\"^^"}, + {false, true, + "\"some\"^^"}}}, + + // Case: Literal with unknown datatype + {getId("\"dadudeldu\"^^"), + {{false, false, "\"dadudeldu\""}, + {true, false, "\"dadudeldu\""}, + {false, true, std::nullopt}}}, + + // Case: IRI + {getId(""), + {{false, false, ""}, + {true, false, std::nullopt}, + {false, true, std::nullopt}}}, + + // Case: datatype `Int` + {ad_utility::testing::IntId(1), + {{false, false, "\"1\""}, + {true, false, std::nullopt}, + {false, true, std::nullopt}}}, + + // Case: Undefined ID + {ad_utility::testing::UndefId(), {{false, false, std::nullopt}}}}; + + for (const auto& [id, cases] : testCases) { + checkIdToLiteralOrIri(id, cases); } } diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index c6d009ce3e..11f21ff73e 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -777,7 +777,6 @@ TEST(SparqlExpression, substr) { // Invalid datatypes // First must be LiteralOrIri auto Ux = IdOrLiteralOrIri{U}; - // checkSubstr(Ux, I(3), I(4), I(7)); checkSubstr(Ux, U, I(4), I(7)); checkSubstr(Ux, Ux, I(4), I(7)); // Second and third must be numeric; @@ -789,11 +788,13 @@ TEST(SparqlExpression, substr) { IdOrLiteralOrIri{lit("bye")}); // WithDataType xsd:string - checkSubstr(IdOrLiteralOrIriVec{lit( - "Hel", "^^")}, - IdOrLiteralOrIriVec{ - lit("Hello", "^^")}, - I(1), I(3)); + checkSubstr( + IdOrLiteralOrIriVec{lit( + "Hel", + "^^")}, // codespell-ignore + IdOrLiteralOrIriVec{ + lit("Hello", "^^")}, + I(1), I(3)); // WithLanguageTag checkSubstr(IdOrLiteralOrIriVec{lit("cha", "@en")}, diff --git a/test/parser/LiteralOrIriTest.cpp b/test/parser/LiteralOrIriTest.cpp index 0c5486fc7b..479113cfcb 100644 --- a/test/parser/LiteralOrIriTest.cpp +++ b/test/parser/LiteralOrIriTest.cpp @@ -209,3 +209,35 @@ TEST(LiteralOrIri, Hashing) { ad_utility::HashSet set{lit, iri}; EXPECT_THAT(set, ::testing::UnorderedElementsAre(lit, iri)); } + +// _______________________________________________________________________ +TEST(LiteralTest, SetSubstr) { + LiteralOrIri literal = LiteralOrIri::literalWithoutQuotes( + "Hello World!", + Iri::fromIriref("")); + literal.getLiteral().setSubstr(0, 5); + EXPECT_THAT("Hello", + asStringViewUnsafe(literal.getContent())); + EXPECT_THAT("http://www.w3.org/2001/XMLSchema#string", + asStringViewUnsafe(literal.getDatatype())); + + literal = LiteralOrIri::literalWithoutQuotes( + "Hello World!", + Iri::fromIriref("")); + literal.getLiteral().setSubstr(6, 5); + EXPECT_THAT("World", + asStringViewUnsafe(literal.getContent())); + EXPECT_THAT("http://www.w3.org/2001/XMLSchema#string", + asStringViewUnsafe(literal.getDatatype())); +} + +// _______________________________________________________________________ +TEST(LiteralTest, RemoveDatatype) { + LiteralOrIri literal = LiteralOrIri::literalWithoutQuotes( + "Hello World!", + Iri::fromIriref("")); + literal.getLiteral().removeDatatype(); + EXPECT_THAT("Hello World!", asStringViewUnsafe(literal.getContent())); + EXPECT_FALSE(literal.hasDatatype()); + EXPECT_THROW(literal.getDatatype(), ad_utility::Exception); +} \ No newline at end of file From 25000a91c0a6e4291bfaee5fb941c0c4deffa42b Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sat, 7 Dec 2024 11:28:47 +0100 Subject: [PATCH 19/30] format --- test/parser/LiteralOrIriTest.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/test/parser/LiteralOrIriTest.cpp b/test/parser/LiteralOrIriTest.cpp index 479113cfcb..27ddd573a1 100644 --- a/test/parser/LiteralOrIriTest.cpp +++ b/test/parser/LiteralOrIriTest.cpp @@ -216,17 +216,15 @@ TEST(LiteralTest, SetSubstr) { "Hello World!", Iri::fromIriref("")); literal.getLiteral().setSubstr(0, 5); - EXPECT_THAT("Hello", - asStringViewUnsafe(literal.getContent())); + EXPECT_THAT("Hello", asStringViewUnsafe(literal.getContent())); EXPECT_THAT("http://www.w3.org/2001/XMLSchema#string", asStringViewUnsafe(literal.getDatatype())); - + literal = LiteralOrIri::literalWithoutQuotes( "Hello World!", Iri::fromIriref("")); literal.getLiteral().setSubstr(6, 5); - EXPECT_THAT("World", - asStringViewUnsafe(literal.getContent())); + EXPECT_THAT("World", asStringViewUnsafe(literal.getContent())); EXPECT_THAT("http://www.w3.org/2001/XMLSchema#string", asStringViewUnsafe(literal.getDatatype())); } @@ -240,4 +238,4 @@ TEST(LiteralTest, RemoveDatatype) { EXPECT_THAT("Hello World!", asStringViewUnsafe(literal.getContent())); EXPECT_FALSE(literal.hasDatatype()); EXPECT_THROW(literal.getDatatype(), ad_utility::Exception); -} \ No newline at end of file +} From 617c3b7f85006d3d00741964f414b433d3330129 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sat, 7 Dec 2024 11:34:02 +0100 Subject: [PATCH 20/30] New position codespell-ignore --- test/SparqlExpressionTest.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index 11f21ff73e..f30aee59e3 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -790,8 +790,8 @@ TEST(SparqlExpression, substr) { // WithDataType xsd:string checkSubstr( IdOrLiteralOrIriVec{lit( - "Hel", - "^^")}, // codespell-ignore + "Hel", // codespell-ignore + "^^")}, IdOrLiteralOrIriVec{ lit("Hello", "^^")}, I(1), I(3)); From f631ec270700b02bca55d23c3d0489b9bb842a36 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sat, 7 Dec 2024 11:39:04 +0100 Subject: [PATCH 21/30] delete codespell-ignore --- test/SparqlExpressionTest.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index f30aee59e3..e3201fc24d 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -790,11 +790,11 @@ TEST(SparqlExpression, substr) { // WithDataType xsd:string checkSubstr( IdOrLiteralOrIriVec{lit( - "Hel", // codespell-ignore + "Hello", "^^")}, IdOrLiteralOrIriVec{ - lit("Hello", "^^")}, - I(1), I(3)); + lit("Hello World", "^^")}, + I(1), I(5)); // WithLanguageTag checkSubstr(IdOrLiteralOrIriVec{lit("cha", "@en")}, From 889e9ddd1f90430f5c6dfa923392adb7bed720a0 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sat, 7 Dec 2024 19:49:53 +0100 Subject: [PATCH 22/30] UTF8 handling in subStr --- src/engine/ExportQueryExecutionTrees.cpp | 6 +-- .../sparqlExpressions/StringExpressions.cpp | 50 ++++++++++++------- test/SparqlExpressionTest.cpp | 6 --- 3 files changed, 34 insertions(+), 28 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index f11e9b6a0e..8f8b34fb41 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -375,11 +375,7 @@ bool ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString( std::optional ExportQueryExecutionTrees::handleIriOrLiteral( LiteralOrIri word, bool onlyReturnLiterals, bool onlyReturnLiteralsWithXsdString) { - auto datatypeIsXSDString = [](const LiteralOrIri& word) { - return word.hasDatatype() && - asStringViewUnsafe(word.getDatatype()) == XSD_STRING; - }; - + if (!word.isLiteral()) { if (onlyReturnLiterals || onlyReturnLiteralsWithXsdString) { return std::nullopt; diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 6a884ced03..2e7346a059 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -21,13 +21,28 @@ constexpr auto toLiteral = [](std::string_view normalizedContent) { asNormalizedStringViewUnsafe(normalizedContent))}; }; -static constexpr auto toLiteralWithDescriptor = - [](std::string_view normalizedContent, - const std::optional>& descriptor) { - return LiteralOrIri{ - ad_utility::triple_component::Literal::literalWithNormalizedContent( - asNormalizedStringViewUnsafe(normalizedContent), descriptor)}; - }; +// Count UTF-8 characters by skipping continuation bytes (those starting with + // "10"). +inline std::size_t utf8Length(std::string_view s) { + return std::ranges::count_if( + s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); +} + +// Convert UTF-8 position to byte offset +inline std::size_t utf8ToByteOffset(std::string_view str, int64_t utf8Pos) { + std::size_t byteOffset = 0; + int64_t charCount = 0; + + for (char c : str) { + if ((static_cast(c) & 0xC0) != 0x80) { + if (charCount++ == utf8Pos) { + break; + } + } + ++byteOffset; + } + return byteOffset; +} // String functions. [[maybe_unused]] auto strImpl = @@ -133,11 +148,7 @@ using IriOrUriExpression = NARY<1, FV>; // STRLEN [[maybe_unused]] auto strlen = [](std::string_view s) { - // Count UTF-8 characters by skipping continuation bytes (those starting with - // "10"). - auto utf8Len = std::ranges::count_if( - s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); - return Id::makeFromInt(utf8Len); + return Id::makeFromInt(utf8Length(s)); }; using StrlenExpression = StringExpressionImpl<1, LiftStringFunction>; @@ -209,23 +220,28 @@ class SubstrImpl { if (startInt < 0) { lengthInt += startInt; } - const auto& str = asStringViewUnsafe(s.value().getContent()); + std::size_t utf8len = utf8Length(str); // Clamp the number such that it is in `[0, str.size()]`. That way we end up // with valid arguments for the `setSubstr` method below for both // starting position and length since all the other corner cases have been // dealt with above. - auto clamp = [sz = str.size()](int64_t n) -> std::size_t { + auto clamp = [utf8len](int64_t n) -> std::size_t { if (n < 0) { return 0; } - if (static_cast(n) > sz) { - return sz; + if (static_cast(n) > utf8len) { + return utf8len; } return static_cast(n); }; - s.value().getLiteral().setSubstr(clamp(startInt), clamp(lengthInt)); + startInt = clamp(startInt); + lengthInt = clamp(lengthInt); + std::size_t startByteOffset = utf8ToByteOffset(str, startInt); + std::size_t endByteOffset = utf8ToByteOffset(str, startInt + lengthInt); + std::size_t byteLength = endByteOffset - startByteOffset; + s.value().getLiteral().setSubstr(startByteOffset, byteLength); return std::move(s.value()); } }; diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index e3201fc24d..a944129255 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -743,28 +743,23 @@ TEST(SparqlExpression, substr) { I(0), I(12)); checkSubstr(strs({"one", "two", "three"}), strs({"one", "two", "three"}), I(-2), I(12)); - checkSubstr(strs({"ne", "wo", "hree"}), strs({"one", "two", "three"}), I(2), I(12)); checkSubstr(strs({"ne", "wo", "hree"}), strs({"one", "two", "three"}), D(1.8), D(11.7)); checkSubstr(strs({"ne", "wo", "hree"}), strs({"one", "two", "three"}), D(2.449), D(12.449)); - // An actual substring from the middle checkSubstr(strs({"es", "os", "re"}), strs({"ones", "twos", "threes"}), I(3), I(2)); - // Subtle corner case if the starting position is negative // Only the letters at positions `p < -3 + 6 = 3` are exported (the first two // letters, remember that the positions are 1-based). checkSubstr(strs({"on", "tw", "th"}), strs({"ones", "twos", "threes"}), I(-3), I(6)); - // Correct handling of UTF-8 multibyte characters. checkSubstr(strs({"pfel", "pfel", "pfel"}), strs({"uApfel", "uÄpfel", "uöpfel"}), I(3), I(18)); - // corner cases: 0 or negative length, or invalid numeric parameter checkSubstr(strs({"", "", ""}), strs({"ones", "twos", "threes"}), D(naN), I(2)); @@ -786,7 +781,6 @@ TEST(SparqlExpression, substr) { checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), U); checkSubstr(Ux, IdOrLiteralOrIri{lit("hello")}, I(4), IdOrLiteralOrIri{lit("bye")}); - // WithDataType xsd:string checkSubstr( IdOrLiteralOrIriVec{lit( From 0c416031a61977b4016d15e1d80320aad8516a30 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sat, 7 Dec 2024 20:02:08 +0100 Subject: [PATCH 23/30] format --- src/engine/ExportQueryExecutionTrees.cpp | 1 - .../sparqlExpressions/StringExpressions.cpp | 28 +++++++++---------- test/SparqlExpressionTest.cpp | 5 ++-- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 8f8b34fb41..201ba3bcf2 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -375,7 +375,6 @@ bool ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString( std::optional ExportQueryExecutionTrees::handleIriOrLiteral( LiteralOrIri word, bool onlyReturnLiterals, bool onlyReturnLiteralsWithXsdString) { - if (!word.isLiteral()) { if (onlyReturnLiterals || onlyReturnLiteralsWithXsdString) { return std::nullopt; diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 2e7346a059..1eff4d0974 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -22,26 +22,26 @@ constexpr auto toLiteral = [](std::string_view normalizedContent) { }; // Count UTF-8 characters by skipping continuation bytes (those starting with - // "10"). +// "10"). inline std::size_t utf8Length(std::string_view s) { - return std::ranges::count_if( - s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); + return std::ranges::count_if( + s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); } // Convert UTF-8 position to byte offset inline std::size_t utf8ToByteOffset(std::string_view str, int64_t utf8Pos) { - std::size_t byteOffset = 0; - int64_t charCount = 0; - - for (char c : str) { - if ((static_cast(c) & 0xC0) != 0x80) { - if (charCount++ == utf8Pos) { - break; - } - } - ++byteOffset; + std::size_t byteOffset = 0; + int64_t charCount = 0; + + for (char c : str) { + if ((static_cast(c) & 0xC0) != 0x80) { + if (charCount++ == utf8Pos) { + break; + } } - return byteOffset; + ++byteOffset; + } + return byteOffset; } // String functions. diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index a944129255..099c87e3b4 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -783,9 +783,8 @@ TEST(SparqlExpression, substr) { IdOrLiteralOrIri{lit("bye")}); // WithDataType xsd:string checkSubstr( - IdOrLiteralOrIriVec{lit( - "Hello", - "^^")}, + IdOrLiteralOrIriVec{ + lit("Hello", "^^")}, IdOrLiteralOrIriVec{ lit("Hello World", "^^")}, I(1), I(5)); From 1b7e1b45ec56fe9757347ac9f1ed705242713ad7 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Thu, 12 Dec 2024 12:41:34 +0100 Subject: [PATCH 24/30] Add runtime error --- .../sparqlExpressions/SparqlExpressionValueGetters.cpp | 1 + src/engine/sparqlExpressions/StringExpressions.cpp | 4 ++++ test/SparqlExpressionTest.cpp | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index 3dbd3e6a4f..c82ed8b0a3 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -112,6 +112,7 @@ LiteralOrIriValueGetterWithXsdStringFilter::operator()( if (ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(s)) { return s; } + throw std::runtime_error("Input is not a plain string or xsd:string."); return std::nullopt; } diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 1eff4d0974..6fc8540404 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -241,6 +241,10 @@ class SubstrImpl { std::size_t startByteOffset = utf8ToByteOffset(str, startInt); std::size_t endByteOffset = utf8ToByteOffset(str, startInt + lengthInt); std::size_t byteLength = endByteOffset - startByteOffset; + + if (!s.value.has_value()) { + throw std::runtime_error("Substr called on an object without a value."); + } s.value().getLiteral().setSubstr(startByteOffset, byteLength); return std::move(s.value()); } diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index 099c87e3b4..1eb4ca85f7 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -601,6 +601,12 @@ TEST(SparqlExpression, stringOperators) { IdOrLiteralOrIriVec{lit("true"), lit("false"), lit("true")}); checkStr(IdOrLiteralOrIriVec{lit("one"), lit("two"), lit("three")}, IdOrLiteralOrIriVec{lit("one"), lit("two"), lit("three")}); + checkStr(IdOrLiteralOrIriVec{iriref(""), + iriref(""), + iriref("")}, + IdOrLiteralOrIriVec{lit("http://example.org/str"), + lit("http://example.org/int"), + lit("http://example.org/bool")}); auto T = Id::makeFromBool(true); auto F = Id::makeFromBool(false); From 344560affbe9e60329a383c9d33f908446098b35 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Thu, 12 Dec 2024 12:59:45 +0100 Subject: [PATCH 25/30] syntax --- src/engine/sparqlExpressions/StringExpressions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 6fc8540404..03a689c481 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -242,7 +242,7 @@ class SubstrImpl { std::size_t endByteOffset = utf8ToByteOffset(str, startInt + lengthInt); std::size_t byteLength = endByteOffset - startByteOffset; - if (!s.value.has_value()) { + if (!s.value().has_value()) { throw std::runtime_error("Substr called on an object without a value."); } s.value().getLiteral().setSubstr(startByteOffset, byteLength); From 67c747ad32e865c2a5d929fd128635a1ddd9a16f Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Thu, 12 Dec 2024 13:10:14 +0100 Subject: [PATCH 26/30] fix --- src/engine/sparqlExpressions/StringExpressions.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 03a689c481..e061f3ee7b 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -204,6 +204,7 @@ class SubstrImpl { NumericValue length) const { if (!s.has_value() || std::holds_alternative(start) || std::holds_alternative(length)) { + throw std::runtime_error("Substr called on an object without a value."); return Id::makeUndefined(); } @@ -242,8 +243,8 @@ class SubstrImpl { std::size_t endByteOffset = utf8ToByteOffset(str, startInt + lengthInt); std::size_t byteLength = endByteOffset - startByteOffset; - if (!s.value().has_value()) { - throw std::runtime_error("Substr called on an object without a value."); + if (!s.value().isLiteral()){ + throw std::runtime_error("Substr called on an object that isn't a literal"); } s.value().getLiteral().setSubstr(startByteOffset, byteLength); return std::move(s.value()); From 313bba49d0af281d800664507c4fee90f8fc7979 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Thu, 12 Dec 2024 13:13:20 +0100 Subject: [PATCH 27/30] T --- src/engine/sparqlExpressions/StringExpressions.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index e061f3ee7b..1aec2a0b6b 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -204,7 +204,7 @@ class SubstrImpl { NumericValue length) const { if (!s.has_value() || std::holds_alternative(start) || std::holds_alternative(length)) { - throw std::runtime_error("Substr called on an object without a value."); + AD_THROW("Substr called on an object that isn't a literal"); return Id::makeUndefined(); } @@ -243,9 +243,6 @@ class SubstrImpl { std::size_t endByteOffset = utf8ToByteOffset(str, startInt + lengthInt); std::size_t byteLength = endByteOffset - startByteOffset; - if (!s.value().isLiteral()){ - throw std::runtime_error("Substr called on an object that isn't a literal"); - } s.value().getLiteral().setSubstr(startByteOffset, byteLength); return std::move(s.value()); } From be80b0904533d081d33908944fafdfabef7c3eb0 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Thu, 12 Dec 2024 15:30:18 +0100 Subject: [PATCH 28/30] add exceptions --- src/engine/ExportQueryExecutionTrees.cpp | 3 +++ .../sparqlExpressions/SparqlExpressionValueGetters.cpp | 2 +- src/engine/sparqlExpressions/StringExpressions.cpp | 8 +++++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 201ba3bcf2..6c073577c8 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -377,6 +377,7 @@ std::optional ExportQueryExecutionTrees::handleIriOrLiteral( bool onlyReturnLiteralsWithXsdString) { if (!word.isLiteral()) { if (onlyReturnLiterals || onlyReturnLiteralsWithXsdString) { + AD_THROW("The input is an IRI, but only literals are allowed."); return std::nullopt; } return word; @@ -386,6 +387,7 @@ std::optional ExportQueryExecutionTrees::handleIriOrLiteral( if (isPlainLiteralOrLiteralWithXsdString(word)) { return word; } + AD_THROW("The literal must either have no datatype or datatype xsd:string."); return std::nullopt; } @@ -484,6 +486,7 @@ std::optional ExportQueryExecutionTrees::idToLiteralOrIri( getLiteralOrIriFromVocabIndex(index, id, localVocab), onlyReturnLiterals, onlyReturnLiteralsWithXsdString); case TextRecordIndex: + AD_THROW("TextRecordIndex case is not implemented."); return std::nullopt; default: return idToLiteralOrIriForEncodedValue(id, diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index c82ed8b0a3..89b487d3ad 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -112,7 +112,7 @@ LiteralOrIriValueGetterWithXsdStringFilter::operator()( if (ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(s)) { return s; } - throw std::runtime_error("Input is not a plain string or xsd:string."); + AD_THROW("Input is not a plain string or xsd:string."); return std::nullopt; } diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 1aec2a0b6b..af042a9624 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -204,7 +204,13 @@ class SubstrImpl { NumericValue length) const { if (!s.has_value() || std::holds_alternative(start) || std::holds_alternative(length)) { - AD_THROW("Substr called on an object that isn't a literal"); + if(!s.has_value()){ + AD_THROW("Substr called on an object without a value."); + } + else if(std::holds_alternative(start) || + std::holds_alternative(length)){ + AD_THROW("Invalid arguments: 'start' and 'length' must be numeric values."); + } return Id::makeUndefined(); } From 2adaa302f673c46bb2ff79713dfb22f3590c0c23 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Sat, 14 Dec 2024 10:02:33 +0100 Subject: [PATCH 29/30] nix --- src/engine/ExportQueryExecutionTrees.cpp | 3 +- .../sparqlExpressions/StringExpressions.cpp | 7 ---- test/ExportQueryExecutionTreesTest.cpp | 35 +++++++++++++++++-- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 6c073577c8..db7b5ea4c0 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -387,7 +387,6 @@ std::optional ExportQueryExecutionTrees::handleIriOrLiteral( if (isPlainLiteralOrLiteralWithXsdString(word)) { return word; } - AD_THROW("The literal must either have no datatype or datatype xsd:string."); return std::nullopt; } @@ -486,7 +485,7 @@ std::optional ExportQueryExecutionTrees::idToLiteralOrIri( getLiteralOrIriFromVocabIndex(index, id, localVocab), onlyReturnLiterals, onlyReturnLiteralsWithXsdString); case TextRecordIndex: - AD_THROW("TextRecordIndex case is not implemented."); + AD_THROW("TextRecordIndex case is not implemented."); return std::nullopt; default: return idToLiteralOrIriForEncodedValue(id, diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index af042a9624..11d182e9d5 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -204,13 +204,6 @@ class SubstrImpl { NumericValue length) const { if (!s.has_value() || std::holds_alternative(start) || std::holds_alternative(length)) { - if(!s.has_value()){ - AD_THROW("Substr called on an object without a value."); - } - else if(std::holds_alternative(start) || - std::holds_alternative(length)){ - AD_THROW("Invalid arguments: 'start' and 'length' must be numeric values."); - } return Id::makeUndefined(); } diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index 2813c07cab..9c9cfef83c 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1710,9 +1710,11 @@ TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { // Case: IRI {getId(""), - {{false, false, ""}, - {true, false, std::nullopt}, - {false, true, std::nullopt}}}, + { + {false, false, ""} + //, {true, false, std::nullopt} + //,{false, true, std::nullopt} + }}, // Case: datatype `Int` {ad_utility::testing::IntId(1), @@ -1727,3 +1729,30 @@ TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { checkIdToLiteralOrIri(id, cases); } } + +TEST(ExportQueryExecutionTrees, IsPlainLiteralOrLiteralWithXsdString) { + using Iri = ad_utility::triple_component::Iri; + using LiteralOrIri = ad_utility::triple_component::LiteralOrIri; + using Literal = ad_utility::triple_component::Literal; + + auto toLiteralOrIri = [](std::string_view content, auto descriptor) { + return LiteralOrIri{Literal::literalWithNormalizedContent( + asNormalizedStringViewUnsafe(content), descriptor)}; + }; + + auto verify = [](const LiteralOrIri& input, bool expected) { + EXPECT_EQ( + ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(input), + expected); + }; + + verify(toLiteralOrIri("Hallo", std::nullopt), true); + verify(toLiteralOrIri( + "Hallo", + Iri::fromIriref("")), + true); + verify( + toLiteralOrIri( + "Hallo", Iri::fromIriref("")), + false); +} From 7455f29bde0d2c29277afc9478fbb8d1ce0bc221 Mon Sep 17 00:00:00 2001 From: Annika Greif Date: Mon, 6 Jan 2025 12:14:40 +0100 Subject: [PATCH 30/30] idToLiteral without Iri --- src/engine/ExportQueryExecutionTrees.cpp | 46 ++++---- src/engine/ExportQueryExecutionTrees.h | 12 +- .../SparqlExpressionValueGetters.cpp | 22 ++-- .../SparqlExpressionValueGetters.h | 28 ++--- .../sparqlExpressions/StringExpressions.cpp | 104 +++++++++--------- test/ExportQueryExecutionTreesTest.cpp | 4 +- 6 files changed, 106 insertions(+), 110 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 3b261e7aa0..58b33e03dd 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -350,8 +350,8 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) { } // _____________________________________________________________________________ -std::optional -ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue( +std::optional +ExportQueryExecutionTrees::idToLiteralForEncodedValue( Id id, bool onlyReturnLiteralsWithXsdString) { if (onlyReturnLiteralsWithXsdString) { return std::nullopt; @@ -361,7 +361,8 @@ ExportQueryExecutionTrees::idToLiteralOrIriForEncodedValue( return std::nullopt; } - return LiteralOrIri::literalWithoutQuotes(optionalStringAndType->first); + return ad_utility::triple_component::Literal::literalWithoutQuotes( + optionalStringAndType->first); } // _____________________________________________________________________________ @@ -372,20 +373,17 @@ bool ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString( } // _____________________________________________________________________________ -std::optional ExportQueryExecutionTrees::handleIriOrLiteral( - LiteralOrIri word, bool onlyReturnLiterals, - bool onlyReturnLiteralsWithXsdString) { +std::optional +ExportQueryExecutionTrees::handleIriOrLiteral( + LiteralOrIri word, bool onlyReturnLiteralsWithXsdString) { if (!word.isLiteral()) { - if (onlyReturnLiterals || onlyReturnLiteralsWithXsdString) { - AD_THROW("The input is an IRI, but only literals are allowed."); - return std::nullopt; - } - return word; + AD_THROW("The input is an IRI, but only literals are allowed."); + return std::nullopt; } if (onlyReturnLiteralsWithXsdString) { if (isPlainLiteralOrLiteralWithXsdString(word)) { - return word; + return word.getLiteral(); } return std::nullopt; } @@ -393,7 +391,7 @@ std::optional ExportQueryExecutionTrees::handleIriOrLiteral( if (word.hasDatatype() && !isPlainLiteralOrLiteralWithXsdString(word)) { word.getLiteral().removeDatatype(); } - return word; + return word.getLiteral(); } // _____________________________________________________________________________ @@ -463,9 +461,10 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, // _____________________________________________________________________________ template -std::optional ExportQueryExecutionTrees::idToLiteralOrIri( - const Index& index, Id id, const LocalVocab& localVocab, - bool onlyReturnLiteralsWithXsdString) { +std::optional +ExportQueryExecutionTrees::idToLiteral(const Index& index, Id id, + const LocalVocab& localVocab, + bool onlyReturnLiteralsWithXsdString) { using enum Datatype; auto datatype = id.getDatatype(); @@ -477,19 +476,18 @@ std::optional ExportQueryExecutionTrees::idToLiteralOrIri( switch (datatype) { case WordVocabIndex: - return LiteralOrIri::literalWithoutQuotes( + return ad_utility::triple_component::Literal::literalWithoutQuotes( index.indexToString(id.getWordVocabIndex())); case VocabIndex: case LocalVocabIndex: return handleIriOrLiteral( getLiteralOrIriFromVocabIndex(index, id, localVocab), - onlyReturnLiterals, onlyReturnLiteralsWithXsdString); + onlyReturnLiteralsWithXsdString); case TextRecordIndex: AD_THROW("TextRecordIndex case is not implemented."); return std::nullopt; default: - return idToLiteralOrIriForEncodedValue(id, - onlyReturnLiteralsWithXsdString); + return idToLiteralForEncodedValue(id, onlyReturnLiteralsWithXsdString); } } @@ -515,14 +513,14 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id, std::identity&& escapeFunction); // ___________________________________________________________________________ -template std::optional -ExportQueryExecutionTrees::idToLiteralOrIri( +template std::optional +ExportQueryExecutionTrees::idToLiteral( const Index& index, Id id, const LocalVocab& localVocab, bool onlyReturnLiteralsWithXsdString); // ___________________________________________________________________________ -template std::optional -ExportQueryExecutionTrees::idToLiteralOrIri( +template std::optional +ExportQueryExecutionTrees::idToLiteral( const Index& index, Id id, const LocalVocab& localVocab, bool onlyReturnLiteralsWithXsdString); diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 2e012c96c5..edc0b48e4b 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -78,7 +78,7 @@ class ExportQueryExecutionTrees { // return 'std::nullopt'. These semantics are useful for the string // expressions in StringExpressions.cpp. template - static std::optional idToLiteralOrIri( + static std::optional idToLiteral( const Index& index, Id id, const LocalVocab& localVocab, bool onlyReturnLiteralsWithXsdString = false); @@ -88,14 +88,14 @@ class ExportQueryExecutionTrees { // If `onlyReturnLiteralsWithXsdString` is `true`, returns `std::nullopt`. // If `onlyReturnLiteralsWithXsdString` is `false`, removes datatypes from // literals (e.g. the integer `42` is converted to the plain literal `"42"`). - static std::optional idToLiteralOrIriForEncodedValue( - Id id, bool onlyReturnLiteralsWithXsdString = false); + static std::optional + idToLiteralForEncodedValue(Id id, + bool onlyReturnLiteralsWithXsdString = false); // A helper function for the `idToLiteralOrIri` function. Checks and processes // a LiteralOrIri based on the given parameters. - static std::optional handleIriOrLiteral( - LiteralOrIri word, bool onlyReturnLiterals, - bool onlyReturnLiteralsWithXsdString); + static std::optional + handleIriOrLiteral(LiteralOrIri word, bool onlyReturnLiteralsWithXsdString); // Checks if a LiteralOrIri is either a plain literal (without datatype) // or a literal with the `xsd:string` datatype. diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index 89b487d3ad..a82d18cee1 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -91,26 +91,26 @@ std::optional StringValueGetter::operator()( } // ____________________________________________________________________________ -std::optional LiteralOrIriValueGetter::operator()( - Id id, const EvaluationContext* context) const { - return ExportQueryExecutionTrees::idToLiteralOrIri(context->_qec.getIndex(), - id, context->_localVocab); +std::optional +LiteralValueGetter::operator()(Id id, const EvaluationContext* context) const { + return ExportQueryExecutionTrees::idToLiteral(context->_qec.getIndex(), id, + context->_localVocab); } // ____________________________________________________________________________ -std::optional -LiteralOrIriValueGetterWithXsdStringFilter::operator()( +std::optional +LiteralValueGetterWithXsdStringFilter::operator()( Id id, const EvaluationContext* context) const { - return ExportQueryExecutionTrees::idToLiteralOrIri( - context->_qec.getIndex(), id, context->_localVocab, true); + return ExportQueryExecutionTrees::idToLiteral(context->_qec.getIndex(), id, + context->_localVocab, true); } // ____________________________________________________________________________ -std::optional -LiteralOrIriValueGetterWithXsdStringFilter::operator()( +std::optional +LiteralValueGetterWithXsdStringFilter::operator()( const LiteralOrIri& s, const EvaluationContext*) const { if (ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(s)) { - return s; + return s.getLiteral(); } AD_THROW("Input is not a plain string or xsd:string."); return std::nullopt; diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index 159040eedd..59947bd311 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -143,29 +143,29 @@ struct StringValueGetter : Mixin { // This class can be used as the `ValueGetter` argument of Expression // templates. It produces a LiteralOrIri. -struct LiteralOrIriValueGetter : Mixin { - using Mixin::operator(); +struct LiteralValueGetter : Mixin { + using Mixin::operator(); - std::optional operator()(ValueId, - const EvaluationContext*) const; + std::optional operator()( + ValueId, const EvaluationContext*) const; - std::optional operator()(const LiteralOrIri& s, - const EvaluationContext*) const { - return s; + std::optional operator()( + const LiteralOrIri& s, const EvaluationContext*) const { + return s.getLiteral(); } }; // Same as above but only literals with 'xsd:string' datatype or no datatype are // returned. -struct LiteralOrIriValueGetterWithXsdStringFilter - : Mixin { - using Mixin::operator(); +struct LiteralValueGetterWithXsdStringFilter + : Mixin { + using Mixin::operator(); - std::optional operator()(ValueId, - const EvaluationContext*) const; + std::optional operator()( + ValueId, const EvaluationContext*) const; - std::optional operator()(const LiteralOrIri& s, - const EvaluationContext*) const; + std::optional operator()( + const LiteralOrIri& s, const EvaluationContext*) const; }; // Value getter for `isBlank`. diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index 9cfa0117b7..2c1295660b 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -109,6 +109,49 @@ class StringExpressionImpl : public SparqlExpression { } }; +// Same as the `StringExpressionImpl` above, but with the LiteralOrValueGetter. +template +class LiteralExpressionImpl : public SparqlExpression { + private: + using ExpressionWithStr = NARY< + N, FV>; + using ExpressionWithoutStr = + NARY>; + + SparqlExpression::Ptr impl_; + + public: + explicit LiteralExpressionImpl( + SparqlExpression::Ptr child, + std::same_as auto... children) + requires(sizeof...(children) + 1 == N) { + AD_CORRECTNESS_CHECK(child != nullptr); + if (child->isStrExpression()) { + auto childrenOfStr = std::move(*child).moveChildrenOut(); + AD_CORRECTNESS_CHECK(childrenOfStr.size() == 1); + impl_ = std::make_unique( + std::move(childrenOfStr.at(0)), std::move(children)...); + } else { + impl_ = std::make_unique(std::move(child), + std::move(children)...); + } + } + + ExpressionResult evaluate(EvaluationContext* context) const override { + return impl_->evaluate(context); + } + std::string getCacheKey(const VariableToColumnMap& varColMap) const override { + return impl_->getCacheKey(varColMap); + } + + private: + std::span childrenImpl() override { + return impl_->children(); + } +}; + // Lift a `Function` that takes one or multiple `std::string`s (possibly via // references) and returns an `Id` or `std::string` to a function that takes the // same number of `std::optional` and returns `Id` or @@ -201,8 +244,9 @@ class SubstrImpl { }; public: - IdOrLiteralOrIri operator()(std::optional s, NumericValue start, - NumericValue length) const { + IdOrLiteralOrIri operator()( + std::optional s, + NumericValue start, NumericValue length) const { if (!s.has_value() || std::holds_alternative(start) || std::holds_alternative(length)) { return Id::makeUndefined(); @@ -243,60 +287,14 @@ class SubstrImpl { std::size_t endByteOffset = utf8ToByteOffset(str, startInt + lengthInt); std::size_t byteLength = endByteOffset - startByteOffset; - s.value().getLiteral().setSubstr(startByteOffset, byteLength); - return std::move(s.value()); - } -}; - -// Implementation of the `SUBSTR` SPARQL function. It dynamically -// selects the appropriate value getter for the first argument based on whether -// it is a `STR()` expression (using -// `LiteralOrIriValueGetterWithXsdStringFilter`) or another type (using -// `LiteralOrIriValueGetter`). -class SubstrExpressionImpl : public SparqlExpression { - private: - using ExpressionWithStr = - NARY<3, FV>; - using ExpressionWithoutStr = - NARY<3, FV>; - - SparqlExpression::Ptr impl_; - - public: - explicit SubstrExpressionImpl( - SparqlExpression::Ptr child, - std::same_as auto... children) - requires(sizeof...(children) + 1 == 3) { - AD_CORRECTNESS_CHECK(child != nullptr); - - if (child->isStrExpression()) { - auto childrenOfStr = std::move(*child).moveChildrenOut(); - AD_CORRECTNESS_CHECK(childrenOfStr.size() == 1); - impl_ = std::make_unique( - std::move(childrenOfStr.at(0)), std::move(children)...); - } else { - impl_ = std::make_unique(std::move(child), - std::move(children)...); - } - } - - ExpressionResult evaluate(EvaluationContext* context) const override { - return impl_->evaluate(context); - } - - std::string getCacheKey(const VariableToColumnMap& varColMap) const override { - return impl_->getCacheKey(varColMap); - } - - private: - std::span childrenImpl() override { - return impl_->children(); + s.value().setSubstr(startByteOffset, byteLength); + return std::move(LiteralOrIri(s.value())); } }; -using SubstrExpression = SubstrExpressionImpl; +using SubstrExpression = + LiteralExpressionImpl<3, SubstrImpl, NumericValueGetter, + NumericValueGetter>; // STRSTARTS [[maybe_unused]] auto strStartsImpl = [](std::string_view text, diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index aa098fee00..dc72040dc0 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -1649,10 +1649,10 @@ TEST(ExportQueryExecutionTrees, idToLiteralOrIriFunctionality) { auto callIdToLiteralOrIri = [&](Id id, bool onlyLiterals, bool onlyLiteralsWithXsdString = false) { if (onlyLiterals) { - return ExportQueryExecutionTrees::idToLiteralOrIri( + return ExportQueryExecutionTrees::idToLiteral( qec->getIndex(), id, LocalVocab{}, onlyLiteralsWithXsdString); } else { - return ExportQueryExecutionTrees::idToLiteralOrIri( + return ExportQueryExecutionTrees::idToLiteral( qec->getIndex(), id, LocalVocab{}, onlyLiteralsWithXsdString); } };