Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct datatypes for string expressions #1636

Open
wants to merge 34 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
47feb71
STRLEN für UTF 8 angepasst
DuDaAG Oct 24, 2024
4877e8f
Test
DuDaAG Oct 24, 2024
aefd889
test rückgäning
DuDaAG Oct 24, 2024
b2cb8c6
find pull-request
DuDaAG Oct 25, 2024
96b1959
Fix test
DuDaAG Oct 25, 2024
b7806b8
Update src/engine/sparqlExpressions/StringExpressions.cpp
DuDaAG Oct 31, 2024
a83d74b
Update src/engine/sparqlExpressions/StringExpressions.cpp
DuDaAG Oct 31, 2024
e73d1ab
Format
Nov 1, 2024
0ed79df
new LiteralOrIriValueGetter
Nov 13, 2024
2666b78
Merge branch 'my-branch' into master
DuDaAG Nov 13, 2024
27fff04
Merge pull request #3 from DuDaAG/master
DuDaAG Nov 13, 2024
7078f60
idToLiteralAndIri with specifiactions
Nov 22, 2024
2581f4c
some fixes
Nov 22, 2024
6d7a2b2
Add Test IdToLiteralOrIri and some formatting
Nov 24, 2024
5948dcb
formatting
Nov 24, 2024
52ef1f5
Correction for sonar
Nov 24, 2024
f15bf94
SubStr improvements
Nov 29, 2024
774d52b
fix
Nov 29, 2024
d4b49c0
little changes
Nov 30, 2024
72aaa00
Feedback implemented
Dec 7, 2024
25000a9
format
Dec 7, 2024
617c3b7
New position codespell-ignore
Dec 7, 2024
f631ec2
delete codespell-ignore
Dec 7, 2024
889e9dd
UTF8 handling in subStr
Dec 7, 2024
0c41603
format
Dec 7, 2024
1b7e1b4
Add runtime error
Dec 12, 2024
344560a
syntax
Dec 12, 2024
67c747a
fix
Dec 12, 2024
313bba4
T
Dec 12, 2024
be80b09
add exceptions
Dec 12, 2024
2adaa30
nix
Dec 14, 2024
39ca3cb
Merge branch 'master' into Correct-Datatypes-for-StringExpressions
joka921 Dec 18, 2024
7455f29
idToLiteral without Iri
Jan 6, 2025
ac95531
Merge branch 'ad-freiburg:master' into Correct-Datatypes-for-StringEx…
DuDaAG Jan 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 92 additions & 1 deletion src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#include "util/ConstexprUtils.h"
#include "util/http/MediaTypes.h"

using LiteralOrIri = ad_utility::triple_component::LiteralOrIri;

// Return true iff the `result` is nonempty.
bool getResultForAsk(const std::shared_ptr<const Result>& result) {
if (result->isFullyMaterialized()) {
Expand Down Expand Up @@ -347,11 +349,55 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) {
}
}

// _____________________________________________________________________________
// Build a literal for an `id` whose value is encoded directly in the ID.
// Yields `std::nullopt` if only `xsd:string` literals are requested (this is
// checked before the `id` is looked at) or if
// `idToStringAndTypeForEncodedValue` has no result for the `id`.
std::optional<ad_utility::triple_component::Literal>
ExportQueryExecutionTrees::idToLiteralForEncodedValue(
    Id id, bool onlyReturnLiteralsWithXsdString) {
  using Literal = ad_utility::triple_component::Literal;
  if (!onlyReturnLiteralsWithXsdString) {
    if (auto stringAndType = idToStringAndTypeForEncodedValue(id)) {
      // Only the string part is used; the type part of the pair is dropped.
      return Literal::literalWithoutQuotes(stringAndType->first);
    }
  }
  return std::nullopt;
}

// _____________________________________________________________________________
bool ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(
const LiteralOrIri& word) {
return !word.hasDatatype() ||
asStringViewUnsafe(word.getDatatype()) == XSD_STRING;
}

// _____________________________________________________________________________
// Turn `word` into a literal according to the following rules:
// - An IRI is rejected with an exception.
// - A literal without datatype or with datatype `xsd:string` is returned
//   unchanged.
// - Any other literal yields `std::nullopt` if
//   `onlyReturnLiteralsWithXsdString` is set, otherwise it is returned with
//   its datatype removed.
std::optional<ad_utility::triple_component::Literal>
ExportQueryExecutionTrees::handleIriOrLiteral(
    LiteralOrIri word, bool onlyReturnLiteralsWithXsdString) {
  if (!word.isLiteral()) {
    AD_THROW("The input is an IRI, but only literals are allowed.");
    return std::nullopt;
  }

  // Plain and `xsd:string` literals are passed through in both modes.
  if (isPlainLiteralOrLiteralWithXsdString(word)) {
    return word.getLiteral();
  }

  // From here on `word` is a literal with a datatype other than `xsd:string`.
  if (onlyReturnLiteralsWithXsdString) {
    return std::nullopt;
  }
  word.getLiteral().removeDatatype();
  return word.getLiteral();
}

// _____________________________________________________________________________
ad_utility::triple_component::LiteralOrIri
ExportQueryExecutionTrees::getLiteralOrIriFromVocabIndex(
const Index& index, Id id, const LocalVocab& localVocab) {
using LiteralOrIri = ad_utility::triple_component::LiteralOrIri;
switch (id.getDatatype()) {
case Datatype::LocalVocabIndex:
return localVocab.getWord(id.getLocalVocabIndex()).asLiteralOrIri();
Expand Down Expand Up @@ -412,6 +458,39 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id,
return idToStringAndTypeForEncodedValue(id);
}
}

// _____________________________________________________________________________
// Convert `id` to a literal, dispatching on the datatype of the `id`:
// - `VocabIndex` / `LocalVocabIndex`: the entry is looked up and passed to
//   `handleIriOrLiteral` together with `onlyReturnLiteralsWithXsdString`.
// - `WordVocabIndex`: the word from the index becomes a literal.
// - `TextRecordIndex`: not implemented, raises an exception.
// - everything else: delegated to `idToLiteralForEncodedValue`.
// If the template parameter is `true`, every `id` that is neither a
// `VocabIndex` nor a `LocalVocabIndex` yields `std::nullopt` right away.
template <bool returnOnlyLiterals>
std::optional<ad_utility::triple_component::Literal>
ExportQueryExecutionTrees::idToLiteral(const Index& index, Id id,
                                       const LocalVocab& localVocab,
                                       bool onlyReturnLiteralsWithXsdString) {
  using enum Datatype;
  const auto datatype = id.getDatatype();

  if constexpr (returnOnlyLiterals) {
    if (datatype != VocabIndex && datatype != LocalVocabIndex) {
      return std::nullopt;
    }
  }

  switch (datatype) {
    case VocabIndex:
    case LocalVocabIndex:
      return handleIriOrLiteral(
          getLiteralOrIriFromVocabIndex(index, id, localVocab),
          onlyReturnLiteralsWithXsdString);
    case WordVocabIndex:
      return ad_utility::triple_component::Literal::literalWithoutQuotes(
          index.indexToString(id.getWordVocabIndex()));
    case TextRecordIndex:
      AD_THROW("TextRecordIndex case is not implemented.");
      return std::nullopt;
    default:
      return idToLiteralForEncodedValue(id, onlyReturnLiteralsWithXsdString);
  }
}

// ___________________________________________________________________________
template std::optional<std::pair<std::string, const char*>>
ExportQueryExecutionTrees::idToStringAndType<true, false, std::identity>(
Expand All @@ -433,6 +512,18 @@ ExportQueryExecutionTrees::idToStringAndType(const Index& index, Id id,
const LocalVocab& localVocab,
std::identity&& escapeFunction);

// ___________________________________________________________________________
// Explicit instantiation of `idToLiteral` with the compile-time filter
// disabled: every datatype of `id` is dispatched by the `switch` in the
// definition of `idToLiteral`.
template std::optional<ad_utility::triple_component::Literal>
ExportQueryExecutionTrees::idToLiteral<false>(
const Index& index, Id id, const LocalVocab& localVocab,
bool onlyReturnLiteralsWithXsdString);

// ___________________________________________________________________________
// Explicit instantiation of `idToLiteral` with the compile-time filter
// enabled: an `id` whose datatype is neither `VocabIndex` nor
// `LocalVocabIndex` yields `std::nullopt`.
template std::optional<ad_utility::triple_component::Literal>
ExportQueryExecutionTrees::idToLiteral<true>(
const Index& index, Id id, const LocalVocab& localVocab,
bool onlyReturnLiteralsWithXsdString);

// Convert a stringvalue and optional type to JSON binding.
static nlohmann::json stringAndTypeToBinding(std::string_view entitystr,
const char* xsdType) {
Expand Down
32 changes: 32 additions & 0 deletions src/engine/ExportQueryExecutionTrees.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class ExportQueryExecutionTrees {
public:
using MediaType = ad_utility::MediaType;
using CancellationHandle = ad_utility::SharedCancellationHandle;
using LiteralOrIri = ad_utility::triple_component::LiteralOrIri;

// Compute the result of the given `parsedQuery` (created by the
// `SparqlParser`) for which the `QueryExecutionTree` has been previously
Expand Down Expand Up @@ -69,6 +70,37 @@ class ExportQueryExecutionTrees {
static std::optional<std::pair<std::string, const char*>>
idToStringAndTypeForEncodedValue(Id id);

// Convert the `id` to a `Literal` (the result is never an IRI). Datatypes are
// always stripped unless they are `xsd:string`, so for literals with
// non-`xsd:string` datatypes (this includes IDs that directly store their
// value, like Doubles) the datatype of the result is always empty. If
// `onlyReturnLiteralsWithXsdString` is true, all literals with
// non-`xsd:string` datatypes (including encoded IDs) yield `std::nullopt`.
// If the template parameter `returnOnlyLiterals` is true, every `id` whose
// datatype is neither `VocabIndex` nor `LocalVocabIndex` yields
// `std::nullopt`.
// NOTE: An `id` that refers to an IRI, as well as an `id` of type
// `TextRecordIndex`, currently raises an exception (via `AD_THROW`) instead of
// yielding `std::nullopt`.
// These semantics are useful for the string expressions in
// StringExpressions.cpp.
template <bool returnOnlyLiterals = false>
static std::optional<ad_utility::triple_component::Literal> idToLiteral(
const Index& index, Id id, const LocalVocab& localVocab,
bool onlyReturnLiteralsWithXsdString = false);

// Same as the previous function, but only handles the datatypes for which the
// value is encoded directly in the ID. For other datatypes an exception is
// thrown.
// If `onlyReturnLiteralsWithXsdString` is `true`, returns `std::nullopt`
// without inspecting the `id`.
// If `onlyReturnLiteralsWithXsdString` is `false`, removes datatypes from
// literals (e.g. the integer `42` is converted to the plain literal `"42"`).
// In that case `std::nullopt` is returned iff
// `idToStringAndTypeForEncodedValue` yields no value for the `id`.
static std::optional<ad_utility::triple_component::Literal>
idToLiteralForEncodedValue(Id id,
bool onlyReturnLiteralsWithXsdString = false);

// A helper function for the `idToLiteral` function. Processes `word` as
// follows:
// - If `word` is an IRI, an exception is raised via `AD_THROW`.
// - If `word` is a plain literal or a literal with datatype `xsd:string`, it
//   is returned unchanged.
// - Otherwise (literal with another datatype) the result is `std::nullopt` if
//   `onlyReturnLiteralsWithXsdString` is `true`, else the literal with its
//   datatype removed.
static std::optional<ad_utility::triple_component::Literal>
handleIriOrLiteral(LiteralOrIri word, bool onlyReturnLiteralsWithXsdString);

// Checks if a `LiteralOrIri` is either a plain literal (without datatype)
// or a literal with the `xsd:string` datatype. The check is based on
// `hasDatatype()` and `getDatatype()` of `word`; it is intended to be called
// for literals.
static bool isPlainLiteralOrLiteralWithXsdString(const LiteralOrIri& word);

// Acts as a helper to retrieve a LiteralOrIri object
// from an Id, where the Id is of type `VocabIndex` or `LocalVocabIndex`.
// This function should only be called with suitable `Datatype` Id's,
Expand Down
26 changes: 26 additions & 0 deletions src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,32 @@ std::optional<std::string> StringValueGetter::operator()(
}
}

// ____________________________________________________________________________
// Convert `id` to a literal via `ExportQueryExecutionTrees::idToLiteral`,
// using the index and the local vocabulary of the evaluation `context` and
// the default settings of `idToLiteral`.
std::optional<ad_utility::triple_component::Literal>
LiteralValueGetter::operator()(Id id, const EvaluationContext* context) const {
  const auto& index = context->_qec.getIndex();
  const auto& localVocab = context->_localVocab;
  return ExportQueryExecutionTrees::idToLiteral(index, id, localVocab);
}

// ____________________________________________________________________________
// Convert `id` to a literal via `ExportQueryExecutionTrees::idToLiteral`, but
// request that only literals without datatype or with datatype `xsd:string`
// are returned.
std::optional<ad_utility::triple_component::Literal>
LiteralValueGetterWithXsdStringFilter::operator()(
    Id id, const EvaluationContext* context) const {
  constexpr bool onlyReturnLiteralsWithXsdString = true;
  const auto& index = context->_qec.getIndex();
  return ExportQueryExecutionTrees::idToLiteral(
      index, id, context->_localVocab, onlyReturnLiteralsWithXsdString);
}

// ____________________________________________________________________________
// Overload for inputs that already are a `LiteralOrIri`. Returns the contained
// literal if `s` is a plain literal or a literal with datatype `xsd:string`,
// and `std::nullopt` otherwise.
std::optional<ad_utility::triple_component::Literal>
LiteralValueGetterWithXsdStringFilter::operator()(
    const LiteralOrIri& s, const EvaluationContext*) const {
  if (ExportQueryExecutionTrees::isPlainLiteralOrLiteralWithXsdString(s)) {
    return s.getLiteral();
  }
  // A literal with any other datatype is filtered out rather than reported as
  // an error. This mirrors the overload for `Id`s, where such a literal also
  // yields `std::nullopt` (see `handleIriOrLiteral`), so both overloads treat
  // the same value identically.
  return std::nullopt;
}

// ____________________________________________________________________________
template <auto isSomethingFunction, auto prefix>
Id IsSomethingValueGetter<isSomethingFunction, prefix>::operator()(
Expand Down
39 changes: 39 additions & 0 deletions src/engine/sparqlExpressions/SparqlExpressionValueGetters.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,45 @@ struct StringValueGetter : Mixin<StringValueGetter> {
}
};

// This class can be used as the `ValueGetter` argument of Expression
// templates. It produces a `std::optional<Literal>`.
struct LiteralValueGetter : Mixin<LiteralValueGetter> {
using Mixin<LiteralValueGetter>::operator();

// Overload for `ValueId` inputs; only declared here, the definition lives
// outside of the class.
std::optional<ad_utility::triple_component::Literal> operator()(
ValueId, const EvaluationContext*) const;

// Overload for `LiteralOrIri` inputs: returns the result of `s.getLiteral()`.
// NOTE(review): `s` is not checked with `isLiteral()` before the call;
// confirm what `getLiteral()` does when `s` holds an IRI.
std::optional<ad_utility::triple_component::Literal> operator()(
const LiteralOrIri& s, const EvaluationContext*) const {
return s.getLiteral();
}
};

// Same as above but only literals with 'xsd:string' datatype or no datatype are
// returned.
struct LiteralValueGetterWithXsdStringFilter
: Mixin<LiteralValueGetterWithXsdStringFilter> {
using Mixin<LiteralValueGetterWithXsdStringFilter>::operator();

// Overload for `ValueId` inputs; only declared here, the definition lives
// outside of the class.
std::optional<ad_utility::triple_component::Literal> operator()(
ValueId, const EvaluationContext*) const;

// Overload for `LiteralOrIri` inputs; only declared here, the definition
// lives outside of the class.
std::optional<ad_utility::triple_component::Literal> operator()(
const LiteralOrIri& s, const EvaluationContext*) const;
};

// Value getter for `isBlank`. Maps each input to a boolean `Id` that states
// whether the input is a blank node.
struct IsBlankNodeValueGetter : Mixin<IsBlankNodeValueGetter> {
  using Mixin<IsBlankNodeValueGetter>::operator();

  // A `ValueId` is a blank node iff its datatype is `BlankNodeIndex`.
  Id operator()(ValueId id, const EvaluationContext*) const {
    return Id::makeFromBool(id.getDatatype() == Datatype::BlankNodeIndex);
  }

  // A `LiteralOrIri` is never a blank node, so the result is always `false`.
  Id operator()(const LiteralOrIri&, const EvaluationContext*) const {
    return Id::makeFromBool(false);
  }
};

// Boolean value getter that checks whether the given `Id` is a `ValueId` of the
// given `datatype`.
template <Datatype datatype>
Expand Down
Loading
Loading