Skip to content

Commit

Permalink
Merge branch 'master' into addAllDatasetFromQuery
Browse files Browse the repository at this point in the history
  • Loading branch information
Qup42 committed Jan 22, 2025
2 parents fd8ddbe + d7ec9be commit a97523f
Show file tree
Hide file tree
Showing 29 changed files with 1,227 additions and 397 deletions.
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ add_library(engine
CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp
Describe.cpp)
Describe.cpp GraphStoreProtocol.cpp)
qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
101 changes: 101 additions & 0 deletions src/engine/GraphStoreProtocol.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright 2024, University of Freiburg
// Chair of Algorithms and Data Structures
// Authors: Julian Mundhahs <[email protected]>

#include "engine/GraphStoreProtocol.h"

#include "util/http/beast.h"

// ____________________________________________________________________________
// Determine the graph a Graph Store Protocol request refers to from its URL
// query parameters. Exactly one of `graph=<iri>` and `default` (with an empty
// value) has to be given, otherwise a `std::runtime_error` is thrown.
GraphOrDefault GraphStoreProtocol::extractTargetGraph(
    const ad_utility::url_parser::ParamValueMap& params) {
  using ad_utility::url_parser::checkParameter;
  // Any value is accepted for `graph`; it is interpreted as the graph's IRI.
  const std::optional<std::string> namedGraph =
      checkParameter(params, "graph", std::nullopt);
  // `default` only counts if it is present with an empty value.
  const bool isDefault = checkParameter(params, "default", "").has_value();

  // Both set or both unset: the target graph is ambiguous or missing.
  if (namedGraph.has_value() == isDefault) {
    throw std::runtime_error(
        "Exactly one of the query parameters default or graph must be set to "
        "identify the graph for the graph store protocol request.");
  }

  if (!namedGraph.has_value()) {
    AD_CORRECTNESS_CHECK(isDefault);
    return DEFAULT{};
  }
  return GraphRef::fromIrirefWithoutBrackets(namedGraph.value());
}

// ____________________________________________________________________________
// Throw an `UnsupportedMediatypeError` whose message names the offending
// `mediatype` and lists the media types that are accepted (Turtle and
// N-Triples). This function never returns.
void GraphStoreProtocol::throwUnsupportedMediatype(
    const std::string_view& mediatype) {
  throw UnsupportedMediatypeError(absl::StrCat(
      "Mediatype \"", mediatype,
      "\" is not supported for SPARQL Graph Store HTTP Protocol in QLever. "
      "Supported: ",
      toString(ad_utility::MediaType::turtle), ", ",
      toString(ad_utility::MediaType::ntriples), "."));
}

// ____________________________________________________________________________
void GraphStoreProtocol::throwUnsupportedHTTPMethod(
const std::string_view& method) {
throw std::runtime_error(absl::StrCat(
method,
" in the SPARQL Graph Store HTTP Protocol is not yet implemented "
"in QLever."));
}

// ____________________________________________________________________________
// Parse the request `body` into triples. Bodies declared as Turtle or
// N-Triples are both handed to the same Turtle string parser. For every other
// `contentType` an `UnsupportedMediatypeError` is thrown.
std::vector<TurtleTriple> GraphStoreProtocol::parseTriples(
    const std::string& body, const ad_utility::MediaType contentType) {
  using Re2Parser = RdfStringParser<TurtleParser<Tokenizer>>;
  switch (contentType) {
    case ad_utility::MediaType::turtle:
    case ad_utility::MediaType::ntriples: {
      auto parser = Re2Parser();
      parser.setInputStream(body);
      return parser.parseAndReturnAllTriples();
    }
    default: {
      // Declared `[[noreturn]]`, so control never leaves the `switch` without
      // either returning or throwing.
      throwUnsupportedMediatype(toString(contentType));
    }
  }
}

// ____________________________________________________________________________
std::vector<SparqlTripleSimpleWithGraph> GraphStoreProtocol::convertTriples(
const GraphOrDefault& graph, std::vector<TurtleTriple> triples) {
SparqlTripleSimpleWithGraph::Graph tripleGraph{std::monostate{}};
if (std::holds_alternative<GraphRef>(graph)) {
tripleGraph = Iri(std::get<GraphRef>(graph).toStringRepresentation());
}
auto transformTurtleTriple = [&tripleGraph](TurtleTriple&& triple) {
AD_CORRECTNESS_CHECK(triple.graphIri_.isId() &&
triple.graphIri_.getId() ==
qlever::specialIds().at(DEFAULT_GRAPH_IRI));

return SparqlTripleSimpleWithGraph(std::move(triple.subject_),
std::move(triple.predicate_),
std::move(triple.object_), tripleGraph);
};
return ad_utility::transform(std::move(triples), transformTurtleTriple);
}

// ____________________________________________________________________________
// Build the query that is equivalent to a Graph Store Protocol GET: a
// CONSTRUCT of `?s ?p ?o` over the single triple pattern `?s ?p ?o`. If
// `graph` holds an IRI, that IRI is added as a dataset clause.
ParsedQuery GraphStoreProtocol::transformGet(const GraphOrDefault& graph) {
  ParsedQuery res;
  res._clause = parsedQuery::ConstructClause(
      {{Variable("?s"), Variable("?p"), Variable("?o")}});

  // The WHERE part consists of one basic graph pattern with one triple.
  parsedQuery::GraphPattern selectSPO;
  selectSPO._graphPatterns.emplace_back(parsedQuery::BasicGraphPattern{
      {SparqlTriple(Variable("?s"), "?p", Variable("?o"))}});
  res._rootGraphPattern = std::move(selectSPO);

  if (const auto* iri =
          std::get_if<ad_utility::triple_component::Iri>(&graph)) {
    // NOTE(review): the `false` presumably marks the clause as `FROM` rather
    // than `FROM NAMED` - confirm against the definition of `DatasetClause`.
    res.datasetClauses_ =
        parsedQuery::DatasetClauses::fromClauses({DatasetClause{*iri, false}});
  }
  return res;
}
140 changes: 140 additions & 0 deletions src/engine/GraphStoreProtocol.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Copyright 2024, University of Freiburg
// Chair of Algorithms and Data Structures
// Authors: Julian Mundhahs <[email protected]>

#pragma once

#include <gtest/gtest_prod.h>

#include "parser/ParsedQuery.h"
#include "parser/RdfParser.h"
#include "util/http/HttpUtils.h"
#include "util/http/UrlParser.h"

// Error that signals that the mediatype of a request could not be determined.
// The given message is what `what()` returns.
class UnknownMediatypeError : public std::runtime_error {
 public:
  explicit UnknownMediatypeError(std::string_view msg)
      : std::runtime_error(std::string(msg)) {}
};

// Error that signals that the mediatype of a request is known but not
// supported. The given message is what `what()` returns.
class UnsupportedMediatypeError : public std::runtime_error {
 public:
  explicit UnsupportedMediatypeError(std::string_view msg)
      : std::runtime_error(std::string(msg)) {}
};

// Translates requests of the SPARQL Graph Store HTTP Protocol into the
// `ParsedQuery` (SPARQL Query or Update) that has the same effect.
class GraphStoreProtocol {
 private:
  // Determine the mediatype of the request body from the `Content-Type`
  // header. Throws `UnknownMediatypeError` if the header is missing or empty
  // and `UnsupportedMediatypeError` if its value does not map to a known
  // `ad_utility::MediaType`.
  static ad_utility::MediaType extractMediatype(
      const ad_utility::httpUtils::HttpRequest auto& rawRequest) {
    using namespace boost::beast::http;

    std::string_view contentTypeString;
    if (rawRequest.find(field::content_type) != rawRequest.end()) {
      contentTypeString = rawRequest.at(field::content_type);
    }
    if (contentTypeString.empty()) {
      // Without a mediatype the request is rejected.
      // Note: The specification would also allow trying to determine the
      // media type from the content.
      throw UnknownMediatypeError("Mediatype empty or not set.");
    }

    const auto mediatype =
        ad_utility::getMediaTypeFromAcceptHeader(contentTypeString);
    if (mediatype.has_value()) {
      return mediatype.value();
    }
    // A media type is set, but it is not one of those known to the QLever
    // `MediaType` code. The call below never returns.
    throwUnsupportedMediatype(rawRequest.at(field::content_type));
  }
  FRIEND_TEST(GraphStoreProtocolTest, extractMediatype);

  // Throw the error for a mediatype that is not supported.
  [[noreturn]] static void throwUnsupportedMediatype(
      const std::string_view& mediatype);

  // Throw the error for an HTTP method that is not supported.
  [[noreturn]] static void throwUnsupportedHTTPMethod(
      const std::string_view& method);

  // Parse the triples from the request body according to the content type.
  static std::vector<TurtleTriple> parseTriples(
      const std::string& body, const ad_utility::MediaType contentType);
  FRIEND_TEST(GraphStoreProtocolTest, parseTriples);

  // Convert the triples from `TurtleTriple` to `SparqlTripleSimpleWithGraph`
  // and set the graph of each of them according to `graph`.
  static std::vector<SparqlTripleSimpleWithGraph> convertTriples(
      const GraphOrDefault& graph, std::vector<TurtleTriple> triples);
  FRIEND_TEST(GraphStoreProtocolTest, convertTriples);

  // Transform a SPARQL Graph Store Protocol POST into the equivalent
  // `ParsedQuery`, which is a SPARQL Update with the triples from the request
  // body as the first member of the `GraphUpdate` and an empty second member.
  static ParsedQuery transformPost(
      const ad_utility::httpUtils::HttpRequest auto& rawRequest,
      const GraphOrDefault& graph) {
    auto parsedTriples =
        parseTriples(rawRequest.body(), extractMediatype(rawRequest));
    updateClause::GraphUpdate up{
        convertTriples(graph, std::move(parsedTriples)), {}};
    ParsedQuery res;
    res._clause = parsedQuery::UpdateClause{std::move(up)};
    return res;
  }
  FRIEND_TEST(GraphStoreProtocolTest, transformPost);

  // Transform a SPARQL Graph Store Protocol GET into the equivalent
  // `ParsedQuery`, which is a SPARQL Query.
  static ParsedQuery transformGet(const GraphOrDefault& graph);
  FRIEND_TEST(GraphStoreProtocolTest, transformGet);

 public:
  // Every Graph Store Protocol request has an equivalent SPARQL Query or
  // Update. Transform the Graph Store Protocol request into its equivalent
  // Query or Update. Only GET and POST are implemented; every other method
  // results in a `std::runtime_error`.
  static ParsedQuery transformGraphStoreProtocol(
      const ad_utility::httpUtils::HttpRequest auto& rawRequest) {
    ad_utility::url_parser::ParsedUrl parsedUrl =
        ad_utility::url_parser::parseRequestTarget(rawRequest.target());
    // We only support passing the target graph as a query parameter (`Indirect
    // Graph Identification`). `Direct Graph Identification` (the URL is the
    // graph) is not supported. See also
    // https://www.w3.org/TR/2013/REC-sparql11-http-rdf-update-20130321/#graph-identification.
    GraphOrDefault graph = extractTargetGraph(parsedUrl.parameters_);

    using enum boost::beast::http::verb;
    // The `throwUnsupportedHTTPMethod` calls are `[[noreturn]]`, so none of
    // the cases below falls through into the next one.
    switch (rawRequest.method()) {
      case get:
        return transformGet(graph);
      case post:
        return transformPost(rawRequest, graph);
      case put:
        throwUnsupportedHTTPMethod("PUT");
      case delete_:
        throwUnsupportedHTTPMethod("DELETE");
      case head:
        throwUnsupportedHTTPMethod("HEAD");
      case patch:
        throwUnsupportedHTTPMethod("PATCH");
      default:
        throw std::runtime_error(
            absl::StrCat("Unsupported HTTP method \"",
                         std::string_view{rawRequest.method_string()},
                         "\" for the SPARQL Graph Store HTTP Protocol."));
    }
  }

 private:
  // Extract the graph to be acted upon from the URL query parameters
  // (`Indirect Graph Identification`). See
  // https://www.w3.org/TR/2013/REC-sparql11-http-rdf-update-20130321/#indirect-graph-identification
  static GraphOrDefault extractTargetGraph(
      const ad_utility::url_parser::ParamValueMap& params);
  FRIEND_TEST(GraphStoreProtocolTest, extractTargetGraph);
};
42 changes: 9 additions & 33 deletions src/engine/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <string>
#include <vector>

#include "GraphStoreProtocol.h"
#include "engine/ExecuteUpdate.h"
#include "engine/ExportQueryExecutionTrees.h"
#include "engine/QueryPlanner.h"
Expand Down Expand Up @@ -360,8 +361,8 @@ Awaitable<void> Server::process(

// We always want to call `Server::checkParameter` with the same first
// parameter.
auto checkParameter =
std::bind_front(&Server::checkParameter, std::cref(parameters));
auto checkParameter = std::bind_front(&ad_utility::url_parser::checkParameter,
std::cref(parameters));

// Check the access token. If an access token is provided and the check fails,
// throw an exception and do not process any part of the query (even if the
Expand Down Expand Up @@ -546,9 +547,11 @@ Awaitable<void> Server::process(
std::pair<bool, bool> Server::determineResultPinning(
const ad_utility::url_parser::ParamValueMap& params) {
const bool pinSubtrees =
checkParameter(params, "pinsubtrees", "true").has_value();
ad_utility::url_parser::checkParameter(params, "pinsubtrees", "true")
.has_value();
const bool pinResult =
checkParameter(params, "pinresult", "true").has_value();
ad_utility::url_parser::checkParameter(params, "pinresult", "true")
.has_value();
return {pinSubtrees, pinResult};
}

Expand Down Expand Up @@ -737,17 +740,11 @@ Awaitable<void> Server::sendStreamableResponse(
}
}

// ____________________________________________________________________________
// Error type derived from `std::runtime_error`; the given message is what
// `what()` returns.
class NoSupportedMediatypeError : public std::runtime_error {
 public:
  explicit NoSupportedMediatypeError(std::string_view msg)
      : std::runtime_error(std::string(msg)) {}
};

// ____________________________________________________________________________
MediaType Server::determineMediaType(
const ad_utility::url_parser::ParamValueMap& params,
const ad_utility::httpUtils::HttpRequest auto& request) {
using namespace ad_utility::url_parser;
// The following code block determines the media type to be used for the
// result. The media type is either determined by the "Accept:" header of
// the request or by the URL parameter "action=..." (for TSV and CSV export,
Expand Down Expand Up @@ -1010,7 +1007,7 @@ Awaitable<void> Server::processQueryOrUpdate(
} catch (const QueryAlreadyInUseError& e) {
responseStatus = http::status::conflict;
exceptionErrorMsg = e.what();
} catch (const NoSupportedMediatypeError& e) {
} catch (const UnknownMediatypeError& e) {
responseStatus = http::status::bad_request;
exceptionErrorMsg = e.what();
} catch (const ad_utility::CancellationException& e) {
Expand Down Expand Up @@ -1139,24 +1136,3 @@ bool Server::checkAccessToken(
return true;
}
}

// _____________________________________________________________________________
// Look up `key` in `parameters`. Returns `std::nullopt` if the key is absent,
// or if an expected `value` is given and differs from the parameter's value.
// Otherwise returns the parameter's value.
std::optional<std::string> Server::checkParameter(
    const ad_utility::url_parser::ParamValueMap& parameters,
    std::string_view key, std::optional<std::string> value) {
  const auto found =
      ad_utility::url_parser::getParameterCheckAtMostOnce(parameters, key);
  if (!found.has_value()) {
    return std::nullopt;
  }
  const std::string actualValue = found.value();

  // Without an expected value, the mere presence of the key is sufficient and
  // the actual value is handed back.
  if (!value.has_value()) {
    return actualValue;
  }
  // With an expected value, only an exact match counts.
  if (value.value() != actualValue) {
    return std::nullopt;
  }
  return value;
}
12 changes: 0 additions & 12 deletions src/engine/Server.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,18 +256,6 @@ class Server {
/// HTTP error response.
bool checkAccessToken(std::optional<std::string_view> accessToken) const;

/// Checks if a URL parameter exists in the request, and it matches the
/// expected `value`. If yes, return the value, otherwise return
/// `std::nullopt`. If `value` is `std::nullopt`, only check if the key
/// exists. We need this because we have parameters like "cmd=stats", where a
/// fixed combination of the key and value determines the kind of action, as
/// well as parameters like "index-decription=...", where the key determines
/// the kind of action. If the key is not found, always return `std::nullopt`.
static std::optional<std::string> checkParameter(
const ad_utility::url_parser::ParamValueMap& parameters,
std::string_view key, std::optional<std::string> value);
FRIEND_TEST(ServerTest, checkParameter);

/// Check if user-provided timeout is authorized with a valid access-token or
/// lower than the server default. Return an empty optional and send a 403
/// Forbidden HTTP response if the change is not allowed. Return the new
Expand Down
1 change: 1 addition & 0 deletions src/global/IndexTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ using LocalVocabIndex = const LocalVocabEntry*;
using TextRecordIndex = ad_utility::TypedIndex<uint64_t, "TextRecordIndex">;
using WordVocabIndex = ad_utility::TypedIndex<uint64_t, "WordVocabIndex">;
using BlankNodeIndex = ad_utility::TypedIndex<uint64_t, "BlankNodeIndex">;
using DocumentIndex = ad_utility::TypedIndex<uint64_t, "DocumentIndex">;
Loading

0 comments on commit a97523f

Please sign in to comment.