From c82b973eff69d84f9a61c867c3ad7361beac4017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0koda?= Date: Thu, 19 Sep 2024 19:24:48 +0200 Subject: [PATCH] Fix configuration env substitution When a single subject and predicate pair has multiple values, all but the last one were lost. --- .../configuration/SubstituteEnvironment.java | 122 ++++++++++-------- .../SubstituteEnvironmentTest.java | 57 ++++++++ 2 files changed, 124 insertions(+), 55 deletions(-) diff --git a/executor/src/main/java/com/linkedpipes/etl/executor/component/configuration/SubstituteEnvironment.java b/executor/src/main/java/com/linkedpipes/etl/executor/component/configuration/SubstituteEnvironment.java index bc81aa97..db7ec3b0 100644 --- a/executor/src/main/java/com/linkedpipes/etl/executor/component/configuration/SubstituteEnvironment.java +++ b/executor/src/main/java/com/linkedpipes/etl/executor/component/configuration/SubstituteEnvironment.java @@ -11,27 +11,20 @@ import org.eclipse.rdf4j.model.ValueFactory; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; class SubstituteEnvironment { - static class StatementPair { + /** + * Represents a value for substitution. + * We can have more than one value or substitution value. + */ + static class Substitution { - public Resource resource; + public List values = new ArrayList<>(); - public IRI predicate; - - public Value value; - - public Value substitute; - - public StatementPair(IRI predicate) { - this.predicate = predicate; - } + public List substitutions = new ArrayList<>(); } @@ -45,59 +38,72 @@ public static EntityReference substitute( List statements = collectStatements( referenceSource, reference.getGraph()); - // Create map of predicates. 
- Map predicateMap = - collectPredicates(referenceSource, configurationType) - .stream().collect(Collectors.toMap( - iri -> iri, - StatementPair::new)); - - // Create map for substitution. - Map substitutionMap = - predicateMap.values().stream().collect(Collectors.toMap( - item -> valueFactory.createIRI( - item.predicate.stringValue() + "Substitution"), - item -> item)); - - // Collect information. + // Create maps of controlled predicates and their substitution predicates. + // We use the convention that the substitution predicate + // is the predicate + "Substitution". + // The idea is to share the same inner map, so we can + // access the same data using predicate as well as + // substitution predicate. + Map> predicateMap = new HashMap<>(); + Map> substitutionMap = new HashMap<>(); + for (var iri : collectPredicates(referenceSource, configurationType)) { + Map value = new HashMap<>(); + predicateMap.put(iri, value); + var substitutionIri = valueFactory.createIRI(iri + "Substitution"); + substitutionMap.put(substitutionIri, value); + } + + // Next we iterate the statements searching for values of + // predicates and substitution predicates. + // We need to pair them together for substitution. + // Other statements we just pass along. 
+ List nextStatements = new ArrayList<>(); for (Statement statement : statements) { + Resource subject = statement.getSubject(); IRI predicate = statement.getPredicate(); + if (predicateMap.containsKey(predicate)) { - StatementPair pair = predicateMap.get(predicate); - if (pair.resource != null - && pair.resource != statement.getSubject()) { - throw new RdfUtilsException("Not supported!"); - } - pair.resource = statement.getSubject(); - pair.value = statement.getObject(); + Objects.requireNonNull(predicateMap.get(predicate)) + .computeIfAbsent(subject, key -> new Substitution()) + .values.add(statement.getObject()); } else if (substitutionMap.containsKey(predicate)) { - StatementPair pair = substitutionMap.get(predicate); - if (pair.resource != null - && pair.resource != statement.getSubject()) { - throw new RdfUtilsException("Not supported!"); - } - pair.resource = statement.getSubject(); - pair.substitute = statement.getObject(); + Objects.requireNonNull(substitutionMap.get(predicate)) + .computeIfAbsent(subject, key -> new Substitution()) + .substitutions.add(statement.getObject()); } else { nextStatements.add(statement); } } - // Generate back statements. - for (StatementPair pair : predicateMap.values()) { - Value value = pair.value; - if (pair.substitute != null) { - value = valueFactory.createLiteral( - substitute(env, pair.substitute.stringValue())); - } - if (pair.resource == null || value == null) { - continue; + // Next we need to assemble back statements in + // predicateMap or substitute them by the substitutions. + for (var predicateEntry : predicateMap.entrySet()) { + IRI predicate = predicateEntry.getKey(); + for (var resourceEntry : predicateEntry.getValue().entrySet()) { + Resource subject = resourceEntry.getKey(); + var value = resourceEntry.getValue(); + if (value.substitutions.isEmpty()) { + // We keep the original values. 
+ for (var object : value.values) { + nextStatements.add(valueFactory.createStatement( + subject, predicate, object)); + } + } else { + // We use the values from substitution. + for (var object : value.substitutions) { + // We support substitution only for strings. + var nextObject = valueFactory.createLiteral( + substitute(env, object.stringValue())); + nextStatements.add(valueFactory.createStatement( + subject, predicate, nextObject)); + } + } + } - nextStatements.add(valueFactory.createStatement( - pair.resource, pair.predicate, value)); } + // At the last step we just store the statements in a new store + // using a new graph. IRI nextGraph = valueFactory.createIRI( reference.getGraph() + "/substituted"); Rdf4jSource nextSource = Rdf4jSource.createInMemory(); @@ -107,6 +113,9 @@ public static EntityReference substitute( reference.getResource(), nextGraph.stringValue(), nextSource); } + /** + * @return All controlled predicates for given configuration type. + */ private static Set collectPredicates( Rdf4jSource referenceSource, String configurationType) throws RdfUtilsException { @@ -119,6 +128,9 @@ private static Set collectPredicates( .collect(Collectors.toSet()); } + /** + * @return All statements in given graph. 
+ */ private static List collectStatements (Rdf4jSource source, String graph) { List result = new ArrayList<>(); diff --git a/executor/src/test/java/com/linkedpipes/etl/executor/component/configuration/SubstituteEnvironmentTest.java b/executor/src/test/java/com/linkedpipes/etl/executor/component/configuration/SubstituteEnvironmentTest.java index 42839cb2..5661baae 100644 --- a/executor/src/test/java/com/linkedpipes/etl/executor/component/configuration/SubstituteEnvironmentTest.java +++ b/executor/src/test/java/com/linkedpipes/etl/executor/component/configuration/SubstituteEnvironmentTest.java @@ -1,10 +1,21 @@ package com.linkedpipes.etl.executor.component.configuration; import com.linkedpipes.etl.executor.ExecutorException; +import com.linkedpipes.etl.executor.api.v1.vocabulary.LP_OBJECTS; +import com.linkedpipes.etl.executor.rdf.entity.EntityReference; +import com.linkedpipes.etl.rdf.rdf4j.Rdf4jSource; +import com.linkedpipes.etl.rdf.utils.RdfBuilder; +import com.linkedpipes.etl.rdf.utils.RdfUtilsException; +import com.linkedpipes.etl.rdf.utils.model.RdfTriple; +import com.linkedpipes.etl.rdf.utils.vocabulary.RDF; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; public class SubstituteEnvironmentTest { @@ -21,4 +32,50 @@ public void simpleSubstitution() throws ExecutorException { env, "x-{LP_ETL_HOST}:{LP_ETL_PORT}")); } + @Test + public void preserveStatements() throws ExecutorException, RdfUtilsException { + Map env = new HashMap<>(); + env.put("LP_ETL_HOST", "lp"); + env.put("LP_ETL_PORT", "8080"); + + final var configurationClass = "http://localhost/Configuration"; + final var property = "http://localhost/1"; + final var graph = "http://localhost/graph"; + + var source = Rdf4jSource.createInMemory(); + var builder = 
+ RdfBuilder.create(source, graph); + + // Configuration entity. + builder.entity("http://localhost/entity") + .iri(RDF.TYPE, LP_OBJECTS.DESCRIPTION) + .iri(LP_OBJECTS.HAS_DESCRIBE, configurationClass) + .iri(LP_OBJECTS.HAS_MEMBER, property); + builder.entity(property) + .iri(LP_OBJECTS.HAS_PROPERTY, "http://localhost/predicate") + .iri(LP_OBJECTS.HAS_CONTROL, "http://localhost/predicateControl"); + + // Data entity. + builder.entity("http://localhost/entity") + .iri(RDF.TYPE, configurationClass) + .iri("http://localhost/predicate", "http://localhost/1") + .iri("http://localhost/predicate", "http://localhost/2"); + + builder.commit(); + + var actual = SubstituteEnvironment.substitute( + env, + source, + new EntityReference("http://localhost/entity", graph, null), + configurationClass); + + List sourceList = new ArrayList<>(); + source.statements(null, graph, sourceList::add); + + List targetList = new ArrayList<>(); + actual.getSource().triples(null, targetList::add); + + // We should get the same number of triples. + Assertions.assertEquals(sourceList.size(), targetList.size()); + } + }