From e0703e98471990a98929f4d10c184077dbaac4c2 Mon Sep 17 00:00:00 2001
From: Joseph Alphonso
Date: Thu, 9 Jan 2025 07:27:50 -0500
Subject: [PATCH 01/16] refactor query ExcerptTest (#2683)

Co-authored-by: Joe Alphonso
---
 .../test/java/datawave/query/ExcerptTest.java | 219 ++++++++----------
 1 file changed, 103 insertions(+), 116 deletions(-)

diff --git a/warehouse/query-core/src/test/java/datawave/query/ExcerptTest.java b/warehouse/query-core/src/test/java/datawave/query/ExcerptTest.java
index 88b3c84cc5b..bb460040708 100644
--- a/warehouse/query-core/src/test/java/datawave/query/ExcerptTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/ExcerptTest.java
@@ -4,8 +4,8 @@
 import static org.junit.Assert.assertTrue;
 
 import java.text.DateFormat;
+import java.text.ParseException;
 import java.text.SimpleDateFormat;
-import java.util.Collection;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -20,6 +20,7 @@
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.jboss.arquillian.container.test.api.Deployment;
@@ -52,13 +53,18 @@ public abstract class ExcerptTest {
 
     @RunWith(Arquillian.class)
-    public static class ShardRange extends datawave.query.ExcerptTest {
+    public static class ShardRangeTest extends datawave.query.ExcerptTest {
         protected static AccumuloClient connector = null;
 
+        @Override
+        protected void runTestQuery(String queryString) throws Exception {
+            super.runTestQuery(connector, queryString);
+        }
+
         @BeforeClass
         public static void setUp() throws Exception {
-            QueryTestTableHelper qtth = new QueryTestTableHelper(ShardRange.class.toString(), log);
+            QueryTestTableHelper qtth = new QueryTestTableHelper(ShardRangeTest.class.toString(), log);
             connector = qtth.client;
             WiseGuysIngest.writeItAll(connector, WiseGuysIngest.WhatKindaRange.SHARD);
             Authorizations auths = new Authorizations("ALL");
@@ -67,21 +73,21 @@ public static void setUp() throws Exception {
             PrintUtility.printTable(connector, auths, QueryTestTableHelper.MODEL_TABLE_NAME);
         }
 
-        @Override
-        protected void runTestQuery(String queryString, Date startDate, Date endDate, Map<String,String> extraParms, Collection<String> goodResults)
-                        throws Exception {
-            super.runTestQuery(connector, queryString, startDate, endDate, extraParms, goodResults);
-        }
     }
 
     @RunWith(Arquillian.class)
-    public static class DocumentRange extends datawave.query.ExcerptTest {
+    public static class DocumentRangeTest extends datawave.query.ExcerptTest {
         protected static AccumuloClient connector = null;
 
+        @Override
+        protected void runTestQuery(String queryString) throws Exception {
+            super.runTestQuery(connector, queryString);
+        }
+
         @BeforeClass
         public static void setUp() throws Exception {
-            QueryTestTableHelper qtth = new QueryTestTableHelper(DocumentRange.class.toString(), log);
+            QueryTestTableHelper qtth = new QueryTestTableHelper(DocumentRangeTest.class.toString(), log);
             connector = qtth.client;
 
             WiseGuysIngest.writeItAll(connector, WiseGuysIngest.WhatKindaRange.DOCUMENT);
@@ -91,11 +97,6 @@ public static void setUp() throws Exception {
             PrintUtility.printTable(connector, auths, QueryTestTableHelper.MODEL_TABLE_NAME);
         }
 
-        @Override
-        protected void runTestQuery(String queryString, Date startDate, Date endDate, Map<String,String> extraParms, Collection<String> goodResults)
-                        throws Exception {
-            super.runTestQuery(connector, queryString, startDate, endDate, extraParms, goodResults);
-        }
     }
 
     private static final Logger log = Logger.getLogger(datawave.query.ExcerptTest.class);
@@ -111,6 +112,11 @@ protected void runTestQuery(String queryString, Date startDate, Date endDate, Ma
     protected KryoDocumentDeserializer deserializer;
 
     private final DateFormat format = new SimpleDateFormat("yyyyMMdd");
+    private Date startDate;
+    private Date endDate;
+
+    private final Map<String,String> extraParameters = new HashMap<>();
+    private final Set<String> expectedResults = new HashSet<>();
 
     @Deployment
     public static JavaArchive createDeployment() throws Exception {
@@ -131,18 +137,46 @@ public static void teardown() {
     }
 
     @Before
-    public void setup() {
+    public void setup() throws ParseException {
         TimeZone.setDefault(TimeZone.getTimeZone("GMT"));
         log.setLevel(Level.TRACE);
         logic.setFullTableScanEnabled(true);
         deserializer = new KryoDocumentDeserializer();
+        startDate = format.parse("19000101");
+        endDate = format.parse("20240101");
+        extraParameters.clear();
+        expectedResults.clear();
+    }
+
+    protected void setDefaultQueryParams() {
+        extraParameters.put("include.grouping.context", "true");
+        extraParameters.put("hit.list", "true");
+        extraParameters.put("return.fields", "HIT_EXCERPT");
+        extraParameters.put("query.syntax", "LUCENE");
+    }
+
+    protected void updateQueryParam(String key, String value) {
+        if (StringUtils.isNoneBlank(key, value)) {
+            extraParameters.put(key, value);
+        }
+    }
+
+    protected void addExpectedResult(String result) {
+        if (StringUtils.isNotBlank(result)) {
+            expectedResults.add(result);
+        }
+    }
+
+    protected boolean initialized() {
+        return !(extraParameters.isEmpty() || expectedResults.isEmpty());
     }
 
-    protected abstract void runTestQuery(String queryString, Date startDate, Date endDate, Map<String,String> extraParms, Collection<String> goodResults)
-                    throws Exception;
+    protected abstract void runTestQuery(String queryString) throws Exception;
 
-    protected void runTestQuery(AccumuloClient connector, String queryString, Date startDate, Date endDate, Map<String,String> extraParms,
-                    Collection<String> goodResults) throws Exception {
+    protected void runTestQuery(AccumuloClient connector, String queryString) throws Exception {
+        if (!initialized()) {
+            throw new Exception("must set query parameters and expected results before running query");
+        }
 
         QueryImpl settings = new QueryImpl();
         settings.setBeginDate(startDate);
@@ -150,7 +184,7 @@ protected void runTestQuery(AccumuloClient connector, String queryString, Date s
         settings.setPagesize(Integer.MAX_VALUE);
         settings.setQueryAuthorizations(auths.serialize());
         settings.setQuery(queryString);
-        settings.setParameters(extraParms);
+        settings.setParameters(extraParameters);
         settings.setId(UUID.randomUUID());
 
         log.debug("query: " + settings.getQuery());
@@ -180,7 +214,7 @@ protected void runTestQuery(AccumuloClient connector, String queryString, Date s
             if (attribute instanceof Attributes) {
                 for (Attribute attr : ((Attributes) attribute).getAttributes()) {
                     String toFind = dictionaryEntry.getKey() + ":" + attr;
-                    boolean found = goodResults.remove(toFind);
+                    boolean found = expectedResults.remove(toFind);
                     if (found)
                         log.debug("removed " + toFind);
                     else {
@@ -191,7 +225,7 @@
 
                     String toFind = dictionaryEntry.getKey() + ":" + dictionaryEntry.getValue();
 
-                    boolean found = goodResults.remove(toFind);
+                    boolean found = expectedResults.remove(toFind);
                     if (found)
                         log.debug("removed " + toFind);
                     else {
@@ -203,210 +237,163 @@ protected void runTestQuery(AccumuloClient connector, String queryString, Date s
         }
 
         assertTrue("unexpected fields returned: " + unexpectedFields, unexpectedFields.isEmpty());
-        assertTrue(goodResults + " was not empty", goodResults.isEmpty());
+        assertTrue(expectedResults + " was not empty", expectedResults.isEmpty());
         assertFalse("No docs were returned!", docs.isEmpty());
     }
 
     @Test
     public void simpleTest() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
 
         String queryString = "QUOTE:(farther) #EXCERPT_FIELDS(QUOTE/2)";
 
         // not sure why the timestamp and delete flag are present
-        Set<String> goodResults = new HashSet<>(Set.of("HIT_EXCERPT:get much [farther] with a: : [] 9223372036854775807 false"));
+        addExpectedResult("HIT_EXCERPT:get much [farther] with a: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void simpleTestBefore() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
 
         String queryString = "QUOTE:(farther) #EXCERPT_FIELDS(QUOTE/2/before)";
 
         // not sure why the timestamp and delete flag are present
-        Set<String> goodResults = new HashSet<>(Set.of("HIT_EXCERPT:get much [farther]: : [] 9223372036854775807 false"));
+        addExpectedResult("HIT_EXCERPT:get much [farther]: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void simpleTestAfter() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
 
         String queryString = "QUOTE:(farther) #EXCERPT_FIELDS(QUOTE/2/after)";
 
         // not sure why the timestamp and delete flag are present
-        Set<String> goodResults = new HashSet<>(Set.of("HIT_EXCERPT:[farther] with a: : [] 9223372036854775807 false"));
+        addExpectedResult("HIT_EXCERPT:[farther] with a: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void lessSimpleBeforeTest() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
 
         String queryString = "QUOTE:(he cant refuse) #EXCERPT_FIELDS(QUOTE/2/before)";
 
-        Set<String> goodResults = new HashSet<>(Set.of("HIT_EXCERPT:an offer [he] [cant] [refuse]: : [] 9223372036854775807 false"));
+        addExpectedResult("HIT_EXCERPT:an offer [he] [cant] [refuse]: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void lessSimpleAfterTest() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
 
         String queryString = "QUOTE:(he cant refuse) #EXCERPT_FIELDS(QUOTE/2/after)";
 
-        Set<String> goodResults = new HashSet<>(Set.of("HIT_EXCERPT:[he] [cant] [refuse]: : [] 9223372036854775807 false"));
+        addExpectedResult("HIT_EXCERPT:[he] [cant] [refuse]: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void lessSimpleTest() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
 
         String queryString = "QUOTE:(he cant refuse) #EXCERPT_FIELDS(QUOTE/2)";
 
-        Set<String> goodResults = new HashSet<>(Set.of("HIT_EXCERPT:an offer [he] [cant] [refuse]: : [] 9223372036854775807 false"));
+        addExpectedResult("HIT_EXCERPT:an offer [he] [cant] [refuse]: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void biggerRangeThanQuoteLength() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
 
         String queryString = "QUOTE:(he cant refuse) #EXCERPT_FIELDS(QUOTE/20)";
 
-        Set<String> goodResults = new HashSet<>(Set.of("HIT_EXCERPT:im gonna make him an offer [he] [cant] [refuse]: : [] 9223372036854775807 false"));
+        addExpectedResult("HIT_EXCERPT:im gonna make him an offer [he] [cant] [refuse]: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void biggerRangeThanQuoteLengthBeforeTest() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
 
         String queryString = "QUOTE:(he cant refuse) #EXCERPT_FIELDS(QUOTE/20/before)";
 
-        Set<String> goodResults = new HashSet<>(Set.of("HIT_EXCERPT:im gonna make him an offer [he] [cant] [refuse]: : [] 9223372036854775807 false"));
+        addExpectedResult("HIT_EXCERPT:im gonna make him an offer [he] [cant] [refuse]: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void biggerRangeThanQuoteLengthAfterTest() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
 
         String queryString = "QUOTE:(he cant refuse) #EXCERPT_FIELDS(QUOTE/20/after)";
 
-        Set<String> goodResults = new HashSet<>(Set.of("HIT_EXCERPT:[he] [cant] [refuse]: : [] 9223372036854775807 false"));
+        addExpectedResult("HIT_EXCERPT:[he] [cant] [refuse]: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void wholeQuote() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
 
         String queryString = "QUOTE:(im gonna make him an offer he cant refuse) #EXCERPT_FIELDS(QUOTE/20)";
 
-        Set<String> goodResults = new HashSet<>(
-                        Set.of("HIT_EXCERPT:[im] [gonna] [make] [him] [an] [offer] [he] [cant] [refuse]: : [] 9223372036854775807 false"));
+        addExpectedResult("HIT_EXCERPT:[im] [gonna] [make] [him] [an] [offer] [he] [cant] [refuse]: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void anotherFirstTerm() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT,UUID");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
+        updateQueryParam("return.fields", "HIT_EXCERPT,UUID");
 
         // "if" is the first term for one event
         String queryString = "QUOTE:(if) #EXCERPT_FIELDS(QUOTE/3)";
 
-        Set<String> goodResults = new HashSet<>(Set.of("UUID.0:SOPRANO", "HIT_EXCERPT:[if] you can quote: : [] 9223372036854775807 false"));
+        addExpectedResult("UUID.0:SOPRANO");
+        addExpectedResult("HIT_EXCERPT:[if] you can quote: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void anotherFirstTermBeforeTest() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT,UUID");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
+        updateQueryParam("return.fields", "HIT_EXCERPT,UUID");
 
         // "if" is the first term for one event
         String queryString = "QUOTE:(if) #EXCERPT_FIELDS(QUOTE/3/before)";
 
-        Set<String> goodResults = new HashSet<>(Set.of("UUID.0:SOPRANO", "HIT_EXCERPT:[if]: : [] 9223372036854775807 false"));
+        addExpectedResult("UUID.0:SOPRANO");
+        addExpectedResult("HIT_EXCERPT:[if]: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 
     @Test
     public void anotherFirstTermAfterTest() throws Exception {
-        Map<String,String> extraParameters = new HashMap<>();
-        extraParameters.put("include.grouping.context", "true");
-        extraParameters.put("hit.list", "true");
-        extraParameters.put("return.fields", "HIT_EXCERPT,UUID");
-        extraParameters.put("query.syntax", "LUCENE");
+        setDefaultQueryParams();
+        updateQueryParam("return.fields", "HIT_EXCERPT,UUID");
 
         // "if" is the first term for one event
         String queryString = "QUOTE:(if) #EXCERPT_FIELDS(QUOTE/3/after)";
 
-        Set<String> goodResults = new HashSet<>(Set.of("UUID.0:SOPRANO", "HIT_EXCERPT:[if] you can quote: : [] 9223372036854775807 false"));
+        addExpectedResult("UUID.0:SOPRANO");
+        addExpectedResult("HIT_EXCERPT:[if] you can quote: : [] 9223372036854775807 false");
 
-        runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults);
+        runTestQuery(queryString);
     }
 }

From ee84f3c1565f5c148029eb7f236e1041ec22972c Mon Sep 17 00:00:00 2001
From: hgklohr
Date: Thu, 9 Jan 2025 13:53:04 +0000
Subject: [PATCH 02/16] Update pom's for 7.14.0-SNAPSHOT

---
 common-test/pom.xml | 2 +-
 contrib/datawave-quickstart/docker/pom.xml | 2 +-
 core/cached-results/pom.xml | 2 +-
 core/common-util/pom.xml | 2 +-
 core/common/pom.xml | 2 +-
 core/connection-pool/pom.xml | 2 +-
 core/map-reduce/pom.xml | 2 +-
 core/modification/pom.xml | 2 +-
 core/pom.xml | 2 +-
 core/query/pom.xml | 2 +-
 core/utils/pom.xml | 2 +-
 docs/pom.xml | 2 +-
 microservices/pom.xml | 2 +-
 microservices/services/pom.xml | 2 +-
 microservices/starters/pom.xml | 2 +-
 pom.xml | 2 +-
 warehouse/accumulo-extensions/pom.xml | 2 +-
 warehouse/age-off-utils/pom.xml | 2 +-
 warehouse/age-off/pom.xml | 2 +-
 warehouse/assemble/datawave/pom.xml | 2 +-
 warehouse/assemble/pom.xml | 2 +-
 warehouse/assemble/webservice/pom.xml | 2 +-
 warehouse/common/pom.xml | 2 +-
 warehouse/core/pom.xml | 2 +-
 warehouse/data-dictionary-core/pom.xml | 2 +-
 warehouse/edge-dictionary-core/pom.xml | 2 +-
 warehouse/edge-model-configuration-core/pom.xml | 2 +-
 warehouse/index-stats/pom.xml | 2 +-
 warehouse/ingest-configuration/pom.xml | 2 +-
 warehouse/ingest-core/pom.xml | 2 +-
 warehouse/ingest-csv/pom.xml | 2 +-
 warehouse/ingest-json/pom.xml | 2 +-
 warehouse/ingest-nyctlc/pom.xml | 2 +-
 warehouse/ingest-scripts/pom.xml | 2 +-
 warehouse/ingest-ssdeep/pom.xml | 2 +-
 warehouse/ingest-wikipedia/pom.xml | 2 +-
 warehouse/metrics-core/pom.xml | 2 +-
 warehouse/ops-tools/config-compare/pom.xml | 2 +-
 warehouse/ops-tools/index-validation/pom.xml | 2 +-
 warehouse/ops-tools/pom.xml | 2 +-
 warehouse/pom.xml | 2 +-
 warehouse/query-core/pom.xml | 2 +-
 warehouse/regression-testing/pom.xml | 2 +-
 warehouse/ssdeep-common/pom.xml | 2 +-
 web-services/accumulo/pom.xml | 2 +-
 web-services/atom/pom.xml | 2 +-
 web-services/cached-results/pom.xml | 2 +-
 web-services/client/pom.xml | 2 +-
 web-services/common-util/pom.xml | 2 +-
 web-services/common/pom.xml | 2 +-
 web-services/deploy/application/pom.xml | 2 +-
 web-services/deploy/configuration/pom.xml | 2 +-
 web-services/deploy/docs/pom.xml | 2 +-
 web-services/deploy/pom.xml | 2 +-
 web-services/deploy/spring-framework-integration/pom.xml | 2 +-
 web-services/dictionary/pom.xml | 2 +-
 web-services/examples/client-login/pom.xml | 2 +-
 web-services/examples/http-client/pom.xml | 2 +-
 web-services/examples/jms-client/pom.xml | 2 +-
 web-services/examples/pom.xml | 2 +-
 web-services/examples/query-war/pom.xml | 2 +-
 web-services/map-reduce-embedded/pom.xml | 2 +-
 web-services/map-reduce-status/pom.xml | 2 +-
 web-services/map-reduce/pom.xml | 2 +-
 web-services/metrics/pom.xml | 2 +-
 web-services/model/pom.xml | 2 +-
 web-services/modification/pom.xml | 2 +-
 web-services/pom.xml | 2 +-
 web-services/query-websocket/pom.xml | 2 +-
 web-services/query/pom.xml | 2 +-
 web-services/rest-api/pom.xml | 2 +-
 web-services/security/pom.xml | 2 +-
 web-services/web-root/pom.xml | 2 +-
 73 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/common-test/pom.xml b/common-test/pom.xml
index c340d1e48aa..fbc23678c5f 100644
--- a/common-test/pom.xml
+++ b/common-test/pom.xml
@@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-common-test
${project.artifactId} diff --git a/contrib/datawave-quickstart/docker/pom.xml b/contrib/datawave-quickstart/docker/pom.xml index de19a23c2d4..4654e03c5d7 100644 --- a/contrib/datawave-quickstart/docker/pom.xml +++ b/contrib/datawave-quickstart/docker/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT ../../../pom.xml quickstart diff --git a/core/cached-results/pom.xml b/core/cached-results/pom.xml index 9c5dbd86f17..55a29764efd 100644 --- a/core/cached-results/pom.xml +++ b/core/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-core-cached-results ${project.artifactId} diff --git a/core/common-util/pom.xml b/core/common-util/pom.xml index f7ee5e356f3..69474bf0727 100644 --- a/core/common-util/pom.xml +++ b/core/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-core-common-util ${project.artifactId} diff --git a/core/common/pom.xml b/core/common/pom.xml index 77522fd9a10..43dc72494cc 100644 --- a/core/common/pom.xml +++ b/core/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-core-common ${project.artifactId} diff --git a/core/connection-pool/pom.xml b/core/connection-pool/pom.xml index 9c3ed01fa4b..b0605d6bc9c 100644 --- a/core/connection-pool/pom.xml +++ b/core/connection-pool/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-core-connection-pool ${project.artifactId} diff --git a/core/map-reduce/pom.xml b/core/map-reduce/pom.xml index 35454964475..188fdc32cb5 100644 --- a/core/map-reduce/pom.xml +++ b/core/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-core-map-reduce ${project.artifactId} diff --git a/core/modification/pom.xml b/core/modification/pom.xml index 3f6e8cf6313..71d992f5758 100644 --- a/core/modification/pom.xml +++ b/core/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-core-modification ${project.artifactId} diff --git a/core/pom.xml b/core/pom.xml index ebb7175043e..0661dcc5306 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT gov.nsa.datawave.core datawave-core-parent diff --git a/core/query/pom.xml b/core/query/pom.xml index a80897693ac..de90c5439de 100644 --- a/core/query/pom.xml +++ b/core/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-core-query ${project.artifactId} diff --git a/core/utils/pom.xml b/core/utils/pom.xml index 9cfbcece2c4..5ed3ac15a91 100644 --- a/core/utils/pom.xml +++ b/core/utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT gov.nsa.datawave.core datawave-utils-parent diff --git a/docs/pom.xml b/docs/pom.xml index 3bf8e7133e3..e81315e71a0 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-docs diff --git a/microservices/pom.xml b/microservices/pom.xml index 396af33b295..bd4754c3fca 100644 --- a/microservices/pom.xml +++ b/microservices/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT gov.nsa.datawave.microservice datawave-microservice-build-parent 
diff --git a/microservices/services/pom.xml b/microservices/services/pom.xml index af7613a61dd..d15a19b13cd 100644 --- a/microservices/services/pom.xml +++ b/microservices/services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-microservice-service-build-parent pom diff --git a/microservices/starters/pom.xml b/microservices/starters/pom.xml index e59a2a4e49f..cea898e34bc 100644 --- a/microservices/starters/pom.xml +++ b/microservices/starters/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-microservice-starter-build-parent pom diff --git a/pom.xml b/pom.xml index 3d470b98d46..e3a202412a6 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT pom DataWave DataWave is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data. diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml index e1d89cc4670..635d99fa0f5 100644 --- a/warehouse/accumulo-extensions/pom.xml +++ b/warehouse/accumulo-extensions/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-accumulo-extensions ${project.artifactId} diff --git a/warehouse/age-off-utils/pom.xml b/warehouse/age-off-utils/pom.xml index 38a8617dd33..3143df48f7d 100644 --- a/warehouse/age-off-utils/pom.xml +++ b/warehouse/age-off-utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-age-off-utils ${project.artifactId} diff --git a/warehouse/age-off/pom.xml b/warehouse/age-off/pom.xml index 64b5d313f9a..209b787775b 100644 --- a/warehouse/age-off/pom.xml +++ b/warehouse/age-off/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-age-off ${project.artifactId} diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml index 960c15ee07e..08bb03c2ad2 100644 --- a/warehouse/assemble/datawave/pom.xml +++ b/warehouse/assemble/datawave/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT assemble-datawave jar diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml index 3c8d9a3f099..d5995e4e246 100644 --- a/warehouse/assemble/pom.xml +++ b/warehouse/assemble/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT assemble-parent pom diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml index 968f755c53c..15df6429028 100644 --- a/warehouse/assemble/webservice/pom.xml +++ b/warehouse/assemble/webservice/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT assemble-webservice ${project.artifactId} diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml index 8fe59a7c3cc..4abb6170c92 100644 --- a/warehouse/common/pom.xml +++ b/warehouse/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-common ${project.artifactId} diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml index fabb7a2798b..8eda4ab399a 100644 --- a/warehouse/core/pom.xml +++ b/warehouse/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-core jar diff 
--git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml index 38e16d610fc..4d1f0a5d6d3 100644 --- a/warehouse/data-dictionary-core/pom.xml +++ b/warehouse/data-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-data-dictionary-core jar diff --git a/warehouse/edge-dictionary-core/pom.xml b/warehouse/edge-dictionary-core/pom.xml index 407f1e4272f..7fef2af5aea 100644 --- a/warehouse/edge-dictionary-core/pom.xml +++ b/warehouse/edge-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-edge-dictionary-core jar diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml index e86159dba4e..a1afa6009d3 100644 --- a/warehouse/edge-model-configuration-core/pom.xml +++ b/warehouse/edge-model-configuration-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-edge-model-configuration-core jar diff --git a/warehouse/index-stats/pom.xml b/warehouse/index-stats/pom.xml index 6b6e9a6506a..aace1d2f9a6 100644 --- a/warehouse/index-stats/pom.xml +++ b/warehouse/index-stats/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-index-stats jar diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml index e48c5861476..c872c25840b 100644 --- a/warehouse/ingest-configuration/pom.xml +++ b/warehouse/ingest-configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ingest-configuration diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index 90fb606c9be..4aa75ea7c98 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ingest-core jar diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml index 15a79611354..e11abd91a50 100644 --- a/warehouse/ingest-csv/pom.xml +++ b/warehouse/ingest-csv/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ingest-csv jar diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml index 7353391f338..e6d5e728d6b 100644 --- a/warehouse/ingest-json/pom.xml +++ b/warehouse/ingest-json/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ingest-json jar diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml index dd9b9acb171..23a1e339ff4 100644 --- a/warehouse/ingest-nyctlc/pom.xml +++ b/warehouse/ingest-nyctlc/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ingest-nyctlc jar diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml index 325bff42ea4..2a1a58a8ad9 100644 --- a/warehouse/ingest-scripts/pom.xml +++ b/warehouse/ingest-scripts/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ingest-scripts ${project.artifactId} diff --git a/warehouse/ingest-ssdeep/pom.xml b/warehouse/ingest-ssdeep/pom.xml index 6fd679cf767..8880548c931 100644 --- a/warehouse/ingest-ssdeep/pom.xml +++ b/warehouse/ingest-ssdeep/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave 
datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ingest-ssdeep diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml index 83d09f468bb..6e1d0be9802 100644 --- a/warehouse/ingest-wikipedia/pom.xml +++ b/warehouse/ingest-wikipedia/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ingest-wikipedia jar diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml index 136f22bbe6e..13d08f1c2e1 100644 --- a/warehouse/metrics-core/pom.xml +++ b/warehouse/metrics-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-metrics-core jar diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml index eefe49bf57d..436882d33d8 100644 --- a/warehouse/ops-tools/config-compare/pom.xml +++ b/warehouse/ops-tools/config-compare/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ops-tools-config-compare diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml index 672d5f542a4..a060e0fecbc 100644 --- a/warehouse/ops-tools/index-validation/pom.xml +++ b/warehouse/ops-tools/index-validation/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ops-tools-index-validation jar diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml index c8e99e19d46..d3217a980a0 100644 --- a/warehouse/ops-tools/pom.xml +++ b/warehouse/ops-tools/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ops-tools-parent pom diff --git a/warehouse/pom.xml b/warehouse/pom.xml index b7d8f73fa7d..1ba8b40b6e1 100644 --- a/warehouse/pom.xml +++ b/warehouse/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-warehouse-parent pom diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml index 25be249ea0c..c21ddfb8f76 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-query-core jar diff --git a/warehouse/regression-testing/pom.xml b/warehouse/regression-testing/pom.xml index 1976f124cd7..af9b2ee69f4 100644 --- a/warehouse/regression-testing/pom.xml +++ b/warehouse/regression-testing/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-regression-testing ${project.artifactId} diff --git a/warehouse/ssdeep-common/pom.xml b/warehouse/ssdeep-common/pom.xml index 419ac4a44a4..795b07133bf 100644 --- a/warehouse/ssdeep-common/pom.xml +++ b/warehouse/ssdeep-common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ssdeep-common diff --git a/web-services/accumulo/pom.xml b/web-services/accumulo/pom.xml index d5105ed39dd..b2b1cc31a04 100644 --- a/web-services/accumulo/pom.xml +++ b/web-services/accumulo/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-accumulo ejb diff --git a/web-services/atom/pom.xml b/web-services/atom/pom.xml index ee9adc557d4..8f05782cb55 100644 --- a/web-services/atom/pom.xml +++ b/web-services/atom/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT 
+ 7.14.0-SNAPSHOT datawave-ws-atom ejb diff --git a/web-services/cached-results/pom.xml b/web-services/cached-results/pom.xml index 1be0e6f460e..f5f3d3a59a5 100644 --- a/web-services/cached-results/pom.xml +++ b/web-services/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-cached-results ejb diff --git a/web-services/client/pom.xml b/web-services/client/pom.xml index b5c61ef65f7..0f9d305fceb 100644 --- a/web-services/client/pom.xml +++ b/web-services/client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-client jar diff --git a/web-services/common-util/pom.xml b/web-services/common-util/pom.xml index 220de0a1cdd..7e864febec6 100644 --- a/web-services/common-util/pom.xml +++ b/web-services/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-common-util jar diff --git a/web-services/common/pom.xml b/web-services/common/pom.xml index dcf641c672a..037e3f1c048 100644 --- a/web-services/common/pom.xml +++ b/web-services/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-common ejb diff --git a/web-services/deploy/application/pom.xml b/web-services/deploy/application/pom.xml index 3902115f4b9..530c7d228f6 100644 --- a/web-services/deploy/application/pom.xml +++ b/web-services/deploy/application/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-deploy-application ear diff --git a/web-services/deploy/configuration/pom.xml b/web-services/deploy/configuration/pom.xml index 86c6b5604d1..b35ce86386b 100644 --- a/web-services/deploy/configuration/pom.xml +++ b/web-services/deploy/configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-deploy-configuration jar diff --git a/web-services/deploy/docs/pom.xml b/web-services/deploy/docs/pom.xml index 65cdbab6453..dfcdbeffdcd 100644 --- a/web-services/deploy/docs/pom.xml +++ b/web-services/deploy/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-deploy-docs war diff --git a/web-services/deploy/pom.xml b/web-services/deploy/pom.xml index c39e8701963..b520bf5a80c 100644 --- a/web-services/deploy/pom.xml +++ b/web-services/deploy/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-deploy-parent diff --git a/web-services/deploy/spring-framework-integration/pom.xml b/web-services/deploy/spring-framework-integration/pom.xml index 837e3fa8dc7..ac7344f1028 100644 --- a/web-services/deploy/spring-framework-integration/pom.xml +++ b/web-services/deploy/spring-framework-integration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT spring-framework-integration ${project.artifactId} diff --git a/web-services/dictionary/pom.xml b/web-services/dictionary/pom.xml index c26e60c41ca..529ac31f810 100644 --- a/web-services/dictionary/pom.xml +++ b/web-services/dictionary/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-dictionary ejb diff --git a/web-services/examples/client-login/pom.xml 
b/web-services/examples/client-login/pom.xml index 894dc78d8af..329e08a01f9 100644 --- a/web-services/examples/client-login/pom.xml +++ b/web-services/examples/client-login/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-examples-client-login ejb diff --git a/web-services/examples/http-client/pom.xml b/web-services/examples/http-client/pom.xml index 1e6d633a372..6f95e154537 100644 --- a/web-services/examples/http-client/pom.xml +++ b/web-services/examples/http-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-examples-http-client jar diff --git a/web-services/examples/jms-client/pom.xml b/web-services/examples/jms-client/pom.xml index cb1b0e29a30..5a9a628792d 100644 --- a/web-services/examples/jms-client/pom.xml +++ b/web-services/examples/jms-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-examples-jms-client jar diff --git a/web-services/examples/pom.xml b/web-services/examples/pom.xml index ff8dc100e50..84fc3c573b9 100644 --- a/web-services/examples/pom.xml +++ b/web-services/examples/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-examples-parent pom diff --git a/web-services/examples/query-war/pom.xml b/web-services/examples/query-war/pom.xml index cd043708f21..faf6b8660f1 100644 --- a/web-services/examples/query-war/pom.xml +++ b/web-services/examples/query-war/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-examples-query-war war diff --git a/web-services/map-reduce-embedded/pom.xml b/web-services/map-reduce-embedded/pom.xml index 7374807a9f0..813c0b801e7 100644 --- a/web-services/map-reduce-embedded/pom.xml +++ b/web-services/map-reduce-embedded/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-map-reduce-embedded jar diff --git a/web-services/map-reduce-status/pom.xml b/web-services/map-reduce-status/pom.xml index 9920d1b606d..41cc05335d2 100644 --- a/web-services/map-reduce-status/pom.xml +++ b/web-services/map-reduce-status/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-map-reduce-status ejb diff --git a/web-services/map-reduce/pom.xml b/web-services/map-reduce/pom.xml index 8f1c89a09b6..3a06f4950bf 100644 --- a/web-services/map-reduce/pom.xml +++ b/web-services/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-map-reduce ejb diff --git a/web-services/metrics/pom.xml b/web-services/metrics/pom.xml index 8b7d3c42bd0..5fc064eb7dd 100644 --- a/web-services/metrics/pom.xml +++ b/web-services/metrics/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-metrics ejb diff --git a/web-services/model/pom.xml b/web-services/model/pom.xml index c1f606e4cfc..0db6186c209 100644 --- a/web-services/model/pom.xml +++ b/web-services/model/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-model ejb diff --git a/web-services/modification/pom.xml b/web-services/modification/pom.xml index bfc58214da7..3e856b48441 100644 --- 
a/web-services/modification/pom.xml +++ b/web-services/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-modification ejb diff --git a/web-services/pom.xml b/web-services/pom.xml index 958283e8255..5887806da9e 100644 --- a/web-services/pom.xml +++ b/web-services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-parent diff --git a/web-services/query-websocket/pom.xml b/web-services/query-websocket/pom.xml index 040a8f2c011..a1e50539dab 100644 --- a/web-services/query-websocket/pom.xml +++ b/web-services/query-websocket/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-query-websocket war diff --git a/web-services/query/pom.xml b/web-services/query/pom.xml index 96b02b5e777..83fb4234fec 100644 --- a/web-services/query/pom.xml +++ b/web-services/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-query ejb diff --git a/web-services/rest-api/pom.xml b/web-services/rest-api/pom.xml index 3b437cf6512..bc81188ade0 100644 --- a/web-services/rest-api/pom.xml +++ b/web-services/rest-api/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-rest-api war diff --git a/web-services/security/pom.xml b/web-services/security/pom.xml index e40f9d3f842..020c2dd1a5f 100644 --- a/web-services/security/pom.xml +++ b/web-services/security/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-security ejb diff --git a/web-services/web-root/pom.xml b/web-services/web-root/pom.xml index 4b7d9001810..52133ee5b36 100644 --- a/web-services/web-root/pom.xml +++ b/web-services/web-root/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.14.0-SNAPSHOT datawave-ws-web-root war From c0eceed9eb2cd0c57ddd14c88554e2d25b24a3fe Mon Sep 17 00:00:00 2001 From: hgklohr Date: Thu, 9 Jan 2025 13:54:36 +0000 Subject: [PATCH 03/16] 7.13.0 --- common-test/pom.xml | 2 +- contrib/datawave-quickstart/docker/pom.xml | 2 +- core/cached-results/pom.xml | 2 +- core/common-util/pom.xml | 2 +- core/common/pom.xml | 2 +- core/connection-pool/pom.xml | 2 +- core/map-reduce/pom.xml | 2 +- core/modification/pom.xml | 2 +- core/pom.xml | 2 +- core/query/pom.xml | 2 +- core/utils/pom.xml | 2 +- docs/pom.xml | 2 +- microservices/pom.xml | 2 +- microservices/services/pom.xml | 2 +- microservices/starters/pom.xml | 2 +- pom.xml | 2 +- warehouse/accumulo-extensions/pom.xml | 2 +- warehouse/age-off-utils/pom.xml | 2 +- warehouse/age-off/pom.xml | 2 +- warehouse/assemble/datawave/pom.xml | 2 +- warehouse/assemble/pom.xml | 2 +- warehouse/assemble/webservice/pom.xml | 2 +- warehouse/common/pom.xml | 2 +- warehouse/core/pom.xml | 2 +- warehouse/data-dictionary-core/pom.xml | 2 +- warehouse/edge-dictionary-core/pom.xml | 2 +- warehouse/edge-model-configuration-core/pom.xml | 2 +- warehouse/index-stats/pom.xml | 2 +- warehouse/ingest-configuration/pom.xml | 2 +- warehouse/ingest-core/pom.xml | 2 +- warehouse/ingest-csv/pom.xml | 2 +- warehouse/ingest-json/pom.xml | 2 +- warehouse/ingest-nyctlc/pom.xml | 2 +- warehouse/ingest-scripts/pom.xml | 2 +- warehouse/ingest-ssdeep/pom.xml | 2 +- warehouse/ingest-wikipedia/pom.xml | 2 +- warehouse/metrics-core/pom.xml | 2 +- 
warehouse/ops-tools/config-compare/pom.xml | 2 +- warehouse/ops-tools/index-validation/pom.xml | 2 +- warehouse/ops-tools/pom.xml | 2 +- warehouse/pom.xml | 2 +- warehouse/query-core/pom.xml | 2 +- warehouse/regression-testing/pom.xml | 2 +- warehouse/ssdeep-common/pom.xml | 2 +- web-services/accumulo/pom.xml | 2 +- web-services/atom/pom.xml | 2 +- web-services/cached-results/pom.xml | 2 +- web-services/client/pom.xml | 2 +- web-services/common-util/pom.xml | 2 +- web-services/common/pom.xml | 2 +- web-services/deploy/application/pom.xml | 2 +- web-services/deploy/configuration/pom.xml | 2 +- web-services/deploy/docs/pom.xml | 2 +- web-services/deploy/pom.xml | 2 +- web-services/deploy/spring-framework-integration/pom.xml | 2 +- web-services/dictionary/pom.xml | 2 +- web-services/examples/client-login/pom.xml | 2 +- web-services/examples/http-client/pom.xml | 2 +- web-services/examples/jms-client/pom.xml | 2 +- web-services/examples/pom.xml | 2 +- web-services/examples/query-war/pom.xml | 2 +- web-services/map-reduce-embedded/pom.xml | 2 +- web-services/map-reduce-status/pom.xml | 2 +- web-services/map-reduce/pom.xml | 2 +- web-services/metrics/pom.xml | 2 +- web-services/model/pom.xml | 2 +- web-services/modification/pom.xml | 2 +- web-services/pom.xml | 2 +- web-services/query-websocket/pom.xml | 2 +- web-services/query/pom.xml | 2 +- web-services/rest-api/pom.xml | 2 +- web-services/security/pom.xml | 2 +- web-services/web-root/pom.xml | 2 +- 73 files changed, 73 insertions(+), 73 deletions(-) diff --git a/common-test/pom.xml b/common-test/pom.xml index c340d1e48aa..bc2e978c6e5 100644 --- a/common-test/pom.xml +++ b/common-test/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-common-test ${project.artifactId} diff --git a/contrib/datawave-quickstart/docker/pom.xml b/contrib/datawave-quickstart/docker/pom.xml index de19a23c2d4..d345c345232 100644 --- a/contrib/datawave-quickstart/docker/pom.xml +++ b/contrib/datawave-quickstart/docker/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.13.0 ../../../pom.xml quickstart diff --git a/core/cached-results/pom.xml b/core/cached-results/pom.xml index 9c5dbd86f17..de7a5073abc 100644 --- a/core/cached-results/pom.xml +++ b/core/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-core-cached-results ${project.artifactId} diff --git a/core/common-util/pom.xml b/core/common-util/pom.xml index f7ee5e356f3..2c44666b388 100644 --- a/core/common-util/pom.xml +++ b/core/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-core-common-util ${project.artifactId} diff --git a/core/common/pom.xml b/core/common/pom.xml index 77522fd9a10..9f35508258a 100644 --- a/core/common/pom.xml +++ b/core/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-core-common ${project.artifactId} diff --git a/core/connection-pool/pom.xml b/core/connection-pool/pom.xml index 9c3ed01fa4b..650fe3bc4ea 100644 --- a/core/connection-pool/pom.xml +++ b/core/connection-pool/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-core-connection-pool ${project.artifactId} diff --git a/core/map-reduce/pom.xml b/core/map-reduce/pom.xml index 35454964475..33987689e28 100644 --- a/core/map-reduce/pom.xml +++ b/core/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 
7.13.0-SNAPSHOT + 7.13.0 datawave-core-map-reduce ${project.artifactId} diff --git a/core/modification/pom.xml b/core/modification/pom.xml index 3f6e8cf6313..adae7736f76 100644 --- a/core/modification/pom.xml +++ b/core/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-core-modification ${project.artifactId} diff --git a/core/pom.xml b/core/pom.xml index ebb7175043e..a25221a4017 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.13.0 gov.nsa.datawave.core datawave-core-parent diff --git a/core/query/pom.xml b/core/query/pom.xml index a80897693ac..05c21917ca5 100644 --- a/core/query/pom.xml +++ b/core/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-core-query ${project.artifactId} diff --git a/core/utils/pom.xml b/core/utils/pom.xml index 9cfbcece2c4..0838e156330 100644 --- a/core/utils/pom.xml +++ b/core/utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0-SNAPSHOT + 7.13.0 gov.nsa.datawave.core datawave-utils-parent diff --git a/docs/pom.xml b/docs/pom.xml index 3bf8e7133e3..6bbdbdbbeae 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-docs diff --git a/microservices/pom.xml b/microservices/pom.xml index 396af33b295..f478237aa29 100644 --- a/microservices/pom.xml +++ b/microservices/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.13.0 gov.nsa.datawave.microservice datawave-microservice-build-parent diff --git a/microservices/services/pom.xml b/microservices/services/pom.xml index af7613a61dd..a84b680219b 100644 --- a/microservices/services/pom.xml +++ b/microservices/services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-microservice-service-build-parent pom diff --git a/microservices/starters/pom.xml b/microservices/starters/pom.xml index e59a2a4e49f..13d17e2d823 100644 --- a/microservices/starters/pom.xml +++ b/microservices/starters/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-microservice-starter-build-parent pom diff --git a/pom.xml b/pom.xml index 3d470b98d46..55d8627af09 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.13.0 pom DataWave DataWave is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data. 
diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml index e1d89cc4670..303647fc45f 100644 --- a/warehouse/accumulo-extensions/pom.xml +++ b/warehouse/accumulo-extensions/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-accumulo-extensions ${project.artifactId}
diff --git a/warehouse/age-off-utils/pom.xml b/warehouse/age-off-utils/pom.xml index 38a8617dd33..b64d7cbda50 100644 --- a/warehouse/age-off-utils/pom.xml +++ b/warehouse/age-off-utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-age-off-utils ${project.artifactId}
diff --git a/warehouse/age-off/pom.xml b/warehouse/age-off/pom.xml index 64b5d313f9a..23067621abe 100644 --- a/warehouse/age-off/pom.xml +++ b/warehouse/age-off/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-age-off ${project.artifactId}
diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml index 960c15ee07e..b765d92c6b3 100644 --- a/warehouse/assemble/datawave/pom.xml +++ b/warehouse/assemble/datawave/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 7.13.0-SNAPSHOT + 7.13.0 assemble-datawave jar
diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml index 3c8d9a3f099..790c95c1ed1 100644 --- a/warehouse/assemble/pom.xml +++ b/warehouse/assemble/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 assemble-parent pom
diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml index 968f755c53c..2451fdea67e 100644 --- a/warehouse/assemble/webservice/pom.xml +++ b/warehouse/assemble/webservice/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 7.13.0-SNAPSHOT + 7.13.0 assemble-webservice ${project.artifactId}
diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml index 8fe59a7c3cc..b3374e4319a 100644 --- a/warehouse/common/pom.xml +++ b/warehouse/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-common ${project.artifactId}
diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml index fabb7a2798b..2f7272a08a0 100644 --- a/warehouse/core/pom.xml +++ b/warehouse/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-core jar
diff --git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml index 38e16d610fc..aa8c03c98ba 100644 --- a/warehouse/data-dictionary-core/pom.xml +++ b/warehouse/data-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-data-dictionary-core jar
diff --git a/warehouse/edge-dictionary-core/pom.xml b/warehouse/edge-dictionary-core/pom.xml index 407f1e4272f..4639b3d2bd7 100644 --- a/warehouse/edge-dictionary-core/pom.xml +++ b/warehouse/edge-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-edge-dictionary-core jar
diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml index e86159dba4e..45fb3c706a4 100644 --- a/warehouse/edge-model-configuration-core/pom.xml +++ b/warehouse/edge-model-configuration-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-edge-model-configuration-core jar
diff --git a/warehouse/index-stats/pom.xml b/warehouse/index-stats/pom.xml index 6b6e9a6506a..f22455b20bf 100644 --- a/warehouse/index-stats/pom.xml +++ b/warehouse/index-stats/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-index-stats jar
diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml index e48c5861476..e12ae2446b2 100644 --- a/warehouse/ingest-configuration/pom.xml +++ b/warehouse/ingest-configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ingest-configuration
diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index 90fb606c9be..959e1c089a9 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ingest-core jar
diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml index 15a79611354..964ba8748f1 100644 --- a/warehouse/ingest-csv/pom.xml +++ b/warehouse/ingest-csv/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ingest-csv jar
diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml index 7353391f338..a4dd3d81a50 100644 --- a/warehouse/ingest-json/pom.xml +++ b/warehouse/ingest-json/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ingest-json jar
diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml index dd9b9acb171..d0cb08d15da 100644 --- a/warehouse/ingest-nyctlc/pom.xml +++ b/warehouse/ingest-nyctlc/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ingest-nyctlc jar
diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml index 325bff42ea4..ec7f78254a8 100644 --- a/warehouse/ingest-scripts/pom.xml +++ b/warehouse/ingest-scripts/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ingest-scripts ${project.artifactId}
diff --git a/warehouse/ingest-ssdeep/pom.xml b/warehouse/ingest-ssdeep/pom.xml index 6fd679cf767..26a41751716 100644 --- a/warehouse/ingest-ssdeep/pom.xml +++ b/warehouse/ingest-ssdeep/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ingest-ssdeep
diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml index 83d09f468bb..90bad0f326c 100644 --- a/warehouse/ingest-wikipedia/pom.xml +++ b/warehouse/ingest-wikipedia/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ingest-wikipedia jar
diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml index 136f22bbe6e..c0571134f66 100644 --- a/warehouse/metrics-core/pom.xml +++ b/warehouse/metrics-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-metrics-core jar
diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml index eefe49bf57d..0453915e0ba 100644 --- a/warehouse/ops-tools/config-compare/pom.xml +++ b/warehouse/ops-tools/config-compare/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ops-tools-config-compare
diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml index 672d5f542a4..b18447730dc 100644 --- a/warehouse/ops-tools/index-validation/pom.xml +++ b/warehouse/ops-tools/index-validation/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ops-tools-index-validation jar
diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml index c8e99e19d46..efb7900d97e 100644 --- a/warehouse/ops-tools/pom.xml +++ b/warehouse/ops-tools/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ops-tools-parent pom
diff --git a/warehouse/pom.xml b/warehouse/pom.xml index b7d8f73fa7d..0570198d970 100644 --- a/warehouse/pom.xml +++ b/warehouse/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-warehouse-parent pom
diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml index 25be249ea0c..94a7d6e0cce 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-query-core jar
diff --git a/warehouse/regression-testing/pom.xml b/warehouse/regression-testing/pom.xml index 1976f124cd7..8688ed307c7 100644 --- a/warehouse/regression-testing/pom.xml +++ b/warehouse/regression-testing/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-regression-testing ${project.artifactId}
diff --git a/warehouse/ssdeep-common/pom.xml b/warehouse/ssdeep-common/pom.xml index 419ac4a44a4..2a05e582e1d 100644 --- a/warehouse/ssdeep-common/pom.xml +++ b/warehouse/ssdeep-common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ssdeep-common
diff --git a/web-services/accumulo/pom.xml b/web-services/accumulo/pom.xml index d5105ed39dd..20b35444b2d 100644 --- a/web-services/accumulo/pom.xml +++ b/web-services/accumulo/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-accumulo ejb
diff --git a/web-services/atom/pom.xml b/web-services/atom/pom.xml index ee9adc557d4..412aed73809 100644 --- a/web-services/atom/pom.xml +++ b/web-services/atom/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-atom ejb
diff --git a/web-services/cached-results/pom.xml b/web-services/cached-results/pom.xml index 1be0e6f460e..0378154e0a4 100644 --- a/web-services/cached-results/pom.xml +++ b/web-services/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-cached-results ejb
diff --git a/web-services/client/pom.xml b/web-services/client/pom.xml index b5c61ef65f7..45d98a064c2 100644 --- a/web-services/client/pom.xml +++ b/web-services/client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-client jar
diff --git a/web-services/common-util/pom.xml b/web-services/common-util/pom.xml index 220de0a1cdd..bab63d6c400 100644 --- a/web-services/common-util/pom.xml +++ b/web-services/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-common-util jar
diff --git a/web-services/common/pom.xml b/web-services/common/pom.xml index dcf641c672a..1501c0b4a65 100644 --- a/web-services/common/pom.xml +++ b/web-services/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-common ejb
diff --git a/web-services/deploy/application/pom.xml b/web-services/deploy/application/pom.xml index 3902115f4b9..2c7c732024e 100644 --- a/web-services/deploy/application/pom.xml +++ b/web-services/deploy/application/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-deploy-application ear
diff --git a/web-services/deploy/configuration/pom.xml b/web-services/deploy/configuration/pom.xml index 86c6b5604d1..a1fcbdb5fb1 100644 --- a/web-services/deploy/configuration/pom.xml +++ b/web-services/deploy/configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-deploy-configuration jar
diff --git a/web-services/deploy/docs/pom.xml b/web-services/deploy/docs/pom.xml index 65cdbab6453..ca3497638de 100644 --- a/web-services/deploy/docs/pom.xml +++ b/web-services/deploy/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-deploy-docs war
diff --git a/web-services/deploy/pom.xml b/web-services/deploy/pom.xml index c39e8701963..7c0e761b3ee 100644 --- a/web-services/deploy/pom.xml +++ b/web-services/deploy/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 gov.nsa.datawave.webservices datawave-ws-deploy-parent
diff --git a/web-services/deploy/spring-framework-integration/pom.xml b/web-services/deploy/spring-framework-integration/pom.xml index 837e3fa8dc7..c604144e048 100644 --- a/web-services/deploy/spring-framework-integration/pom.xml +++ b/web-services/deploy/spring-framework-integration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0-SNAPSHOT + 7.13.0 spring-framework-integration ${project.artifactId}
diff --git a/web-services/dictionary/pom.xml b/web-services/dictionary/pom.xml index c26e60c41ca..64123d6f140 100644 --- a/web-services/dictionary/pom.xml +++ b/web-services/dictionary/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-dictionary ejb
diff --git a/web-services/examples/client-login/pom.xml b/web-services/examples/client-login/pom.xml index 894dc78d8af..22a2b7fcd93 100644 --- a/web-services/examples/client-login/pom.xml +++ b/web-services/examples/client-login/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-examples-client-login ejb
diff --git a/web-services/examples/http-client/pom.xml b/web-services/examples/http-client/pom.xml index 1e6d633a372..725795dd978 100644 --- a/web-services/examples/http-client/pom.xml +++ b/web-services/examples/http-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-examples-http-client jar
diff --git a/web-services/examples/jms-client/pom.xml b/web-services/examples/jms-client/pom.xml index cb1b0e29a30..931b9d58b0d 100644 --- a/web-services/examples/jms-client/pom.xml +++ b/web-services/examples/jms-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-examples-jms-client jar
diff --git a/web-services/examples/pom.xml b/web-services/examples/pom.xml index ff8dc100e50..7ea5d9508a2 100644 --- a/web-services/examples/pom.xml +++ b/web-services/examples/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-examples-parent pom
diff --git a/web-services/examples/query-war/pom.xml b/web-services/examples/query-war/pom.xml index cd043708f21..43596713ed4 100644 --- a/web-services/examples/query-war/pom.xml +++ b/web-services/examples/query-war/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-examples-query-war war
diff --git a/web-services/map-reduce-embedded/pom.xml b/web-services/map-reduce-embedded/pom.xml index 7374807a9f0..036966fb14d 100644 --- a/web-services/map-reduce-embedded/pom.xml +++ b/web-services/map-reduce-embedded/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-map-reduce-embedded jar
diff --git a/web-services/map-reduce-status/pom.xml b/web-services/map-reduce-status/pom.xml index 9920d1b606d..193a764116b 100644 --- a/web-services/map-reduce-status/pom.xml +++ b/web-services/map-reduce-status/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-map-reduce-status ejb
diff --git a/web-services/map-reduce/pom.xml b/web-services/map-reduce/pom.xml index 8f1c89a09b6..c9e22cff2d9 100644 --- a/web-services/map-reduce/pom.xml +++ b/web-services/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-map-reduce ejb
diff --git a/web-services/metrics/pom.xml b/web-services/metrics/pom.xml index 8b7d3c42bd0..400ab1ddd65 100644 --- a/web-services/metrics/pom.xml +++ b/web-services/metrics/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-metrics ejb
diff --git a/web-services/model/pom.xml b/web-services/model/pom.xml index c1f606e4cfc..719833e8260 100644 --- a/web-services/model/pom.xml +++ b/web-services/model/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-model ejb
diff --git a/web-services/modification/pom.xml b/web-services/modification/pom.xml index bfc58214da7..4ca92ae8c72 100644 --- a/web-services/modification/pom.xml +++ b/web-services/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-modification ejb
diff --git a/web-services/pom.xml b/web-services/pom.xml index 958283e8255..8f2e332875a 100644 --- a/web-services/pom.xml +++ b/web-services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0-SNAPSHOT + 7.13.0 gov.nsa.datawave.webservices datawave-ws-parent
diff --git a/web-services/query-websocket/pom.xml b/web-services/query-websocket/pom.xml index 040a8f2c011..4a725549c12 100644 --- a/web-services/query-websocket/pom.xml +++ b/web-services/query-websocket/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-query-websocket war
diff --git a/web-services/query/pom.xml b/web-services/query/pom.xml index 96b02b5e777..55eaa340c01 100644 --- a/web-services/query/pom.xml +++ b/web-services/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-query ejb
diff --git a/web-services/rest-api/pom.xml b/web-services/rest-api/pom.xml index 3b437cf6512..5c9d614979f 100644 --- a/web-services/rest-api/pom.xml +++ b/web-services/rest-api/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-rest-api war
diff --git a/web-services/security/pom.xml b/web-services/security/pom.xml index e40f9d3f842..526bd34aaa8 100644 --- a/web-services/security/pom.xml +++ b/web-services/security/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-security ejb
diff --git a/web-services/web-root/pom.xml b/web-services/web-root/pom.xml index 4b7d9001810..a8a5fc22d63 100644 --- a/web-services/web-root/pom.xml +++ b/web-services/web-root/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0-SNAPSHOT + 7.13.0 datawave-ws-web-root war

From 0eb3c0d7d53be3149c4aed2960d4021279b10548 Mon Sep 17 00:00:00 2001
From: hgklohr
Date: Thu, 9 Jan 2025 13:56:06 +0000
Subject: [PATCH 04/16] 7.13.1-SNAPSHOT

---
 common-test/pom.xml | 2 +-
 contrib/datawave-quickstart/docker/pom.xml | 2 +-
 core/cached-results/pom.xml | 2 +-
 core/common-util/pom.xml | 2 +-
 core/common/pom.xml | 2 +-
 core/connection-pool/pom.xml | 2 +-
 core/map-reduce/pom.xml | 2 +-
 core/modification/pom.xml | 2 +-
 core/pom.xml | 2 +-
 core/query/pom.xml | 2 +-
 core/utils/pom.xml | 2 +-
 docs/pom.xml | 2 +-
 microservices/pom.xml | 2 +-
 microservices/services/pom.xml | 2 +-
 microservices/starters/pom.xml | 2 +-
 pom.xml | 2 +-
 warehouse/accumulo-extensions/pom.xml | 2 +-
 warehouse/age-off-utils/pom.xml | 2 +-
 warehouse/age-off/pom.xml | 2 +-
 warehouse/assemble/datawave/pom.xml | 2 +-
 warehouse/assemble/pom.xml | 2 +-
 warehouse/assemble/webservice/pom.xml | 2 +-
 warehouse/common/pom.xml | 2 +-
 warehouse/core/pom.xml | 2 +-
 warehouse/data-dictionary-core/pom.xml | 2 +-
 warehouse/edge-dictionary-core/pom.xml | 2 +-
 warehouse/edge-model-configuration-core/pom.xml | 2 +-
 warehouse/index-stats/pom.xml | 2 +-
 warehouse/ingest-configuration/pom.xml | 2 +-
 warehouse/ingest-core/pom.xml | 2 +-
 warehouse/ingest-csv/pom.xml | 2 +-
 warehouse/ingest-json/pom.xml | 2 +-
 warehouse/ingest-nyctlc/pom.xml | 2 +-
 warehouse/ingest-scripts/pom.xml | 2 +-
 warehouse/ingest-ssdeep/pom.xml | 2 +-
 warehouse/ingest-wikipedia/pom.xml | 2 +-
 warehouse/metrics-core/pom.xml | 2 +-
 warehouse/ops-tools/config-compare/pom.xml | 2 +-
 warehouse/ops-tools/index-validation/pom.xml | 2 +-
 warehouse/ops-tools/pom.xml | 2 +-
 warehouse/pom.xml | 2 +-
 warehouse/query-core/pom.xml | 2 +-
 warehouse/regression-testing/pom.xml | 2 +-
 warehouse/ssdeep-common/pom.xml | 2 +-
 web-services/accumulo/pom.xml | 2 +-
 web-services/atom/pom.xml | 2 +-
 web-services/cached-results/pom.xml | 2 +-
 web-services/client/pom.xml | 2 +-
 web-services/common-util/pom.xml | 2 +-
 web-services/common/pom.xml | 2 +-
 web-services/deploy/application/pom.xml | 2 +-
 web-services/deploy/configuration/pom.xml | 2 +-
 web-services/deploy/docs/pom.xml | 2 +-
 web-services/deploy/pom.xml | 2 +-
 web-services/deploy/spring-framework-integration/pom.xml | 2 +-
 web-services/dictionary/pom.xml | 2 +-
 web-services/examples/client-login/pom.xml | 2 +-
 web-services/examples/http-client/pom.xml | 2 +-
 web-services/examples/jms-client/pom.xml | 2 +-
 web-services/examples/pom.xml | 2 +-
 web-services/examples/query-war/pom.xml | 2 +-
 web-services/map-reduce-embedded/pom.xml | 2 +-
 web-services/map-reduce-status/pom.xml | 2 +-
 web-services/map-reduce/pom.xml | 2 +-
 web-services/metrics/pom.xml | 2 +-
 web-services/model/pom.xml | 2 +-
 web-services/modification/pom.xml | 2 +-
 web-services/pom.xml | 2 +-
 web-services/query-websocket/pom.xml | 2 +-
 web-services/query/pom.xml | 2 +-
 web-services/rest-api/pom.xml | 2 +-
 web-services/security/pom.xml | 2 +-
 web-services/web-root/pom.xml | 2 +-
 73 files changed, 73 insertions(+), 73 deletions(-)
diff --git a/common-test/pom.xml b/common-test/pom.xml index bc2e978c6e5..045458b58f7 100644 --- a/common-test/pom.xml +++ b/common-test/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-common-test ${project.artifactId}
diff --git a/contrib/datawave-quickstart/docker/pom.xml b/contrib/datawave-quickstart/docker/pom.xml index d345c345232..5383e412525 100644 --- a/contrib/datawave-quickstart/docker/pom.xml +++ b/contrib/datawave-quickstart/docker/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0 + 7.13.1-SNAPSHOT ../../../pom.xml quickstart
diff --git a/core/cached-results/pom.xml b/core/cached-results/pom.xml index de7a5073abc..3978c7efb6f 100644 --- a/core/cached-results/pom.xml +++ b/core/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-core-cached-results ${project.artifactId}
diff --git a/core/common-util/pom.xml b/core/common-util/pom.xml index 2c44666b388..0d53fe07b61 100644 --- a/core/common-util/pom.xml +++ b/core/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-core-common-util ${project.artifactId}
diff --git a/core/common/pom.xml b/core/common/pom.xml index 9f35508258a..53ce69a4172 100644 --- a/core/common/pom.xml +++ b/core/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-core-common ${project.artifactId}
diff --git a/core/connection-pool/pom.xml b/core/connection-pool/pom.xml index 650fe3bc4ea..da23027b4ce 100644 --- a/core/connection-pool/pom.xml +++ b/core/connection-pool/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-core-connection-pool ${project.artifactId}
diff --git a/core/map-reduce/pom.xml b/core/map-reduce/pom.xml index 33987689e28..15c7fe5e471 100644 --- a/core/map-reduce/pom.xml +++ b/core/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-core-map-reduce ${project.artifactId}
diff --git a/core/modification/pom.xml b/core/modification/pom.xml index adae7736f76..d26ca0e6594 100644 --- a/core/modification/pom.xml +++ b/core/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-core-modification ${project.artifactId}
diff --git a/core/pom.xml b/core/pom.xml index a25221a4017..c62c8515a37 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0 + 7.13.1-SNAPSHOT gov.nsa.datawave.core datawave-core-parent
diff --git a/core/query/pom.xml b/core/query/pom.xml index 05c21917ca5..a84c96ef89d 100644 --- a/core/query/pom.xml +++ b/core/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-core-query ${project.artifactId}
diff --git a/core/utils/pom.xml b/core/utils/pom.xml index 0838e156330..fb43d60afaa 100644 --- a/core/utils/pom.xml +++ b/core/utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.13.0 + 7.13.1-SNAPSHOT gov.nsa.datawave.core datawave-utils-parent
diff --git a/docs/pom.xml b/docs/pom.xml index 6bbdbdbbeae..53e9a901a83 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-docs
diff --git a/microservices/pom.xml b/microservices/pom.xml index f478237aa29..f2cbf6d9e2b 100644 --- a/microservices/pom.xml +++ b/microservices/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0 + 7.13.1-SNAPSHOT gov.nsa.datawave.microservice datawave-microservice-build-parent
diff --git a/microservices/services/pom.xml b/microservices/services/pom.xml index a84b680219b..4f587dea1e2 100644 --- a/microservices/services/pom.xml +++ b/microservices/services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-microservice-service-build-parent pom
diff --git a/microservices/starters/pom.xml b/microservices/starters/pom.xml index 13d17e2d823..36149b31dc5 100644 --- a/microservices/starters/pom.xml +++ b/microservices/starters/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-microservice-starter-build-parent pom
diff --git a/pom.xml b/pom.xml index 55d8627af09..3f39f0e4b0b 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 gov.nsa.datawave datawave-parent - 7.13.0 + 7.13.1-SNAPSHOT pom DataWave DataWave is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data.
diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml index 303647fc45f..3e696ddf414 100644 --- a/warehouse/accumulo-extensions/pom.xml +++ b/warehouse/accumulo-extensions/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-accumulo-extensions ${project.artifactId}
diff --git a/warehouse/age-off-utils/pom.xml b/warehouse/age-off-utils/pom.xml index b64d7cbda50..fcfc2cb712a 100644 --- a/warehouse/age-off-utils/pom.xml +++ b/warehouse/age-off-utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-age-off-utils ${project.artifactId}
diff --git a/warehouse/age-off/pom.xml b/warehouse/age-off/pom.xml index 23067621abe..933b4b473b6 100644 --- a/warehouse/age-off/pom.xml +++ b/warehouse/age-off/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-age-off ${project.artifactId}
diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml index b765d92c6b3..894a67be28a 100644 --- a/warehouse/assemble/datawave/pom.xml +++ b/warehouse/assemble/datawave/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 7.13.0 + 7.13.1-SNAPSHOT assemble-datawave jar
diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml index 790c95c1ed1..f00cd18f382 100644 --- a/warehouse/assemble/pom.xml +++ b/warehouse/assemble/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT assemble-parent pom
diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml index 2451fdea67e..ae4ffbc3df0 100644 --- a/warehouse/assemble/webservice/pom.xml +++ b/warehouse/assemble/webservice/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 7.13.0 + 7.13.1-SNAPSHOT assemble-webservice ${project.artifactId}
diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml index b3374e4319a..e5b0569f072 100644 --- a/warehouse/common/pom.xml +++ b/warehouse/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-common ${project.artifactId}
diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml index 2f7272a08a0..37c897827a3 100644 --- a/warehouse/core/pom.xml +++ b/warehouse/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-core jar
diff --git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml index aa8c03c98ba..2dded8e8a61 100644 --- a/warehouse/data-dictionary-core/pom.xml +++ b/warehouse/data-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-data-dictionary-core jar
diff --git a/warehouse/edge-dictionary-core/pom.xml b/warehouse/edge-dictionary-core/pom.xml index 4639b3d2bd7..215e1a9e898 100644 --- a/warehouse/edge-dictionary-core/pom.xml +++ b/warehouse/edge-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-edge-dictionary-core jar
diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml index 45fb3c706a4..c3a05382696 100644 --- a/warehouse/edge-model-configuration-core/pom.xml +++ b/warehouse/edge-model-configuration-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-edge-model-configuration-core jar
diff --git a/warehouse/index-stats/pom.xml b/warehouse/index-stats/pom.xml index f22455b20bf..86d570b36de 100644 --- a/warehouse/index-stats/pom.xml +++ b/warehouse/index-stats/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-index-stats jar
diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml index e12ae2446b2..b172b507bdf 100644 --- a/warehouse/ingest-configuration/pom.xml +++ b/warehouse/ingest-configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ingest-configuration
diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index 959e1c089a9..5c4ad6bc05a 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ingest-core jar
diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml index 964ba8748f1..b3ad9ee24cb 100644 --- a/warehouse/ingest-csv/pom.xml +++ b/warehouse/ingest-csv/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ingest-csv jar
diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml index a4dd3d81a50..8a980e8eab6 100644 --- a/warehouse/ingest-json/pom.xml +++ b/warehouse/ingest-json/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ingest-json jar
diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml index d0cb08d15da..cbc00c35c2e 100644 --- a/warehouse/ingest-nyctlc/pom.xml +++ b/warehouse/ingest-nyctlc/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ingest-nyctlc jar
diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml index ec7f78254a8..e4283d5175a 100644 --- a/warehouse/ingest-scripts/pom.xml +++ b/warehouse/ingest-scripts/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ingest-scripts ${project.artifactId}
diff --git a/warehouse/ingest-ssdeep/pom.xml b/warehouse/ingest-ssdeep/pom.xml index 26a41751716..3dccd9deeea 100644 --- a/warehouse/ingest-ssdeep/pom.xml +++ b/warehouse/ingest-ssdeep/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ingest-ssdeep
diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml index 90bad0f326c..3c9565b03be 100644 --- a/warehouse/ingest-wikipedia/pom.xml +++ b/warehouse/ingest-wikipedia/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ingest-wikipedia jar
diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml index c0571134f66..6f8a943f7fc 100644 --- a/warehouse/metrics-core/pom.xml +++ b/warehouse/metrics-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-metrics-core jar
diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml index 0453915e0ba..27145661274 100644 --- a/warehouse/ops-tools/config-compare/pom.xml +++ b/warehouse/ops-tools/config-compare/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ops-tools-config-compare
diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml index b18447730dc..bb9390983db 100644 --- a/warehouse/ops-tools/index-validation/pom.xml +++ b/warehouse/ops-tools/index-validation/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ops-tools-index-validation jar
diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml index efb7900d97e..13fdada4168 100644 --- a/warehouse/ops-tools/pom.xml +++ b/warehouse/ops-tools/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ops-tools-parent pom
diff --git a/warehouse/pom.xml b/warehouse/pom.xml index 0570198d970..11e95794266 100644 --- a/warehouse/pom.xml +++ b/warehouse/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-warehouse-parent pom
diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml index 94a7d6e0cce..49aaffee094 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-query-core jar
diff --git a/warehouse/regression-testing/pom.xml b/warehouse/regression-testing/pom.xml index 8688ed307c7..12ae0f4c453 100644 --- a/warehouse/regression-testing/pom.xml +++ b/warehouse/regression-testing/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-regression-testing ${project.artifactId}
diff --git a/warehouse/ssdeep-common/pom.xml b/warehouse/ssdeep-common/pom.xml index 2a05e582e1d..bcde4b548a2 100644 --- a/warehouse/ssdeep-common/pom.xml +++ b/warehouse/ssdeep-common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ssdeep-common
diff --git a/web-services/accumulo/pom.xml b/web-services/accumulo/pom.xml index 20b35444b2d..65092c3995d 100644 --- a/web-services/accumulo/pom.xml +++ b/web-services/accumulo/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-accumulo ejb
diff --git a/web-services/atom/pom.xml b/web-services/atom/pom.xml index 412aed73809..b6b12084c58 100644 --- a/web-services/atom/pom.xml +++ b/web-services/atom/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-atom ejb
diff --git a/web-services/cached-results/pom.xml b/web-services/cached-results/pom.xml index 0378154e0a4..a0e06ca23f1 100644 --- a/web-services/cached-results/pom.xml +++ b/web-services/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-cached-results ejb
diff --git a/web-services/client/pom.xml b/web-services/client/pom.xml index 45d98a064c2..01ccc3addf8 100644 --- a/web-services/client/pom.xml +++ b/web-services/client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-client jar
diff --git a/web-services/common-util/pom.xml b/web-services/common-util/pom.xml index bab63d6c400..96011db6281 100644 --- a/web-services/common-util/pom.xml +++ b/web-services/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-common-util jar
diff --git a/web-services/common/pom.xml b/web-services/common/pom.xml index 1501c0b4a65..81a6c793243 100644 --- a/web-services/common/pom.xml +++ b/web-services/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-common ejb
diff --git a/web-services/deploy/application/pom.xml b/web-services/deploy/application/pom.xml index 2c7c732024e..0bac66fde10 100644 --- a/web-services/deploy/application/pom.xml +++ b/web-services/deploy/application/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-deploy-application ear
diff --git a/web-services/deploy/configuration/pom.xml b/web-services/deploy/configuration/pom.xml index a1fcbdb5fb1..18ccce5b57b 100644 --- a/web-services/deploy/configuration/pom.xml +++ b/web-services/deploy/configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-deploy-configuration jar
diff --git a/web-services/deploy/docs/pom.xml b/web-services/deploy/docs/pom.xml index ca3497638de..b50f089e622 100644 --- a/web-services/deploy/docs/pom.xml +++ b/web-services/deploy/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-deploy-docs war
diff --git a/web-services/deploy/pom.xml b/web-services/deploy/pom.xml index 7c0e761b3ee..58bb213649e 100644 --- a/web-services/deploy/pom.xml +++ b/web-services/deploy/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-deploy-parent
diff --git a/web-services/deploy/spring-framework-integration/pom.xml b/web-services/deploy/spring-framework-integration/pom.xml index c604144e048..28216f183ce 100644 --- a/web-services/deploy/spring-framework-integration/pom.xml +++ b/web-services/deploy/spring-framework-integration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.13.0 + 7.13.1-SNAPSHOT spring-framework-integration ${project.artifactId}
diff --git a/web-services/dictionary/pom.xml b/web-services/dictionary/pom.xml index 64123d6f140..f43836f233f 100644 --- a/web-services/dictionary/pom.xml +++ b/web-services/dictionary/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-dictionary ejb
diff --git a/web-services/examples/client-login/pom.xml b/web-services/examples/client-login/pom.xml index 22a2b7fcd93..f49d96e0a00 100644 --- a/web-services/examples/client-login/pom.xml +++ b/web-services/examples/client-login/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-examples-client-login ejb
diff --git a/web-services/examples/http-client/pom.xml b/web-services/examples/http-client/pom.xml index 725795dd978..fb01c99dc5e 100644 --- a/web-services/examples/http-client/pom.xml +++ b/web-services/examples/http-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-examples-http-client jar
diff --git a/web-services/examples/jms-client/pom.xml b/web-services/examples/jms-client/pom.xml index 931b9d58b0d..b8d09eec6c1 100644 --- a/web-services/examples/jms-client/pom.xml +++ b/web-services/examples/jms-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-examples-jms-client jar
diff --git a/web-services/examples/pom.xml b/web-services/examples/pom.xml index 7ea5d9508a2..543a3550187 100644 --- a/web-services/examples/pom.xml +++ b/web-services/examples/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-examples-parent pom
diff --git a/web-services/examples/query-war/pom.xml b/web-services/examples/query-war/pom.xml index 43596713ed4..59db34bb491 100644 --- a/web-services/examples/query-war/pom.xml +++ b/web-services/examples/query-war/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-examples-query-war war
diff --git a/web-services/map-reduce-embedded/pom.xml b/web-services/map-reduce-embedded/pom.xml index 036966fb14d..37b4d012a0d 100644 --- a/web-services/map-reduce-embedded/pom.xml +++ b/web-services/map-reduce-embedded/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-map-reduce-embedded jar
diff --git a/web-services/map-reduce-status/pom.xml b/web-services/map-reduce-status/pom.xml index 193a764116b..a3f7b5459af 100644 --- a/web-services/map-reduce-status/pom.xml +++ b/web-services/map-reduce-status/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-map-reduce-status ejb
diff --git a/web-services/map-reduce/pom.xml b/web-services/map-reduce/pom.xml index c9e22cff2d9..8800b39a938 100644 --- a/web-services/map-reduce/pom.xml +++ b/web-services/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-map-reduce ejb
diff --git a/web-services/metrics/pom.xml b/web-services/metrics/pom.xml index 400ab1ddd65..b46bb03f26b 100644 --- a/web-services/metrics/pom.xml +++ b/web-services/metrics/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-metrics ejb
diff --git a/web-services/model/pom.xml b/web-services/model/pom.xml index 719833e8260..4d4134198cd 100644 --- a/web-services/model/pom.xml +++ b/web-services/model/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-model ejb
diff --git a/web-services/modification/pom.xml b/web-services/modification/pom.xml index 4ca92ae8c72..55cce5b9ccc 100644 --- a/web-services/modification/pom.xml +++ b/web-services/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-modification ejb
diff --git a/web-services/pom.xml b/web-services/pom.xml index 8f2e332875a..0768dde985c 100644 --- a/web-services/pom.xml +++ b/web-services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.13.0 + 7.13.1-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-parent
diff --git a/web-services/query-websocket/pom.xml b/web-services/query-websocket/pom.xml index 4a725549c12..6dfadb80e27 100644 --- a/web-services/query-websocket/pom.xml +++ b/web-services/query-websocket/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-query-websocket war
diff --git a/web-services/query/pom.xml b/web-services/query/pom.xml index 55eaa340c01..0526cc9cc42 100644 --- a/web-services/query/pom.xml +++ b/web-services/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-query ejb
diff --git a/web-services/rest-api/pom.xml b/web-services/rest-api/pom.xml index 5c9d614979f..70d8f3fece1 100644 --- a/web-services/rest-api/pom.xml +++ b/web-services/rest-api/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-rest-api war
diff --git a/web-services/security/pom.xml b/web-services/security/pom.xml index 526bd34aaa8..a57651e14ac 100644 --- a/web-services/security/pom.xml +++ b/web-services/security/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-security ejb
diff --git a/web-services/web-root/pom.xml b/web-services/web-root/pom.xml index a8a5fc22d63..90967456501 100644 --- a/web-services/web-root/pom.xml +++ b/web-services/web-root/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.13.0 + 7.13.1-SNAPSHOT datawave-ws-web-root war

From 99ae8d6f12d50af14ffc6e79918872da2eb15a79 Mon Sep 17 00:00:00 2001
From: Bill Oley
Date: Thu, 9 Jan 2025 17:51:37 -0500
Subject: [PATCH 05/16] MarkingFunctionsFactory.createMarkingFunctions() should
 not be called as a static initializer (#2288) (#2289)

Co-authored-by: Ivan Bella <347158+ivakegg@users.noreply.github.com>
---
 .../core/query/cachedresults/CacheableQueryRowImpl.java | 1 -
 .../handler/dateindex/DateIndexDataTypeHandler.java | 4 +---
 .../java/datawave/ingest/csv/mr/input/CSVRecordReader.java | 3 +--
 .../iterators/FieldIndexCountingIteratorPerVisibility.java | 3 +--
 .../core/iterators/GlobalIndexDateSummaryIterator.java | 3 +--
 .../datawave/core/iterators/ResultCountingIterator.java | 3 +--
 .../main/java/datawave/query/attributes/AttributeBag.java | 7 ++-----
 .../java/datawave/query/discovery/TermInfoAggregation.java | 5 ++---
 .../query/iterators/FieldIndexCountingIterator.java | 4 +---
 .../query/metrics/ShardTableQueryMetricHandler.java | 7 +++----
 10 files changed, 13 insertions(+), 27 deletions(-)
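Note: every hunk in this patch makes the same substitution. A MarkingFunctions instance that used to be created in a static initializer (so the factory ran at class-load time, possibly before the marking configuration behind it was available, which is the problem the subject line cites) is now obtained from the factory at the point of use. A minimal sketch of the two shapes follows; the holder class and method are hypothetical, while MarkingFunctions and its Factory are the real DataWave types that appear in the hunks below.

import java.util.Set;

import org.apache.accumulo.core.security.ColumnVisibility;

import datawave.marking.MarkingFunctions;

// Hypothetical class for illustration only; not repository code.
public class VisibilityCombiner {

    // Before: the factory call ran as part of class loading.
    // protected static final MarkingFunctions markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();

    // After: the factory is consulted only when a visibility is actually needed,
    // by which time the runtime configuration has been established.
    protected ColumnVisibility combine(Set<ColumnVisibility> visibilities) throws MarkingFunctions.Exception {
        return MarkingFunctions.Factory.createMarkingFunctions().combine(visibilities);
    }
}

If the per-call factory lookup ever proved hot, a lazily initialized field could cache the result instead; the patch itself opts for the simpler per-call lookup throughout.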
diff --git a/core/cached-results/src/main/java/datawave/core/query/cachedresults/CacheableQueryRowImpl.java b/core/cached-results/src/main/java/datawave/core/query/cachedresults/CacheableQueryRowImpl.java
index ab4673fb274..ce7fd04ef5e 100644
--- a/core/cached-results/src/main/java/datawave/core/query/cachedresults/CacheableQueryRowImpl.java
+++ b/core/cached-results/src/main/java/datawave/core/query/cachedresults/CacheableQueryRowImpl.java
@@ -19,7 +19,6 @@
 import datawave.data.type.Type;
 import datawave.marking.MarkingFunctions;
-import datawave.marking.MarkingFunctionsFactory;
 import datawave.webservice.query.cachedresults.CacheableQueryRow;
 import datawave.webservice.query.data.ObjectSizeOf;
 import datawave.webservice.query.util.TypedValue;
diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/dateindex/DateIndexDataTypeHandler.java b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/dateindex/DateIndexDataTypeHandler.java
index c4c3da55537..25fa6caee29 100644
--- a/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/dateindex/DateIndexDataTypeHandler.java
+++ b/warehouse/ingest-core/src/main/java/datawave/ingest/mapreduce/handler/dateindex/DateIndexDataTypeHandler.java
@@ -83,8 +83,6 @@ public class DateIndexDataTypeHandler implements DataTypeHandler,
     public static final String DATEINDEX_TNAME = "date.index.table.name";
     public static final String DATEINDEX_LPRIORITY = "date.index.table.loader.priority";
-    private static final MarkingFunctions markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
-
     // comma delimited = values
     public static final String DATEINDEX_TYPE_TO_FIELDS = ".date.index.type.to.field.map";
@@ -367,7 +365,7 @@ private Value createDateIndexValue(int shard) {
      * @return the flattened visibility
      */
     protected byte[] flatten(ColumnVisibility vis) {
-        return markingFunctions.flatten(vis);
+        return MarkingFunctions.Factory.createMarkingFunctions().flatten(vis);
     }
 
     public Text getDateIndexTableName() {
diff --git a/warehouse/ingest-csv/src/main/java/datawave/ingest/csv/mr/input/CSVRecordReader.java b/warehouse/ingest-csv/src/main/java/datawave/ingest/csv/mr/input/CSVRecordReader.java
index 52899115f0d..f09c886b623 100644
--- a/warehouse/ingest-csv/src/main/java/datawave/ingest/csv/mr/input/CSVRecordReader.java
+++ b/warehouse/ingest-csv/src/main/java/datawave/ingest/csv/mr/input/CSVRecordReader.java
@@ -35,7 +35,6 @@ public class CSVRecordReader extends CSVReaderBase implements EventFixer {
     private static final Logger log = Logger.getLogger(CSVRecordReader.class);
     private static final IngestConfiguration ingestConfig = IngestConfigurationFactory.getIngestConfiguration();
-    private static final MarkingFunctions markingFunctions = MarkingFunctionsFactory.createMarkingFunctions();
 
     protected String csvEventId;
     private final Multimap metadataForValidation = ArrayListMultimap.create(100, 1);
@@ -128,7 +127,7 @@ protected void decorateEvent() {
         if (null != this.securityMarkings && !this.securityMarkings.isEmpty()) {
             event.setSecurityMarkings(securityMarkings);
             try {
-                event.setVisibility(markingFunctions.translateToColumnVisibility(securityMarkings));
+                event.setVisibility(MarkingFunctionsFactory.createMarkingFunctions().translateToColumnVisibility(securityMarkings));
             } catch (MarkingFunctions.Exception e) {
                 log.error("Could not set default ColumnVisibility for the event", e);
                 throw new RuntimeException(e);
diff --git a/warehouse/query-core/src/main/java/datawave/core/iterators/FieldIndexCountingIteratorPerVisibility.java b/warehouse/query-core/src/main/java/datawave/core/iterators/FieldIndexCountingIteratorPerVisibility.java
index d250133bf40..9fa11756fe3 100644
--- a/warehouse/query-core/src/main/java/datawave/core/iterators/FieldIndexCountingIteratorPerVisibility.java
+++ b/warehouse/query-core/src/main/java/datawave/core/iterators/FieldIndexCountingIteratorPerVisibility.java
@@ -98,7 +98,6 @@ public class FieldIndexCountingIteratorPerVisibility extends WrappingIterator im
     private Set columnVisibilities = Sets.newHashSet();
     private TreeMap keyCache = null;
-    protected static final MarkingFunctions markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
 
     // -------------------------------------------------------------------------
     // ------------- Constructors
@@ -566,7 +565,7 @@ private Map buildReturnKeys() {
         ColumnVisibility cv = null;
         try {
             // Calculate the columnVisibility for this key from the combiner.
-            cv = markingFunctions.combine(columnVisibilities);
+            cv = MarkingFunctions.Factory.createMarkingFunctions().combine(columnVisibilities);
         } catch (Exception e) {
             log.error("Could not create combined columnVisibility for the count", e);
             return null;
diff --git a/warehouse/query-core/src/main/java/datawave/core/iterators/GlobalIndexDateSummaryIterator.java b/warehouse/query-core/src/main/java/datawave/core/iterators/GlobalIndexDateSummaryIterator.java
index 081a275d2ad..6329697cc99 100644
--- a/warehouse/query-core/src/main/java/datawave/core/iterators/GlobalIndexDateSummaryIterator.java
+++ b/warehouse/query-core/src/main/java/datawave/core/iterators/GlobalIndexDateSummaryIterator.java
@@ -45,7 +45,6 @@ public class GlobalIndexDateSummaryIterator implements SortedKeyValueIterator
     returnCache = new TreeMap<>();
     protected Set columnVisibilities = Sets.newHashSet();
-    private static MarkingFunctions markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
 
     public GlobalIndexDateSummaryIterator() {}
@@ -257,7 +256,7 @@ public Map getKeyValues() throws IOException {
                 Set columnVisibilities = this.columnVisibilitiesMap.get(datatype);
 
                 // Note that the access controls found in the combined ColumnVisibility will be pulled out appropriately here
-                ColumnVisibility cv = markingFunctions.combine(columnVisibilities);
+                ColumnVisibility cv = MarkingFunctions.Factory.createMarkingFunctions().combine(columnVisibilities);
 
                 // Create a new Key compatible with the shardIndex key format
                 Key k = new Key(this.fieldValue, this.fieldName, this.date + '\0' + datatype, new String(cv.getExpression()));
diff --git a/warehouse/query-core/src/main/java/datawave/core/iterators/ResultCountingIterator.java b/warehouse/query-core/src/main/java/datawave/core/iterators/ResultCountingIterator.java
index d704738a36c..04027b8beac 100644
--- a/warehouse/query-core/src/main/java/datawave/core/iterators/ResultCountingIterator.java
+++ b/warehouse/query-core/src/main/java/datawave/core/iterators/ResultCountingIterator.java
@@ -66,7 +66,6 @@ public long read() {
     private String threadName = null;
 
     protected Set columnVisibilities = Sets.newHashSet();
-    private static MarkingFunctions markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
 
     public ResultCountingIterator() {
         threadName = Thread.currentThread().getName();
@@ -210,7 +209,7 @@ public Value getTopValue() {
         ColumnVisibility cv = null;
         try {
-            cv = markingFunctions.combine(columnVisibilities);
+            cv = MarkingFunctions.Factory.createMarkingFunctions().combine(columnVisibilities);
         } catch (MarkingFunctions.Exception e) {
             log.error("Could not create combined columnVisibility for the count", e);
             return null;
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeBag.java b/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeBag.java
index 4278f2235f9..4467808c241 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeBag.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/AttributeBag.java
@@ -12,7 +12,6 @@
 import datawave.marking.MarkingFunctions;
 import datawave.marking.MarkingFunctions.Exception;
-import datawave.marking.MarkingFunctionsFactory;
@@ -23,10 +22,8 @@ public abstract class AttributeBag> extends Attribute
     private static final long ONE_DAY_MS = 1000l * 60 * 60 * 24;
 
-    protected static final MarkingFunctions markingFunctions = MarkingFunctionsFactory.createMarkingFunctions();
-
     public MarkingFunctions getMarkingFunctions() {
-        return markingFunctions;
+        return MarkingFunctions.Factory.createMarkingFunctions();
     }
 
     protected AttributeBag() {
@@ -83,7 +80,7 @@ protected ColumnVisibility combineAndSetColumnVisibilities(Collection
         attr : attributes) {
             columnVisibilities.add(attr.getColumnVisibility());
         }
-        return AttributeBag.markingFunctions.combine(columnVisibilities);
+        return MarkingFunctions.Factory.createMarkingFunctions().combine(columnVisibilities);
     }
 
     private long updateTimestamps() {
diff --git a/warehouse/query-core/src/main/java/datawave/query/discovery/TermInfoAggregation.java b/warehouse/query-core/src/main/java/datawave/query/discovery/TermInfoAggregation.java
index a8b82a790a7..960a83e1889 100644
--- a/warehouse/query-core/src/main/java/datawave/query/discovery/TermInfoAggregation.java
+++ b/warehouse/query-core/src/main/java/datawave/query/discovery/TermInfoAggregation.java
@@ -21,7 +21,6 @@ public class TermInfoAggregation implements Function,Discov
     private static final Logger log = Logger.getLogger(TermInfoAggregation.class);
 
     private Set columnVisibilities = Sets.newHashSet();
-    private static MarkingFunctions markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
     private final boolean separateCountsByColumnVisibility;
     private boolean showReferenceCountInsteadOfTermCount = false;
     private boolean reverseIndex = false;
@@ -74,7 +73,7 @@ public DiscoveredThing apply(Collection from) {
                 chosenCount = showReferenceCountInsteadOfTermCount ? referenceCount : termCount;
 
                 try {
-                    markingFunctions.translateFromColumnVisibility(ti.vis); // just to test parsing
+                    MarkingFunctions.Factory.createMarkingFunctions().translateFromColumnVisibility(ti.vis); // just to test parsing
                     columnVisibilities.add(ti.vis);
 
                     // Keep track of counts for individual vis
@@ -108,7 +107,7 @@ public DiscoveredThing apply(Collection from) {
             ColumnVisibility columnVisibility = null;
             try {
-                columnVisibility = markingFunctions.combine(columnVisibilities);
+                columnVisibility = MarkingFunctions.Factory.createMarkingFunctions().combine(columnVisibilities);
             } catch (Exception e) {
                 log.warn("Invalid columnvisibility after combining!", e);
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterators/FieldIndexCountingIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterators/FieldIndexCountingIterator.java
index 263dd99ee3d..360f604b258 100644
--- a/warehouse/query-core/src/main/java/datawave/query/iterators/FieldIndexCountingIterator.java
+++ b/warehouse/query-core/src/main/java/datawave/query/iterators/FieldIndexCountingIterator.java
@@ -91,8 +91,6 @@ public class FieldIndexCountingIterator extends WrappingIterator implements Sort
     private Set visibilitySet = new HashSet<>();
 
-    protected static final MarkingFunctions markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
-
     // -------------------------------------------------------------------------
     // ------------- Constructors
     public FieldIndexCountingIterator() {
@@ -588,7 +586,7 @@ private Key buildReturnKey() {
         }
         ColumnVisibility cv;
         try {
-            cv = markingFunctions.combine(columnVisibilities);
+            cv = MarkingFunctions.Factory.createMarkingFunctions().combine(columnVisibilities);
         } catch (MarkingFunctions.Exception e) {
             log.error("Could not combine visibilities: " + visibilitySet + " " + e);
             return null;
diff --git a/web-services/metrics/src/main/java/datawave/query/metrics/ShardTableQueryMetricHandler.java b/web-services/metrics/src/main/java/datawave/query/metrics/ShardTableQueryMetricHandler.java
index 4ce461b03d9..1e067dbdc83 100644
--- a/web-services/metrics/src/main/java/datawave/query/metrics/ShardTableQueryMetricHandler.java
+++ b/web-services/metrics/src/main/java/datawave/query/metrics/ShardTableQueryMetricHandler.java
@@ -137,7 +137,6 @@ public class ShardTableQueryMetricHandler extends BaseQueryMetricHandler
     connectorAuthorizationCollection = null;
     private String connectorAuthorizations = null;
-    private MarkingFunctions markingFunctions = null;
 
     @SuppressWarnings("FieldCanBeLocal")
     private final String JOB_ID = "job_201109071404_1";
@@ -165,7 +164,6 @@ public ShardTableQueryMetricHandler() {
             String accumuloPassword = conf.get("AccumuloRecordWriter.password");
             byte[] encodedAccumuloPassword = Base64.encodeBase64(accumuloPassword.getBytes());
             conf.set("AccumuloRecordWriter.password", new String(encodedAccumuloPassword));
-            markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
         }
 
     @PostConstruct
@@ -286,11 +284,12 @@ private Multimap getEntries(AbstractColumnBasedHandler
             event.setDate(storedQueryMetric.getCreateDate().getTime());
             // get security markings from metric, otherwise default to PUBLIC
             Map markings = updatedQueryMetric.getMarkings();
-            if (markingFunctions == null || markings == null || markings.isEmpty()) {
+            if (markings == null || markings.isEmpty()) {
                 event.setVisibility(new ColumnVisibility(DEFAULT_SECURITY_MARKING));
             } else {
                 try {
-                    event.setVisibility(this.markingFunctions.translateToColumnVisibility(markings));
+                    MarkingFunctions markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
+                    event.setVisibility(markingFunctions.translateToColumnVisibility(markings));
                 } catch (Exception e) {
                     log.error(e.getMessage(), e);
                     event.setVisibility(new ColumnVisibility(DEFAULT_SECURITY_MARKING));

From a6d488bbfe64fa476e9084a5217715f941b59868 Mon Sep 17 00:00:00 2001
From: Moon Moon
Date: Fri, 10 Jan 2025 13:29:57 -0500
Subject: [PATCH 06/16] Removing unused reduced response from various
 Attribute methods (#2630)

* Removing unused calls from Attribute-based classes
---
 .../datawave/query/attributes/Attribute.java | 8 ++---
 .../datawave/query/attributes/Attributes.java | 14 ++------
 .../query/attributes/Cardinality.java | 14 ++------
 .../datawave/query/attributes/Content.java | 18 +++-------
 .../query/attributes/DateContent.java | 14 ++------
 .../query/attributes/DiacriticContent.java | 14 ++------
 .../datawave/query/attributes/Document.java | 35 ++++---------------
 .../query/attributes/DocumentKey.java | 14 ++------
 .../datawave/query/attributes/GeoPoint.java | 14 ++------
 .../datawave/query/attributes/Geometry.java | 14 ++------
 .../datawave/query/attributes/IpAddress.java | 14 ++------
 .../datawave/query/attributes/Latitude.java | 14 ++------
 .../datawave/query/attributes/Longitude.java | 14 ++------
 .../datawave/query/attributes/Numeric.java | 14 ++------
 .../attributes/PreNormalizedAttribute.java | 14 ++------
 .../query/attributes/TypeAttribute.java | 14 ++------
 .../query/function/DataTypeAsField.java | 2 +-
 .../query/function/DocumentProjection.java | 12 +++----
 .../query/function/FacetedGrouping.java | 2 +-
 .../datawave/query/function/GroupFields.java | 2 +-
 .../KryoCVAwareSerializableSerializer.java | 2 +-
 .../datawave/query/function/LimitFields.java | 2 +-
 .../query/function/RemoveGroupingContext.java | 2 +-
 .../WritableDocumentSerializer.java | 2 +-
 .../query/iterator/GroupingIterator.java | 2 +-
 .../query/transformer/ContentTransform.java | 2 +-
 .../transformer/FieldMappingTransform.java | 2 +-
 .../transformer/FieldRenameTransform.java | 2 +-
 .../common/grouping/DocumentGrouperTest.java | 4 +--
 .../transformer/FieldRenameTransformTest.java | 6 ++--
 .../transformer/UniqueTransformTest.java | 2 +-
 31 files changed, 63 insertions(+), 226 deletions(-)
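Note: this patch is an API simplification rather than a behavior change. Attribute and its subclasses carried parallel write(..., reducedResponse) overloads whose flag was never acted on, and the hunks below collapse each pair into the single flag-free method. A sketch of the resulting serialization surface follows; the two abstract methods mirror the post-patch signatures visible in the Attribute.java hunk, while the sketch classes themselves are hypothetical and not repository code.

import java.io.DataOutput;
import java.io.IOException;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Output;

// Hypothetical stand-in for the Attribute base class after the patch: one write()
// per serialization path, no reducedResponse variants.
abstract class AttributeSketch {
    public abstract void write(DataOutput output) throws IOException;

    public abstract void write(Kryo kryo, Output output);
}

// Hypothetical concrete attribute showing what each subclass keeps.
class StringAttributeSketch extends AttributeSketch {
    private final String content;

    StringAttributeSketch(String content) {
        this.content = content;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // Exactly one code path now; previously a delegating write(out, false)
        // forwarded to a reducedResponse overload that ignored the flag.
        out.writeUTF(content);
    }

    @Override
    public void write(Kryo kryo, Output output) {
        output.writeString(content);
    }
}

Callers are simplified the same way: as the Attributes.java hunks below show, container types now invoke attr.write(out) and attr.write(kryo, output) directly instead of threading the unused flag through.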
+- .../transformer/FieldMappingTransform.java | 2 +- .../transformer/FieldRenameTransform.java | 2 +- .../common/grouping/DocumentGrouperTest.java | 4 +-- .../transformer/FieldRenameTransformTest.java | 6 ++-- .../transformer/UniqueTransformTest.java | 2 +- 31 files changed, 63 insertions(+), 226 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java index 976f3105940..b425e6451f0 100644 --- a/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java +++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java @@ -160,7 +160,7 @@ protected void clearMetadata() { metadata = null; } - protected void writeMetadata(DataOutput out, Boolean reducedResponse) throws IOException { + protected void writeMetadata(DataOutput out) throws IOException { out.writeBoolean(isMetadataSet()); if (isMetadataSet()) { byte[] cvBytes = getColumnVisibility().getExpression(); @@ -172,7 +172,7 @@ protected void writeMetadata(DataOutput out, Boolean reducedResponse) throws IOE } } - protected void writeMetadata(Kryo kryo, Output output, Boolean reducedResponse) { + protected void writeMetadata(Kryo kryo, Output output) { output.writeBoolean(isMetadataSet()); if (isMetadataSet()) { byte[] cvBytes = getColumnVisibility().getExpression(); @@ -324,9 +324,9 @@ public Attribute reduceToKeep() { } } - public abstract void write(DataOutput output, boolean reducedResponse) throws IOException; + public abstract void write(DataOutput output) throws IOException; - public abstract void write(Kryo kryo, Output output, Boolean reducedResponse); + public abstract void write(Kryo kryo, Output output); public abstract Object getData(); diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Attributes.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Attributes.java index 50a2e5ea684..2701959bd9e 100644 --- a/warehouse/query-core/src/main/java/datawave/query/attributes/Attributes.java +++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Attributes.java @@ -111,11 +111,6 @@ public Object getData() { @Override public void write(DataOutput out) throws IOException { - write(out, false); - } - - @Override - public void write(DataOutput out, boolean reducedResponse) throws IOException { WritableUtils.writeVInt(out, _count); out.writeBoolean(trackSizes); // Write out the number of Attributes we're going to store @@ -126,7 +121,7 @@ public void write(DataOutput out, boolean reducedResponse) throws IOException { WritableUtils.writeString(out, attr.getClass().getName()); // Defer to the concrete instance to write() itself - attr.write(out, reducedResponse); + attr.write(out); } } @@ -284,11 +279,6 @@ public Attribute reduceToKeep() { @Override public void write(Kryo kryo, Output output) { - write(kryo, output, false); - } - - @Override - public void write(Kryo kryo, Output output, Boolean reducedResponse) { output.writeInt(this._count, true); output.writeBoolean(this.trackSizes); // Write out the number of Attributes we're going to store @@ -299,7 +289,7 @@ public void write(Kryo kryo, Output output, Boolean reducedResponse) { output.writeString(attr.getClass().getName()); // Defer to the concrete instance to write() itself - attr.write(kryo, output, reducedResponse); + attr.write(kryo, output); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Cardinality.java 
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Cardinality.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Cardinality.java
index e2f91a2d2a0..1f430fab40e 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Cardinality.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Cardinality.java
@@ -62,12 +62,7 @@ public Object getData() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, content.fieldName);
         WritableUtils.writeString(out, content.lower);
         WritableUtils.writeString(out, content.upper);
@@ -151,12 +146,7 @@ public int hashCode() {

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        super.writeMetadata(kryo, output, reducedResponse);
+        super.writeMetadata(kryo, output);
         output.writeString(this.content.fieldName);
         output.writeString(this.content.lower);
         output.writeString(this.content.upper);
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Content.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Content.java
index 4f31949b33c..e1aaf04ccba 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Content.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Content.java
@@ -62,18 +62,13 @@ public Object getData() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, content);
         WritableUtils.writeVInt(out, toKeep ? 1 : 0);
         out.writeBoolean(source != null);
         if (source != null) {
             WritableUtils.writeString(out, source.getClass().getCanonicalName());
-            source.write(out, reducedResponse);
+            source.write(out);
         }
     }
@@ -137,18 +132,13 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        super.writeMetadata(kryo, output, reducedResponse);
+        super.writeMetadata(kryo, output);
         output.writeString(this.content);
         output.writeBoolean(this.toKeep);
         output.writeBoolean(this.source != null);
         if (source != null) {
             output.writeString(this.source.getClass().getCanonicalName());
-            source.write(kryo, output, reducedResponse);
+            source.write(kryo, output);
         }
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/DateContent.java b/warehouse/query-core/src/main/java/datawave/query/attributes/DateContent.java
index a548dffa102..33ac90f1e00 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/DateContent.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/DateContent.java
@@ -102,12 +102,7 @@ public Object getData() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, normalizer.parseToString(this.value.getTime()));
     }
@@ -160,12 +155,7 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        writeMetadata(kryo, output, reducedResponse);
+        writeMetadata(kryo, output);
         output.writeString(normalizer.parseToString(this.value.getTime()));
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/DiacriticContent.java b/warehouse/query-core/src/main/java/datawave/query/attributes/DiacriticContent.java
index 3d41f494456..c23e07e6d45 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/DiacriticContent.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/DiacriticContent.java
@@ -51,12 +51,7 @@ public Object getData() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, content);
         WritableUtils.writeVInt(out, toKeep ? 1 : 0);
     }
@@ -108,12 +103,7 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        writeMetadata(kryo, output, reducedResponse);
+        writeMetadata(kryo, output);
         output.writeString(this.content);
         output.writeBoolean(this.toKeep);
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Document.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Document.java
index 8bb3d146589..68448abcaa9 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Document.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Document.java
@@ -238,7 +238,7 @@ public Attribute get(String key) {
     }

     public void put(String key, Attribute value) {
-        put(key, value, false, false);
+        put(key, value, false);
     }

     /**
@@ -250,10 +250,8 @@ public void put(String key, Attribute value) {
      *            a value
      * @param includeGroupingContext
      *            flag to include grouping context
-     * @param reducedResponse
-     *            flag for reducedResponse
      */
-    public void replace(String key, Attribute value, Boolean includeGroupingContext, boolean reducedResponse) {
+    public void replace(String key, Attribute value, Boolean includeGroupingContext) {
         dict.put(key, value);
     }
@@ -268,10 +266,8 @@ public void replace(String key, Attribute value, Boolean includeGroupingConte
      *            the attribute value
      * @param includeGroupingContext
      *            flag to include grouping context
-     * @param reducedResponse
-     *            flag for reducedResponse
      */
-    public void put(String key, Attribute value, Boolean includeGroupingContext, boolean reducedResponse) {
+    public void put(String key, Attribute value, Boolean includeGroupingContext) {

         if (0 == value.size()) {
             if (log.isTraceEnabled()) {
@@ -383,25 +379,16 @@ public void put(String key, Attribute value, Boolean includeGroupingContext,

     public void put(Entry>> entry, Boolean includeGroupingContext) {
         // No grouping context in the document.
-        this.put(entry.getKey(), entry.getValue(), includeGroupingContext, false);
-    }
-
-    public void put(Entry>> entry, Boolean includeGroupingContext, boolean reducedResponse) {
-        // No grouping context in the document.
-        this.put(entry.getKey(), entry.getValue(), includeGroupingContext, reducedResponse);
+        this.put(entry.getKey(), entry.getValue(), includeGroupingContext);
     }

     public void putAll(Iterator>>> iterator, Boolean includeGroupingContext) {
-        putAll(iterator, includeGroupingContext, false);
-    }
-
-    public void putAll(Iterator>>> iterator, Boolean includeGroupingContext, boolean reducedResponse) {
         if (null == iterator) {
             return;
         }
         while (iterator.hasNext()) {
-            put(iterator.next(), includeGroupingContext, reducedResponse);
+            put(iterator.next(), includeGroupingContext);
         }
     }
@@ -546,11 +533,6 @@ public Attribute reduceToKeep() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
         WritableUtils.writeVInt(out, _count);
         out.writeBoolean(trackSizes);
         WritableUtils.writeVLong(out, _bytes);
@@ -782,11 +764,6 @@ public Collection visit(Collection queryFieldNames, Datawave

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
         output.writeInt(this._count, true);
         output.writeBoolean(trackSizes);
         output.writeLong(this._bytes, true);
@@ -801,7 +778,7 @@ public void write(Kryo kryo, Output output, Boolean reducedResponse) {
             Attribute attribute = entry.getValue();

             output.writeString(attribute.getClass().getName());
-            attribute.write(kryo, output, reducedResponse);
+            attribute.write(kryo, output);
         }

         output.writeLong(this.shardTimestamp);
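[Editor's note] Document's mutators shrink accordingly: put and replace drop their trailing reducedResponse flag, and call sites simply drop a trailing false. A hedged stand-in showing the narrowed surface (SketchDocument is illustrative; the real class also tracks sizes and column visibility):

```java
import java.util.HashMap;
import java.util.Map;

// Illustrative stand-in for the narrowed Document.put surface.
class SketchDocument {
    private final Map<String, Object> dict = new HashMap<>();

    // Old shape: put(key, value, includeGroupingContext, reducedResponse);
    // the last flag is gone, so callers pass only three arguments.
    public void put(String key, Object value, boolean includeGroupingContext) {
        dict.put(key, value);
    }

    public static void main(String[] args) {
        SketchDocument doc = new SketchDocument();
        doc.put("FIELD", "value", true); // was: doc.put("FIELD", "value", true, false)
    }
}
```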
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/DocumentKey.java b/warehouse/query-core/src/main/java/datawave/query/attributes/DocumentKey.java
index ebf06d7cbd9..19928c386a5 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/DocumentKey.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/DocumentKey.java
@@ -85,12 +85,7 @@ public Object getData() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, getShardId());
         WritableUtils.writeString(out, getDataType());
         WritableUtils.writeString(out, getUid());
@@ -133,12 +128,7 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        super.writeMetadata(kryo, output, reducedResponse);
+        super.writeMetadata(kryo, output);
         output.writeString(this.getShardId());
         output.writeString(this.getDataType());
         output.writeString(this.getUid());
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/GeoPoint.java b/warehouse/query-core/src/main/java/datawave/query/attributes/GeoPoint.java
index c1eae1841ae..2546a800580 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/GeoPoint.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/GeoPoint.java
@@ -60,12 +60,7 @@ public Object getData() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, this.point);
         WritableUtils.writeVInt(out, toKeep ? 1 : 0);
     }
@@ -134,12 +129,7 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        writeMetadata(kryo, output, reducedResponse);
+        writeMetadata(kryo, output);
         output.writeString(this.point);
         output.writeBoolean(this.toKeep);
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Geometry.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Geometry.java
index 705278d6212..d3232df9a5d 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Geometry.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Geometry.java
@@ -75,12 +75,7 @@ public Object getData() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeCompressedByteArray(out, write());
         WritableUtils.writeVInt(out, toKeep ? 1 : 0);
     }
@@ -154,12 +149,7 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        writeMetadata(kryo, output, reducedResponse);
+        writeMetadata(kryo, output);
         output.writeBoolean(this.toKeep);
         byte[] wellKnownBinary = write();
         output.writeInt(wellKnownBinary.length);
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/IpAddress.java b/warehouse/query-core/src/main/java/datawave/query/attributes/IpAddress.java
index 01550a48341..4c2c334ef74 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/IpAddress.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/IpAddress.java
@@ -61,12 +61,7 @@ private void setNormalizedValue(String value) {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, this.value.toString());
         WritableUtils.writeVInt(out, toKeep ? 1 : 0);
     }
@@ -122,12 +117,7 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        writeMetadata(kryo, output, reducedResponse);
+        writeMetadata(kryo, output);
         output.writeString(this.value.toString());
         output.writeBoolean(this.toKeep);
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Latitude.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Latitude.java
index be7f1ecade3..3eadc7da6e2 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Latitude.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Latitude.java
@@ -61,12 +61,7 @@ public Object getData() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, this.latitude);
         WritableUtils.writeVInt(out, toKeep ? 1 : 0);
     }
@@ -134,12 +129,7 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        writeMetadata(kryo, output, reducedResponse);
+        writeMetadata(kryo, output);
         output.writeString(this.latitude);
         output.writeBoolean(this.toKeep);
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Longitude.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Longitude.java
index 1f0960fc62b..6c998ddddf4 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Longitude.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Longitude.java
@@ -61,12 +61,7 @@ public Object getData() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, this.longitude);
         WritableUtils.writeVInt(out, toKeep ? 1 : 0);
     }
@@ -134,12 +129,7 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        writeMetadata(kryo, output, reducedResponse);
+        writeMetadata(kryo, output);
         output.writeString(this.longitude);
         output.writeBoolean(this.toKeep);
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Numeric.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Numeric.java
index f18e17d861e..80a154b5dc2 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/Numeric.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Numeric.java
@@ -110,12 +110,7 @@ private void setNormalizedValue(Number value) {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, normalizedValue);
         WritableUtils.writeVInt(out, toKeep ? 1 : 0);
     }
@@ -173,12 +168,7 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        writeMetadata(kryo, output, reducedResponse);
+        writeMetadata(kryo, output);
         output.writeString(this.normalizedValue);
         output.writeBoolean(this.toKeep);
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/PreNormalizedAttribute.java b/warehouse/query-core/src/main/java/datawave/query/attributes/PreNormalizedAttribute.java
index 1858ac6e6a5..3bdb3e68f68 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/PreNormalizedAttribute.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/PreNormalizedAttribute.java
@@ -77,12 +77,7 @@ public int compareTo(PreNormalizedAttribute o) {

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
-        super.writeMetadata(kryo, output, reducedResponse);
+        super.writeMetadata(kryo, output);
         output.writeString(this.value);
         output.writeBoolean(this.toKeep);
     }
@@ -96,12 +91,7 @@ public void read(Kryo kryo, Input input) {

     @Override
     public void write(DataOutput output) throws IOException {
-        write(output, false);
-    }
-
-    @Override
-    public void write(DataOutput output, boolean reducedResponse) throws IOException {
-        super.writeMetadata(output, reducedResponse);
+        super.writeMetadata(output);
         WritableUtils.writeString(output, this.value);
         WritableUtils.writeVInt(output, toKeep ? 1 : 0);
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/TypeAttribute.java b/warehouse/query-core/src/main/java/datawave/query/attributes/TypeAttribute.java
index 6dd0c7185bd..4ad28d7f4a5 100644
--- a/warehouse/query-core/src/main/java/datawave/query/attributes/TypeAttribute.java
+++ b/warehouse/query-core/src/main/java/datawave/query/attributes/TypeAttribute.java
@@ -58,13 +58,8 @@ public Object getData() {

     @Override
     public void write(DataOutput out) throws IOException {
-        write(out, false);
-    }
-
-    @Override
-    public void write(DataOutput out, boolean reducedResponse) throws IOException {
         WritableUtils.writeString(out, datawaveType.getClass().toString());
-        writeMetadata(out, reducedResponse);
+        writeMetadata(out);
         WritableUtils.writeString(out, datawaveType.getDelegateAsString());
         WritableUtils.writeVInt(out, toKeep ? 1 : 0);
     }
@@ -131,13 +126,8 @@ public Collection visit(Collection fieldNames, DatawaveJexlC

     @Override
     public void write(Kryo kryo, Output output) {
-        write(kryo, output, false);
-    }
-
-    @Override
-    public void write(Kryo kryo, Output output, Boolean reducedResponse) {
         output.writeString(datawaveType.getClass().getName());
-        super.writeMetadata(kryo, output, reducedResponse);
+        super.writeMetadata(kryo, output);
         output.writeString(this.datawaveType.getDelegateAsString());
         output.writeBoolean(this.toKeep);
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/DataTypeAsField.java b/warehouse/query-core/src/main/java/datawave/query/function/DataTypeAsField.java
index 2ad6ef47fec..3881b1ebb6d 100644
--- a/warehouse/query-core/src/main/java/datawave/query/function/DataTypeAsField.java
+++ b/warehouse/query-core/src/main/java/datawave/query/function/DataTypeAsField.java
@@ -29,7 +29,7 @@ public DataTypeAsField(String key) {
     @Override
     public Entry apply(Entry from) {
         Content dataType = extractDataType(from.getKey(), from.getValue().isToKeep());
-        from.getValue().put(key, dataType, false, false);
+        from.getValue().put(key, dataType, false);

         return from;
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/DocumentProjection.java b/warehouse/query-core/src/main/java/datawave/query/function/DocumentProjection.java
index e676eb118bd..4f5f0424389 100644
--- a/warehouse/query-core/src/main/java/datawave/query/function/DocumentProjection.java
+++ b/warehouse/query-core/src/main/java/datawave/query/function/DocumentProjection.java
@@ -121,7 +121,7 @@ private Document trim(Document d) {
                     Document newSubDoc = trim((Document) attr);

                     if (0 < newSubDoc.size()) {
-                        newDoc.put(fieldName, newSubDoc.copy(), this.includeGroupingContext, this.reducedResponse);
+                        newDoc.put(fieldName, newSubDoc.copy(), this.includeGroupingContext);
                     }

                     continue;
@@ -129,7 +129,7 @@ private Document trim(Document d) {
                     Attributes subAttrs = trim((Attributes) attr, fieldName);

                     if (0 < subAttrs.size()) {
-                        newDoc.put(fieldName, subAttrs.copy(), this.includeGroupingContext, this.reducedResponse);
+                        newDoc.put(fieldName, subAttrs.copy(), this.includeGroupingContext);
                     }

                     continue;
@@ -137,7 +137,7 @@ private Document trim(Document d) {
                 }

                 // We just want to add this subtree
-                newDoc.put(fieldName, (Attribute) attr.copy(), this.includeGroupingContext, this.reducedResponse);
+                newDoc.put(fieldName, (Attribute) attr.copy(), this.includeGroupingContext);

             } else if (!projection.isUseExcludes()) {
                 // excludes will completely exclude a subtree, but an includes may
@@ -147,16 +147,16 @@ private Document trim(Document d) {
                     Document newSubDoc = trim((Document) attr);

                     if (0 < newSubDoc.size()) {
-                        newDoc.put(fieldName, newSubDoc.copy(), this.includeGroupingContext, this.reducedResponse);
+                        newDoc.put(fieldName, newSubDoc.copy(), this.includeGroupingContext);
                     }
                 } else if (attr instanceof Attributes) {
-                    // Since Document instances can be nested under attributes and vice-versa
+                    // Since Document instances can be nested under attributes and vice versa
                    // all the way down, we need to pass along the fieldName so that when we
                    // have come up with a nested document it can be evaluated by its own name
                     Attributes subAttrs = trim((Attributes) attr, fieldName);

                     if (0 < subAttrs.size()) {
-                        newDoc.put(fieldName, subAttrs.copy(), this.includeGroupingContext, this.reducedResponse);
+                        newDoc.put(fieldName, subAttrs.copy(), this.includeGroupingContext);
                     }
                 }
             }
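[Editor's note] The projection hunks above only shed the flag, but they also outline the trim algorithm: recurse into nested documents and attribute sets, and re-add a subtree only when something survives the projection. A rough sketch of that shape, using plain maps instead of DataWave's attribute types:

```java
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

// Rough shape of the recursive projection trim; maps stand in for Document.
final class SketchProjection {
    static Map<String, Object> trim(Map<String, Object> doc, Set<String> includes) {
        Map<String, Object> kept = new HashMap<>();
        for (Map.Entry<String, Object> entry : doc.entrySet()) {
            Object value = entry.getValue();
            if (value instanceof Map) {
                // nested document: trim recursively, keep only if non-empty
                @SuppressWarnings("unchecked")
                Map<String, Object> sub = trim((Map<String, Object>) value, includes);
                if (!sub.isEmpty()) {
                    kept.put(entry.getKey(), sub); // no reducedResponse flag anymore
                }
            } else if (includes.contains(entry.getKey())) {
                kept.put(entry.getKey(), value); // keep an included leaf as-is
            }
        }
        return kept;
    }
}
```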
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/FacetedGrouping.java b/warehouse/query-core/src/main/java/datawave/query/function/FacetedGrouping.java
index 7ce2111c559..78b31d4f8cd 100644
--- a/warehouse/query-core/src/main/java/datawave/query/function/FacetedGrouping.java
+++ b/warehouse/query-core/src/main/java/datawave/query/function/FacetedGrouping.java
@@ -142,7 +142,7 @@ public Entry apply(Entry input) {
             if (log.isTraceEnabled())
                 log.trace("entries" + newDocumentAttributes.entries());
             for (Entry> newAttr : newDocumentAttributes.entries()) {
-                currentDoc.replace(newAttr.getKey(), newAttr.getValue(), false, false);
+                currentDoc.replace(newAttr.getKey(), newAttr.getValue(), false);
             }
             if (log.isTraceEnabled())
                 log.trace("currentDoc" + currentDoc);
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/GroupFields.java b/warehouse/query-core/src/main/java/datawave/query/function/GroupFields.java
index bd49cc5942c..43483e64a61 100644
--- a/warehouse/query-core/src/main/java/datawave/query/function/GroupFields.java
+++ b/warehouse/query-core/src/main/java/datawave/query/function/GroupFields.java
@@ -83,7 +83,7 @@ private void applyCounts(Document doc, Map groupFieldsMap) {
         for (Entry groupFieldCountEntry : groupFieldsMap.entrySet()) {
             Attribute removedAttr = doc.remove(groupFieldCountEntry.getKey());
             log.debug("removed from document:" + groupFieldCountEntry.getKey());
-            doc.put(groupFieldCountEntry.getKey(), new Numeric(groupFieldCountEntry.getValue(), doc.getMetadata(), removedAttr.isToKeep()), true, false);
+            doc.put(groupFieldCountEntry.getKey(), new Numeric(groupFieldCountEntry.getValue(), doc.getMetadata(), removedAttr.isToKeep()), true);
             log.debug("added to document:" + groupFieldCountEntry.getKey() + " with count of " + groupFieldCountEntry.getValue());
         }
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/KryoCVAwareSerializableSerializer.java b/warehouse/query-core/src/main/java/datawave/query/function/KryoCVAwareSerializableSerializer.java
index 56a75e97c97..af6379cc913 100644
--- a/warehouse/query-core/src/main/java/datawave/query/function/KryoCVAwareSerializableSerializer.java
+++ b/warehouse/query-core/src/main/java/datawave/query/function/KryoCVAwareSerializableSerializer.java
@@ -25,7 +25,7 @@ public void setReducedResponse(Boolean reducedResponse) {
     @Override
     public void write(Kryo kryo, Output output, KryoSerializable object) {
         if (object instanceof Document) {
-            ((Document) object).write(kryo, output, getReducedResponse());
+            ((Document) object).write(kryo, output);
         } else {
             object.write(kryo, output);
         }
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java b/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java
index 9a7e9aaa941..9c879c47192 100644
--- a/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java
+++ b/warehouse/query-core/src/main/java/datawave/query/function/LimitFields.java
@@ -259,7 +259,7 @@ public Entry apply(Entry entry) {
                 int keepers = countKeepersForFieldMap.get(keyNoGrouping);
                 int originalCount = countForFieldMap.get(keyNoGrouping);
                 if (originalCount > keepers) {
-                    document.put(keyNoGrouping + ORIGINAL_COUNT_SUFFIX, new Numeric(originalCount, document.getMetadata(), document.isToKeep()), true, false);
+                    document.put(keyNoGrouping + ORIGINAL_COUNT_SUFFIX, new Numeric(originalCount, document.getMetadata(), document.isToKeep()), true);

                     // some sanity checks
                     int missesRemaining = countMissesRemainingForFieldMap.get(keyNoGrouping);
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/RemoveGroupingContext.java b/warehouse/query-core/src/main/java/datawave/query/function/RemoveGroupingContext.java
index 9e12e02173f..6573ff9c62c 100644
--- a/warehouse/query-core/src/main/java/datawave/query/function/RemoveGroupingContext.java
+++ b/warehouse/query-core/src/main/java/datawave/query/function/RemoveGroupingContext.java
@@ -34,7 +34,7 @@ public Entry apply(Entry entry) {
         }
         // put them all back without the grouping context
         for (Entry>> goner : toRemove) {
-            entry.getValue().put(goner.getKey(), goner.getValue(), false, false);
+            entry.getValue().put(goner.getKey(), goner.getValue(), false);
         }
         return entry;
     }
diff --git a/warehouse/query-core/src/main/java/datawave/query/function/serializer/WritableDocumentSerializer.java b/warehouse/query-core/src/main/java/datawave/query/function/serializer/WritableDocumentSerializer.java
index 1a85136aa1f..822dedd1612 100644
--- a/warehouse/query-core/src/main/java/datawave/query/function/serializer/WritableDocumentSerializer.java
+++ b/warehouse/query-core/src/main/java/datawave/query/function/serializer/WritableDocumentSerializer.java
@@ -26,7 +26,7 @@ public byte[] serialize(Document doc) {
         DataOutputStream dos = new DataOutputStream(baos);

         try {
-            doc.write(dos, reducedResponse);
+            doc.write(dos);
         } catch (IOException e) {
             throw new RuntimeException("Could not convert Document through write().", e);
         }
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/GroupingIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/GroupingIterator.java
index 557bfa5da6b..f7aa52e2d12 100644
--- a/warehouse/query-core/src/main/java/datawave/query/iterator/GroupingIterator.java
+++ b/warehouse/query-core/src/main/java/datawave/query/iterator/GroupingIterator.java
@@ -194,7 +194,7 @@ private Document flatten(List documents) {
                 attribute.setColumnVisibility(entry.getValue().getColumnVisibility());
                 // Call copy() on the GroupingTypeAttribute to get a plain TypeAttribute instead of a GroupingTypeAttribute that is package protected and won't
                 // serialize.
-                flattened.put(entry.getKey() + "." + Integer.toHexString(context).toUpperCase(), (TypeAttribute) attribute.copy(), true, false);
+                flattened.put(entry.getKey() + "." + Integer.toHexString(context).toUpperCase(), (TypeAttribute) attribute.copy(), true);
             }
             // Increment the context by one.
             context++;
diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/ContentTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/ContentTransform.java
index 02c2f45fba6..15030c84086 100644
--- a/warehouse/query-core/src/main/java/datawave/query/transformer/ContentTransform.java
+++ b/warehouse/query-core/src/main/java/datawave/query/transformer/ContentTransform.java
@@ -38,7 +38,7 @@ public Map.Entry apply(@Nullable Map.Entry keyDocume
                 Attribute contentField = document.remove(contentFieldName);
                 if (contentField.getData().toString().equalsIgnoreCase("true")) {
                     Content c = new Content(uid, contentField.getMetadata(), document.isToKeep());
-                    document.put(contentFieldName, c, false, this.reducedResponse);
+                    document.put(contentFieldName, c, false);
                 }
             }
         }
diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/FieldMappingTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/FieldMappingTransform.java
index c5f2210eca7..3d82452ea11 100644
--- a/warehouse/query-core/src/main/java/datawave/query/transformer/FieldMappingTransform.java
+++ b/warehouse/query-core/src/main/java/datawave/query/transformer/FieldMappingTransform.java
@@ -33,7 +33,7 @@ public Map.Entry apply(@Nullable Map.Entry keyDocume
             if (!document.containsKey(primaryField)) {
                 for (String secondaryField : this.primaryToSecondaryFieldMap.get(primaryField)) {
                     if (document.containsKey(secondaryField)) {
-                        document.put(primaryField, document.get(secondaryField), includeGroupingContext, this.reducedResponse);
+                        document.put(primaryField, document.get(secondaryField), includeGroupingContext);
                         break;
                     }
                 }
diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/FieldRenameTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/FieldRenameTransform.java
index dc0f0e6269d..fd4cad3a35d 100644
--- a/warehouse/query-core/src/main/java/datawave/query/transformer/FieldRenameTransform.java
+++ b/warehouse/query-core/src/main/java/datawave/query/transformer/FieldRenameTransform.java
@@ -60,7 +60,7 @@ public Map.Entry apply(@Nullable Map.Entry keyDocume
                 for (String mappedField : mappedFields) {
                     if (!mappedField.equals(baseField)) {
                         String newField = field.replace(baseField, mappedField);
-                        document.put(newField, document.get(field), this.includeGroupingContext, this.reducedResponse);
+                        document.put(newField, document.get(field), this.includeGroupingContext);
                     }
                 }
                 if (!mappedFields.contains(baseField)) {
diff --git a/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java b/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java
index 9afa59ec019..93bba6524fd 100644
--- a/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/common/grouping/DocumentGrouperTest.java
@@ -1163,9 +1163,9 @@ public void addEntryTo(Document document) {
             if (attributes.isEmpty()) {
                 throw new IllegalArgumentException("No attributes set for document entry");
             } else if (attributes.size() == 1) {
-                document.put(fieldName, this.attributes.get(0), true, false);
+                document.put(fieldName, this.attributes.get(0), true);
             } else {
-                document.put(fieldName, new Attributes(this.attributes, true), true, false);
+                document.put(fieldName, new Attributes(this.attributes, true), true);
             }
         }
     }
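[Editor's note] On the serializer side (WritableDocumentSerializer and the Kryo path in the hunks above), serialize() now simply forwards to the one-argument write(). A minimal sketch of that call path, with a hypothetical writable interface standing in for Document:

```java
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Minimal sketch of the post-patch serializer path; SketchWritable is a
// hypothetical stand-in for Document's writable surface.
final class SketchDocumentSerializer {
    interface SketchWritable {
        void write(DataOutputStream out) throws IOException;
    }

    public byte[] serialize(SketchWritable doc) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(baos);
        try {
            doc.write(dos); // previously: doc.write(dos, reducedResponse)
        } catch (IOException e) {
            throw new RuntimeException("Could not convert Document through write().", e);
        }
        return baos.toByteArray();
    }
}
```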
diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/FieldRenameTransformTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/FieldRenameTransformTest.java
index dcbedc67414..d9e179e3153 100644
--- a/warehouse/query-core/src/test/java/datawave/query/transformer/FieldRenameTransformTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/transformer/FieldRenameTransformTest.java
@@ -106,9 +106,9 @@ public void renameWithGroupingContextAndMultipleMappings() throws MarkingFunctio
         fieldMap.add("field1=field6");

         Document d = new Document();
-        d.put("field1.field.11", new Numeric("1", key, true), true, false);
-        d.put("field2.field.12", new Numeric("2", key, true), true, false);
-        d.put("field3.field.13", new Numeric("3", key, true), true, false);
+        d.put("field1.field.11", new Numeric("1", key, true), true);
+        d.put("field2.field.12", new Numeric("2", key, true), true);
+        d.put("field3.field.13", new Numeric("3", key, true), true);

         DocumentTransform transformer = new FieldRenameTransform(fieldMap, true, false);
diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java
index ba0354c12e1..3eb407a43a3 100644
--- a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java
@@ -598,7 +598,7 @@ private String getRandomValue() {
     }

     InputDocumentBuilder withKeyValue(String key, String value) {
-        document.put(key, new DiacriticContent(value, document.getMetadata(), true), true, false);
+        document.put(key, new DiacriticContent(value, document.getMetadata(), true), true);
         return this;
     }
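[Editor's note] The two release-engineering patches that follow touch only version strings: every module pom moves from 7.14.0-SNAPSHOT to the 7.14.0 release, then forward to 7.15.0-SNAPSHOT for the next development cycle. A tree-wide bump like this is typically generated with the versions-maven-plugin (for example `mvn versions:set -DnewVersion=7.14.0`), though the patches themselves do not show which tool was used.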
From cdde21fb73205b713467fdb680683044d657d03a Mon Sep 17 00:00:00 2001
From: Ivan Bella <347158+ivakegg@users.noreply.github.com>
Date: Fri, 10 Jan 2025 18:38:34 +0000
Subject: [PATCH 07/16] 7.14.0

---
 common-test/pom.xml | 2 +- contrib/datawave-quickstart/docker/pom.xml | 2 +- core/cached-results/pom.xml | 2 +- core/common-util/pom.xml | 2 +- core/common/pom.xml | 2 +- core/connection-pool/pom.xml | 2 +- core/map-reduce/pom.xml | 2 +- core/modification/pom.xml | 2 +- core/pom.xml | 2 +- core/query/pom.xml | 2 +- core/utils/pom.xml | 2 +- docs/pom.xml | 2 +- microservices/pom.xml | 2 +- microservices/services/pom.xml | 2 +- microservices/starters/pom.xml | 2 +- pom.xml | 2 +- warehouse/accumulo-extensions/pom.xml | 2 +- warehouse/age-off-utils/pom.xml | 2 +- warehouse/age-off/pom.xml | 2 +- warehouse/assemble/datawave/pom.xml | 2 +- warehouse/assemble/pom.xml | 2 +- warehouse/assemble/webservice/pom.xml | 2 +- warehouse/common/pom.xml | 2 +- warehouse/core/pom.xml | 2 +- warehouse/data-dictionary-core/pom.xml | 2 +- warehouse/edge-dictionary-core/pom.xml | 2 +- warehouse/edge-model-configuration-core/pom.xml | 2 +- warehouse/index-stats/pom.xml | 2 +- warehouse/ingest-configuration/pom.xml | 2 +- warehouse/ingest-core/pom.xml | 2 +- warehouse/ingest-csv/pom.xml | 2 +- warehouse/ingest-json/pom.xml | 2 +- warehouse/ingest-nyctlc/pom.xml | 2 +- warehouse/ingest-scripts/pom.xml | 2 +- warehouse/ingest-ssdeep/pom.xml | 2 +- warehouse/ingest-wikipedia/pom.xml | 2 +- warehouse/metrics-core/pom.xml | 2 +- warehouse/ops-tools/config-compare/pom.xml | 2 +- warehouse/ops-tools/index-validation/pom.xml | 2 +- warehouse/ops-tools/pom.xml | 2 +- warehouse/pom.xml | 2 +- warehouse/query-core/pom.xml | 2 +- warehouse/regression-testing/pom.xml | 2 +- warehouse/ssdeep-common/pom.xml | 2 +- web-services/accumulo/pom.xml | 2 +- web-services/atom/pom.xml | 2 +- web-services/cached-results/pom.xml | 2 +- web-services/client/pom.xml | 2 +- web-services/common-util/pom.xml | 2 +- web-services/common/pom.xml | 2 +- web-services/deploy/application/pom.xml | 2 +- web-services/deploy/configuration/pom.xml | 2 +- web-services/deploy/docs/pom.xml | 2 +- web-services/deploy/pom.xml | 2 +- web-services/deploy/spring-framework-integration/pom.xml | 2 +- web-services/dictionary/pom.xml | 2 +- web-services/examples/client-login/pom.xml | 2 +- web-services/examples/http-client/pom.xml | 2 +- web-services/examples/jms-client/pom.xml | 2 +- web-services/examples/pom.xml | 2 +- web-services/examples/query-war/pom.xml | 2 +- web-services/map-reduce-embedded/pom.xml | 2 +- web-services/map-reduce-status/pom.xml | 2 +- web-services/map-reduce/pom.xml | 2 +- web-services/metrics/pom.xml | 2 +- web-services/model/pom.xml | 2 +- web-services/modification/pom.xml | 2 +- web-services/pom.xml | 2 +- web-services/query-websocket/pom.xml | 2 +- web-services/query/pom.xml | 2 +- web-services/rest-api/pom.xml | 2 +- web-services/security/pom.xml | 2 +- web-services/web-root/pom.xml | 2 +-
 73 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/common-test/pom.xml b/common-test/pom.xml index fbc23678c5f..58cc2d57bbf 100644 --- a/common-test/pom.xml +++ b/common-test/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-common-test ${project.artifactId} diff --git a/contrib/datawave-quickstart/docker/pom.xml b/contrib/datawave-quickstart/docker/pom.xml index 4654e03c5d7..9e8b8878773 100644 --- a/contrib/datawave-quickstart/docker/pom.xml +++ b/contrib/datawave-quickstart/docker/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0-SNAPSHOT + 7.14.0 ../../../pom.xml quickstart diff --git a/core/cached-results/pom.xml b/core/cached-results/pom.xml index 55a29764efd..5e8d74ee729 100644 --- a/core/cached-results/pom.xml +++ b/core/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-core-cached-results ${project.artifactId} diff --git a/core/common-util/pom.xml b/core/common-util/pom.xml index 69474bf0727..12d7f9309dc 100644 --- a/core/common-util/pom.xml +++ b/core/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-core-common-util ${project.artifactId} diff --git a/core/common/pom.xml b/core/common/pom.xml index 43dc72494cc..b08ef93286f 100644 --- a/core/common/pom.xml +++ b/core/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-core-common ${project.artifactId} diff --git a/core/connection-pool/pom.xml b/core/connection-pool/pom.xml index b0605d6bc9c..aace93f68c5 100644 --- a/core/connection-pool/pom.xml +++ b/core/connection-pool/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-core-connection-pool ${project.artifactId} diff --git a/core/map-reduce/pom.xml b/core/map-reduce/pom.xml index 188fdc32cb5..6d2a2eb60c0 100644 --- a/core/map-reduce/pom.xml +++ b/core/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-core-map-reduce ${project.artifactId} diff --git a/core/modification/pom.xml b/core/modification/pom.xml index 71d992f5758..83b808aa295 100644 --- a/core/modification/pom.xml +++ b/core/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0-SNAPSHOT
+ 7.14.0 datawave-core-modification ${project.artifactId} diff --git a/core/pom.xml b/core/pom.xml index 0661dcc5306..0edba4a179d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0-SNAPSHOT + 7.14.0 gov.nsa.datawave.core datawave-core-parent diff --git a/core/query/pom.xml b/core/query/pom.xml index de90c5439de..105d2b86bf2 100644 --- a/core/query/pom.xml +++ b/core/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-core-query ${project.artifactId} diff --git a/core/utils/pom.xml b/core/utils/pom.xml index 5ed3ac15a91..9d5d9d110cd 100644 --- a/core/utils/pom.xml +++ b/core/utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0-SNAPSHOT + 7.14.0 gov.nsa.datawave.core datawave-utils-parent diff --git a/docs/pom.xml b/docs/pom.xml index e81315e71a0..c13a63eb4da 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-docs diff --git a/microservices/pom.xml b/microservices/pom.xml index bd4754c3fca..353e155fd5d 100644 --- a/microservices/pom.xml +++ b/microservices/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0-SNAPSHOT + 7.14.0 gov.nsa.datawave.microservice datawave-microservice-build-parent diff --git a/microservices/services/pom.xml b/microservices/services/pom.xml index d15a19b13cd..4693df1a32c 100644 --- a/microservices/services/pom.xml +++ b/microservices/services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-microservice-service-build-parent pom diff --git a/microservices/starters/pom.xml b/microservices/starters/pom.xml index cea898e34bc..60e7638b98d 100644 --- a/microservices/starters/pom.xml +++ b/microservices/starters/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-microservice-starter-build-parent pom diff --git a/pom.xml b/pom.xml index e3a202412a6..7db793f3dff 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 gov.nsa.datawave datawave-parent - 7.14.0-SNAPSHOT + 7.14.0 pom DataWave DataWave is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data. 
diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml index 635d99fa0f5..a6d6eeca426 100644 --- a/warehouse/accumulo-extensions/pom.xml +++ b/warehouse/accumulo-extensions/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-accumulo-extensions ${project.artifactId} diff --git a/warehouse/age-off-utils/pom.xml b/warehouse/age-off-utils/pom.xml index 3143df48f7d..d98e5353b45 100644 --- a/warehouse/age-off-utils/pom.xml +++ b/warehouse/age-off-utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-age-off-utils ${project.artifactId} diff --git a/warehouse/age-off/pom.xml b/warehouse/age-off/pom.xml index 209b787775b..d175826977e 100644 --- a/warehouse/age-off/pom.xml +++ b/warehouse/age-off/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-age-off ${project.artifactId} diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml index 08bb03c2ad2..0ffa7478ec1 100644 --- a/warehouse/assemble/datawave/pom.xml +++ b/warehouse/assemble/datawave/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 7.14.0-SNAPSHOT + 7.14.0 assemble-datawave jar diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml index d5995e4e246..a74410a9ccc 100644 --- a/warehouse/assemble/pom.xml +++ b/warehouse/assemble/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 assemble-parent pom diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml index 15df6429028..d677ed50103 100644 --- a/warehouse/assemble/webservice/pom.xml +++ b/warehouse/assemble/webservice/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 7.14.0-SNAPSHOT + 7.14.0 assemble-webservice ${project.artifactId} diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml index 4abb6170c92..5271dbf3dfa 100644 --- a/warehouse/common/pom.xml +++ b/warehouse/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-common ${project.artifactId} diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml index 8eda4ab399a..13acd12da53 100644 --- a/warehouse/core/pom.xml +++ b/warehouse/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-core jar diff --git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml index 4d1f0a5d6d3..ada91aab262 100644 --- a/warehouse/data-dictionary-core/pom.xml +++ b/warehouse/data-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-data-dictionary-core jar diff --git a/warehouse/edge-dictionary-core/pom.xml b/warehouse/edge-dictionary-core/pom.xml index 7fef2af5aea..1128baa0714 100644 --- a/warehouse/edge-dictionary-core/pom.xml +++ b/warehouse/edge-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-edge-dictionary-core jar diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml index a1afa6009d3..7a1d65e6f12 100644 --- a/warehouse/edge-model-configuration-core/pom.xml +++ b/warehouse/edge-model-configuration-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-edge-model-configuration-core jar diff --git a/warehouse/index-stats/pom.xml 
b/warehouse/index-stats/pom.xml index aace1d2f9a6..bbe4737ea9f 100644 --- a/warehouse/index-stats/pom.xml +++ b/warehouse/index-stats/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-index-stats jar diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml index c872c25840b..95cd249b430 100644 --- a/warehouse/ingest-configuration/pom.xml +++ b/warehouse/ingest-configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ingest-configuration diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index 4aa75ea7c98..66cf556e1c5 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ingest-core jar diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml index e11abd91a50..ca2c9caba23 100644 --- a/warehouse/ingest-csv/pom.xml +++ b/warehouse/ingest-csv/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ingest-csv jar diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml index e6d5e728d6b..327f50b9a2a 100644 --- a/warehouse/ingest-json/pom.xml +++ b/warehouse/ingest-json/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ingest-json jar diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml index 23a1e339ff4..ab15851399f 100644 --- a/warehouse/ingest-nyctlc/pom.xml +++ b/warehouse/ingest-nyctlc/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ingest-nyctlc jar diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml index 2a1a58a8ad9..2ecaa643b42 100644 --- a/warehouse/ingest-scripts/pom.xml +++ b/warehouse/ingest-scripts/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ingest-scripts ${project.artifactId} diff --git a/warehouse/ingest-ssdeep/pom.xml b/warehouse/ingest-ssdeep/pom.xml index 8880548c931..8cd9f5fb835 100644 --- a/warehouse/ingest-ssdeep/pom.xml +++ b/warehouse/ingest-ssdeep/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ingest-ssdeep diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml index 6e1d0be9802..83b2b06a923 100644 --- a/warehouse/ingest-wikipedia/pom.xml +++ b/warehouse/ingest-wikipedia/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ingest-wikipedia jar diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml index 13d08f1c2e1..63ce271c288 100644 --- a/warehouse/metrics-core/pom.xml +++ b/warehouse/metrics-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-metrics-core jar diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml index 436882d33d8..095546f7c77 100644 --- a/warehouse/ops-tools/config-compare/pom.xml +++ b/warehouse/ops-tools/config-compare/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ops-tools-config-compare diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml index a060e0fecbc..0ecedf212c0 100644 --- 
a/warehouse/ops-tools/index-validation/pom.xml +++ b/warehouse/ops-tools/index-validation/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ops-tools-index-validation jar diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml index d3217a980a0..eb6dd49d69a 100644 --- a/warehouse/ops-tools/pom.xml +++ b/warehouse/ops-tools/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ops-tools-parent pom diff --git a/warehouse/pom.xml b/warehouse/pom.xml index 1ba8b40b6e1..aed5f2b9ca2 100644 --- a/warehouse/pom.xml +++ b/warehouse/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-warehouse-parent pom diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml index c21ddfb8f76..64300b4d945 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-query-core jar diff --git a/warehouse/regression-testing/pom.xml b/warehouse/regression-testing/pom.xml index af9b2ee69f4..1a3914e166f 100644 --- a/warehouse/regression-testing/pom.xml +++ b/warehouse/regression-testing/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-regression-testing ${project.artifactId} diff --git a/warehouse/ssdeep-common/pom.xml b/warehouse/ssdeep-common/pom.xml index 795b07133bf..3a88da068d3 100644 --- a/warehouse/ssdeep-common/pom.xml +++ b/warehouse/ssdeep-common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ssdeep-common diff --git a/web-services/accumulo/pom.xml b/web-services/accumulo/pom.xml index b2b1cc31a04..516818fe1aa 100644 --- a/web-services/accumulo/pom.xml +++ b/web-services/accumulo/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-accumulo ejb diff --git a/web-services/atom/pom.xml b/web-services/atom/pom.xml index 8f05782cb55..601aec79d8c 100644 --- a/web-services/atom/pom.xml +++ b/web-services/atom/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-atom ejb diff --git a/web-services/cached-results/pom.xml b/web-services/cached-results/pom.xml index f5f3d3a59a5..37bc5c05f15 100644 --- a/web-services/cached-results/pom.xml +++ b/web-services/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-cached-results ejb diff --git a/web-services/client/pom.xml b/web-services/client/pom.xml index 0f9d305fceb..3f0a7fd18fc 100644 --- a/web-services/client/pom.xml +++ b/web-services/client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-client jar diff --git a/web-services/common-util/pom.xml b/web-services/common-util/pom.xml index 7e864febec6..a2c35d504ac 100644 --- a/web-services/common-util/pom.xml +++ b/web-services/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-common-util jar diff --git a/web-services/common/pom.xml b/web-services/common/pom.xml index 037e3f1c048..f933600d210 100644 --- a/web-services/common/pom.xml +++ b/web-services/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-common ejb diff --git 
a/web-services/deploy/application/pom.xml b/web-services/deploy/application/pom.xml index 530c7d228f6..70c7e85d2cc 100644 --- a/web-services/deploy/application/pom.xml +++ b/web-services/deploy/application/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-deploy-application ear diff --git a/web-services/deploy/configuration/pom.xml b/web-services/deploy/configuration/pom.xml index b35ce86386b..04cf3f157f6 100644 --- a/web-services/deploy/configuration/pom.xml +++ b/web-services/deploy/configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-deploy-configuration jar diff --git a/web-services/deploy/docs/pom.xml b/web-services/deploy/docs/pom.xml index dfcdbeffdcd..89e96d95e1f 100644 --- a/web-services/deploy/docs/pom.xml +++ b/web-services/deploy/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-deploy-docs war diff --git a/web-services/deploy/pom.xml b/web-services/deploy/pom.xml index b520bf5a80c..e83ec3087a7 100644 --- a/web-services/deploy/pom.xml +++ b/web-services/deploy/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 gov.nsa.datawave.webservices datawave-ws-deploy-parent diff --git a/web-services/deploy/spring-framework-integration/pom.xml b/web-services/deploy/spring-framework-integration/pom.xml index ac7344f1028..aeccf97f9ef 100644 --- a/web-services/deploy/spring-framework-integration/pom.xml +++ b/web-services/deploy/spring-framework-integration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.14.0-SNAPSHOT + 7.14.0 spring-framework-integration ${project.artifactId} diff --git a/web-services/dictionary/pom.xml b/web-services/dictionary/pom.xml index 529ac31f810..7ac930510f3 100644 --- a/web-services/dictionary/pom.xml +++ b/web-services/dictionary/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-dictionary ejb diff --git a/web-services/examples/client-login/pom.xml b/web-services/examples/client-login/pom.xml index 329e08a01f9..00fa98a8bf7 100644 --- a/web-services/examples/client-login/pom.xml +++ b/web-services/examples/client-login/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-examples-client-login ejb diff --git a/web-services/examples/http-client/pom.xml b/web-services/examples/http-client/pom.xml index 6f95e154537..3956cf1896e 100644 --- a/web-services/examples/http-client/pom.xml +++ b/web-services/examples/http-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-examples-http-client jar diff --git a/web-services/examples/jms-client/pom.xml b/web-services/examples/jms-client/pom.xml index 5a9a628792d..61ad2960d0a 100644 --- a/web-services/examples/jms-client/pom.xml +++ b/web-services/examples/jms-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-examples-jms-client jar diff --git a/web-services/examples/pom.xml b/web-services/examples/pom.xml index 84fc3c573b9..c90c61ce4cb 100644 --- a/web-services/examples/pom.xml +++ b/web-services/examples/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-examples-parent pom diff --git 
a/web-services/examples/query-war/pom.xml b/web-services/examples/query-war/pom.xml index faf6b8660f1..d1c53d16b46 100644 --- a/web-services/examples/query-war/pom.xml +++ b/web-services/examples/query-war/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-examples-query-war war diff --git a/web-services/map-reduce-embedded/pom.xml b/web-services/map-reduce-embedded/pom.xml index 813c0b801e7..13b36289a12 100644 --- a/web-services/map-reduce-embedded/pom.xml +++ b/web-services/map-reduce-embedded/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-map-reduce-embedded jar diff --git a/web-services/map-reduce-status/pom.xml b/web-services/map-reduce-status/pom.xml index 41cc05335d2..9ee0c406dab 100644 --- a/web-services/map-reduce-status/pom.xml +++ b/web-services/map-reduce-status/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-map-reduce-status ejb diff --git a/web-services/map-reduce/pom.xml b/web-services/map-reduce/pom.xml index 3a06f4950bf..9a4021ef621 100644 --- a/web-services/map-reduce/pom.xml +++ b/web-services/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-map-reduce ejb diff --git a/web-services/metrics/pom.xml b/web-services/metrics/pom.xml index 5fc064eb7dd..283fcceee9f 100644 --- a/web-services/metrics/pom.xml +++ b/web-services/metrics/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-metrics ejb diff --git a/web-services/model/pom.xml b/web-services/model/pom.xml index 0db6186c209..1447beb9833 100644 --- a/web-services/model/pom.xml +++ b/web-services/model/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-model ejb diff --git a/web-services/modification/pom.xml b/web-services/modification/pom.xml index 3e856b48441..6126cc290d0 100644 --- a/web-services/modification/pom.xml +++ b/web-services/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-modification ejb diff --git a/web-services/pom.xml b/web-services/pom.xml index 5887806da9e..2a4feae2c4e 100644 --- a/web-services/pom.xml +++ b/web-services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0-SNAPSHOT + 7.14.0 gov.nsa.datawave.webservices datawave-ws-parent diff --git a/web-services/query-websocket/pom.xml b/web-services/query-websocket/pom.xml index a1e50539dab..a103976c7c0 100644 --- a/web-services/query-websocket/pom.xml +++ b/web-services/query-websocket/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-query-websocket war diff --git a/web-services/query/pom.xml b/web-services/query/pom.xml index 83fb4234fec..1c316d12d8f 100644 --- a/web-services/query/pom.xml +++ b/web-services/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-query ejb diff --git a/web-services/rest-api/pom.xml b/web-services/rest-api/pom.xml index bc81188ade0..2fb85df0ccf 100644 --- a/web-services/rest-api/pom.xml +++ b/web-services/rest-api/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-rest-api war diff --git a/web-services/security/pom.xml b/web-services/security/pom.xml index 
020c2dd1a5f..da96be125d2 100644 --- a/web-services/security/pom.xml +++ b/web-services/security/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-security ejb diff --git a/web-services/web-root/pom.xml b/web-services/web-root/pom.xml index 52133ee5b36..97e79929792 100644 --- a/web-services/web-root/pom.xml +++ b/web-services/web-root/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0-SNAPSHOT + 7.14.0 datawave-ws-web-root war From 01270ec9b7be690071731fa2e2b1aec7cdc50780 Mon Sep 17 00:00:00 2001 From: Ivan Bella <347158+ivakegg@users.noreply.github.com> Date: Fri, 10 Jan 2025 18:38:47 +0000 Subject: [PATCH 08/16] 7.15.0-SNAPSHOT --- common-test/pom.xml | 2 +- contrib/datawave-quickstart/docker/pom.xml | 2 +- core/cached-results/pom.xml | 2 +- core/common-util/pom.xml | 2 +- core/common/pom.xml | 2 +- core/connection-pool/pom.xml | 2 +- core/map-reduce/pom.xml | 2 +- core/modification/pom.xml | 2 +- core/pom.xml | 2 +- core/query/pom.xml | 2 +- core/utils/pom.xml | 2 +- docs/pom.xml | 2 +- microservices/pom.xml | 2 +- microservices/services/pom.xml | 2 +- microservices/starters/pom.xml | 2 +- pom.xml | 2 +- warehouse/accumulo-extensions/pom.xml | 2 +- warehouse/age-off-utils/pom.xml | 2 +- warehouse/age-off/pom.xml | 2 +- warehouse/assemble/datawave/pom.xml | 2 +- warehouse/assemble/pom.xml | 2 +- warehouse/assemble/webservice/pom.xml | 2 +- warehouse/common/pom.xml | 2 +- warehouse/core/pom.xml | 2 +- warehouse/data-dictionary-core/pom.xml | 2 +- warehouse/edge-dictionary-core/pom.xml | 2 +- warehouse/edge-model-configuration-core/pom.xml | 2 +- warehouse/index-stats/pom.xml | 2 +- warehouse/ingest-configuration/pom.xml | 2 +- warehouse/ingest-core/pom.xml | 2 +- warehouse/ingest-csv/pom.xml | 2 +- warehouse/ingest-json/pom.xml | 2 +- warehouse/ingest-nyctlc/pom.xml | 2 +- warehouse/ingest-scripts/pom.xml | 2 +- warehouse/ingest-ssdeep/pom.xml | 2 +- warehouse/ingest-wikipedia/pom.xml | 2 +- warehouse/metrics-core/pom.xml | 2 +- warehouse/ops-tools/config-compare/pom.xml | 2 +- warehouse/ops-tools/index-validation/pom.xml | 2 +- warehouse/ops-tools/pom.xml | 2 +- warehouse/pom.xml | 2 +- warehouse/query-core/pom.xml | 2 +- warehouse/regression-testing/pom.xml | 2 +- warehouse/ssdeep-common/pom.xml | 2 +- web-services/accumulo/pom.xml | 2 +- web-services/atom/pom.xml | 2 +- web-services/cached-results/pom.xml | 2 +- web-services/client/pom.xml | 2 +- web-services/common-util/pom.xml | 2 +- web-services/common/pom.xml | 2 +- web-services/deploy/application/pom.xml | 2 +- web-services/deploy/configuration/pom.xml | 2 +- web-services/deploy/docs/pom.xml | 2 +- web-services/deploy/pom.xml | 2 +- web-services/deploy/spring-framework-integration/pom.xml | 2 +- web-services/dictionary/pom.xml | 2 +- web-services/examples/client-login/pom.xml | 2 +- web-services/examples/http-client/pom.xml | 2 +- web-services/examples/jms-client/pom.xml | 2 +- web-services/examples/pom.xml | 2 +- web-services/examples/query-war/pom.xml | 2 +- web-services/map-reduce-embedded/pom.xml | 2 +- web-services/map-reduce-status/pom.xml | 2 +- web-services/map-reduce/pom.xml | 2 +- web-services/metrics/pom.xml | 2 +- web-services/model/pom.xml | 2 +- web-services/modification/pom.xml | 2 +- web-services/pom.xml | 2 +- web-services/query-websocket/pom.xml | 2 +- web-services/query/pom.xml | 2 +- web-services/rest-api/pom.xml | 2 +- web-services/security/pom.xml | 2 +- web-services/web-root/pom.xml | 2 +- 73 files changed, 73 
insertions(+), 73 deletions(-) diff --git a/common-test/pom.xml b/common-test/pom.xml index 58cc2d57bbf..dcf70ae2fbb 100644 --- a/common-test/pom.xml +++ b/common-test/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-common-test ${project.artifactId} diff --git a/contrib/datawave-quickstart/docker/pom.xml b/contrib/datawave-quickstart/docker/pom.xml index 9e8b8878773..d27009a8ed7 100644 --- a/contrib/datawave-quickstart/docker/pom.xml +++ b/contrib/datawave-quickstart/docker/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0 + 7.15.0-SNAPSHOT ../../../pom.xml quickstart diff --git a/core/cached-results/pom.xml b/core/cached-results/pom.xml index 5e8d74ee729..7080f4c795a 100644 --- a/core/cached-results/pom.xml +++ b/core/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-core-cached-results ${project.artifactId} diff --git a/core/common-util/pom.xml b/core/common-util/pom.xml index 12d7f9309dc..4f6a1403e9b 100644 --- a/core/common-util/pom.xml +++ b/core/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-core-common-util ${project.artifactId} diff --git a/core/common/pom.xml b/core/common/pom.xml index b08ef93286f..3a4e92a120c 100644 --- a/core/common/pom.xml +++ b/core/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-core-common ${project.artifactId} diff --git a/core/connection-pool/pom.xml b/core/connection-pool/pom.xml index aace93f68c5..5cd352e0cac 100644 --- a/core/connection-pool/pom.xml +++ b/core/connection-pool/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-core-connection-pool ${project.artifactId} diff --git a/core/map-reduce/pom.xml b/core/map-reduce/pom.xml index 6d2a2eb60c0..fae52b0e7aa 100644 --- a/core/map-reduce/pom.xml +++ b/core/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-core-map-reduce ${project.artifactId} diff --git a/core/modification/pom.xml b/core/modification/pom.xml index 83b808aa295..d1857e45d07 100644 --- a/core/modification/pom.xml +++ b/core/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-core-modification ${project.artifactId} diff --git a/core/pom.xml b/core/pom.xml index 0edba4a179d..aa424368bb5 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0 + 7.15.0-SNAPSHOT gov.nsa.datawave.core datawave-core-parent diff --git a/core/query/pom.xml b/core/query/pom.xml index 105d2b86bf2..6d8bf3b7058 100644 --- a/core/query/pom.xml +++ b/core/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-core-query ${project.artifactId} diff --git a/core/utils/pom.xml b/core/utils/pom.xml index 9d5d9d110cd..79569c5d660 100644 --- a/core/utils/pom.xml +++ b/core/utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.core datawave-core-parent - 7.14.0 + 7.15.0-SNAPSHOT gov.nsa.datawave.core datawave-utils-parent diff --git a/docs/pom.xml b/docs/pom.xml index c13a63eb4da..9e159c3fe2d 100644 --- a/docs/pom.xml +++ b/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-docs diff --git a/microservices/pom.xml b/microservices/pom.xml index 353e155fd5d..48bad565717 100644 --- a/microservices/pom.xml +++ 
b/microservices/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0 + 7.15.0-SNAPSHOT gov.nsa.datawave.microservice datawave-microservice-build-parent diff --git a/microservices/services/pom.xml b/microservices/services/pom.xml index 4693df1a32c..1e4d5738857 100644 --- a/microservices/services/pom.xml +++ b/microservices/services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-microservice-service-build-parent pom diff --git a/microservices/starters/pom.xml b/microservices/starters/pom.xml index 60e7638b98d..03269ea0667 100644 --- a/microservices/starters/pom.xml +++ b/microservices/starters/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.microservice datawave-microservice-build-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-microservice-starter-build-parent pom diff --git a/pom.xml b/pom.xml index 7db793f3dff..b96ba02f806 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 gov.nsa.datawave datawave-parent - 7.14.0 + 7.15.0-SNAPSHOT pom DataWave DataWave is a Java-based ingest and query framework that leverages Apache Accumulo to provide fast, secure access to your data. diff --git a/warehouse/accumulo-extensions/pom.xml b/warehouse/accumulo-extensions/pom.xml index a6d6eeca426..eeccfec916d 100644 --- a/warehouse/accumulo-extensions/pom.xml +++ b/warehouse/accumulo-extensions/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-accumulo-extensions ${project.artifactId} diff --git a/warehouse/age-off-utils/pom.xml b/warehouse/age-off-utils/pom.xml index d98e5353b45..18bb50160cf 100644 --- a/warehouse/age-off-utils/pom.xml +++ b/warehouse/age-off-utils/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-age-off-utils ${project.artifactId} diff --git a/warehouse/age-off/pom.xml b/warehouse/age-off/pom.xml index d175826977e..1e0fe883efe 100644 --- a/warehouse/age-off/pom.xml +++ b/warehouse/age-off/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-age-off ${project.artifactId} diff --git a/warehouse/assemble/datawave/pom.xml b/warehouse/assemble/datawave/pom.xml index 0ffa7478ec1..734877489a0 100644 --- a/warehouse/assemble/datawave/pom.xml +++ b/warehouse/assemble/datawave/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 7.14.0 + 7.15.0-SNAPSHOT assemble-datawave jar diff --git a/warehouse/assemble/pom.xml b/warehouse/assemble/pom.xml index a74410a9ccc..d786a6c20f5 100644 --- a/warehouse/assemble/pom.xml +++ b/warehouse/assemble/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT assemble-parent pom diff --git a/warehouse/assemble/webservice/pom.xml b/warehouse/assemble/webservice/pom.xml index d677ed50103..1854c6910d9 100644 --- a/warehouse/assemble/webservice/pom.xml +++ b/warehouse/assemble/webservice/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave assemble-parent - 7.14.0 + 7.15.0-SNAPSHOT assemble-webservice ${project.artifactId} diff --git a/warehouse/common/pom.xml b/warehouse/common/pom.xml index 5271dbf3dfa..44312fb1943 100644 --- a/warehouse/common/pom.xml +++ b/warehouse/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-common ${project.artifactId} diff --git a/warehouse/core/pom.xml b/warehouse/core/pom.xml index 13acd12da53..cc642399a63 100644 --- a/warehouse/core/pom.xml +++ b/warehouse/core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave 
datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-core jar diff --git a/warehouse/data-dictionary-core/pom.xml b/warehouse/data-dictionary-core/pom.xml index ada91aab262..3fc2803a408 100644 --- a/warehouse/data-dictionary-core/pom.xml +++ b/warehouse/data-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-data-dictionary-core jar diff --git a/warehouse/edge-dictionary-core/pom.xml b/warehouse/edge-dictionary-core/pom.xml index 1128baa0714..de97cfc0563 100644 --- a/warehouse/edge-dictionary-core/pom.xml +++ b/warehouse/edge-dictionary-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-edge-dictionary-core jar diff --git a/warehouse/edge-model-configuration-core/pom.xml b/warehouse/edge-model-configuration-core/pom.xml index 7a1d65e6f12..7a114a35918 100644 --- a/warehouse/edge-model-configuration-core/pom.xml +++ b/warehouse/edge-model-configuration-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-edge-model-configuration-core jar diff --git a/warehouse/index-stats/pom.xml b/warehouse/index-stats/pom.xml index bbe4737ea9f..a221de28164 100644 --- a/warehouse/index-stats/pom.xml +++ b/warehouse/index-stats/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-index-stats jar diff --git a/warehouse/ingest-configuration/pom.xml b/warehouse/ingest-configuration/pom.xml index 95cd249b430..41b977533f9 100644 --- a/warehouse/ingest-configuration/pom.xml +++ b/warehouse/ingest-configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ingest-configuration diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index 66cf556e1c5..e45839c5c09 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ingest-core jar diff --git a/warehouse/ingest-csv/pom.xml b/warehouse/ingest-csv/pom.xml index ca2c9caba23..142d793bc4b 100644 --- a/warehouse/ingest-csv/pom.xml +++ b/warehouse/ingest-csv/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ingest-csv jar diff --git a/warehouse/ingest-json/pom.xml b/warehouse/ingest-json/pom.xml index 327f50b9a2a..5a1e50d89a7 100644 --- a/warehouse/ingest-json/pom.xml +++ b/warehouse/ingest-json/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ingest-json jar diff --git a/warehouse/ingest-nyctlc/pom.xml b/warehouse/ingest-nyctlc/pom.xml index ab15851399f..c95b136e18e 100644 --- a/warehouse/ingest-nyctlc/pom.xml +++ b/warehouse/ingest-nyctlc/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ingest-nyctlc jar diff --git a/warehouse/ingest-scripts/pom.xml b/warehouse/ingest-scripts/pom.xml index 2ecaa643b42..da40c0614d0 100644 --- a/warehouse/ingest-scripts/pom.xml +++ b/warehouse/ingest-scripts/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ingest-scripts ${project.artifactId} diff --git a/warehouse/ingest-ssdeep/pom.xml b/warehouse/ingest-ssdeep/pom.xml index 8cd9f5fb835..85c3f7707cb 100644 --- a/warehouse/ingest-ssdeep/pom.xml +++ b/warehouse/ingest-ssdeep/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 
7.15.0-SNAPSHOT datawave-ingest-ssdeep diff --git a/warehouse/ingest-wikipedia/pom.xml b/warehouse/ingest-wikipedia/pom.xml index 83b2b06a923..07bc7e9f8ce 100644 --- a/warehouse/ingest-wikipedia/pom.xml +++ b/warehouse/ingest-wikipedia/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ingest-wikipedia jar diff --git a/warehouse/metrics-core/pom.xml b/warehouse/metrics-core/pom.xml index 63ce271c288..f7fd1f79d7b 100644 --- a/warehouse/metrics-core/pom.xml +++ b/warehouse/metrics-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-metrics-core jar diff --git a/warehouse/ops-tools/config-compare/pom.xml b/warehouse/ops-tools/config-compare/pom.xml index 095546f7c77..5c1a5866668 100644 --- a/warehouse/ops-tools/config-compare/pom.xml +++ b/warehouse/ops-tools/config-compare/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ops-tools-config-compare diff --git a/warehouse/ops-tools/index-validation/pom.xml b/warehouse/ops-tools/index-validation/pom.xml index 0ecedf212c0..7a7ab8416c4 100644 --- a/warehouse/ops-tools/index-validation/pom.xml +++ b/warehouse/ops-tools/index-validation/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-ops-tools-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ops-tools-index-validation jar diff --git a/warehouse/ops-tools/pom.xml b/warehouse/ops-tools/pom.xml index eb6dd49d69a..139ed74be35 100644 --- a/warehouse/ops-tools/pom.xml +++ b/warehouse/ops-tools/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ops-tools-parent pom diff --git a/warehouse/pom.xml b/warehouse/pom.xml index aed5f2b9ca2..da60b2d81f8 100644 --- a/warehouse/pom.xml +++ b/warehouse/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-warehouse-parent pom diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml index 64300b4d945..04c543989b6 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-query-core jar diff --git a/warehouse/regression-testing/pom.xml b/warehouse/regression-testing/pom.xml index 1a3914e166f..f6f35a2c5ce 100644 --- a/warehouse/regression-testing/pom.xml +++ b/warehouse/regression-testing/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-regression-testing ${project.artifactId} diff --git a/warehouse/ssdeep-common/pom.xml b/warehouse/ssdeep-common/pom.xml index 3a88da068d3..9580dadb252 100644 --- a/warehouse/ssdeep-common/pom.xml +++ b/warehouse/ssdeep-common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-warehouse-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ssdeep-common diff --git a/web-services/accumulo/pom.xml b/web-services/accumulo/pom.xml index 516818fe1aa..7669e315e63 100644 --- a/web-services/accumulo/pom.xml +++ b/web-services/accumulo/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-accumulo ejb diff --git a/web-services/atom/pom.xml b/web-services/atom/pom.xml index 601aec79d8c..ddaaed6572e 100644 --- a/web-services/atom/pom.xml +++ b/web-services/atom/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-atom ejb diff --git a/web-services/cached-results/pom.xml b/web-services/cached-results/pom.xml index 
37bc5c05f15..410007ddedc 100644 --- a/web-services/cached-results/pom.xml +++ b/web-services/cached-results/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-cached-results ejb diff --git a/web-services/client/pom.xml b/web-services/client/pom.xml index 3f0a7fd18fc..738641fb237 100644 --- a/web-services/client/pom.xml +++ b/web-services/client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-client jar diff --git a/web-services/common-util/pom.xml b/web-services/common-util/pom.xml index a2c35d504ac..51cfe904575 100644 --- a/web-services/common-util/pom.xml +++ b/web-services/common-util/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-common-util jar diff --git a/web-services/common/pom.xml b/web-services/common/pom.xml index f933600d210..2f0eb0e5a65 100644 --- a/web-services/common/pom.xml +++ b/web-services/common/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-common ejb diff --git a/web-services/deploy/application/pom.xml b/web-services/deploy/application/pom.xml index 70c7e85d2cc..4fe946f6f48 100644 --- a/web-services/deploy/application/pom.xml +++ b/web-services/deploy/application/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-deploy-application ear diff --git a/web-services/deploy/configuration/pom.xml b/web-services/deploy/configuration/pom.xml index 04cf3f157f6..3b6d6fb3950 100644 --- a/web-services/deploy/configuration/pom.xml +++ b/web-services/deploy/configuration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-deploy-configuration jar diff --git a/web-services/deploy/docs/pom.xml b/web-services/deploy/docs/pom.xml index 89e96d95e1f..c95a2f27814 100644 --- a/web-services/deploy/docs/pom.xml +++ b/web-services/deploy/docs/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-deploy-docs war diff --git a/web-services/deploy/pom.xml b/web-services/deploy/pom.xml index e83ec3087a7..858dbd27d38 100644 --- a/web-services/deploy/pom.xml +++ b/web-services/deploy/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-deploy-parent diff --git a/web-services/deploy/spring-framework-integration/pom.xml b/web-services/deploy/spring-framework-integration/pom.xml index aeccf97f9ef..838a270b7ef 100644 --- a/web-services/deploy/spring-framework-integration/pom.xml +++ b/web-services/deploy/spring-framework-integration/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-deploy-parent - 7.14.0 + 7.15.0-SNAPSHOT spring-framework-integration ${project.artifactId} diff --git a/web-services/dictionary/pom.xml b/web-services/dictionary/pom.xml index 7ac930510f3..eb21dc01199 100644 --- a/web-services/dictionary/pom.xml +++ b/web-services/dictionary/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-dictionary ejb diff --git a/web-services/examples/client-login/pom.xml b/web-services/examples/client-login/pom.xml index 00fa98a8bf7..72ead4e5aa2 100644 --- a/web-services/examples/client-login/pom.xml +++ b/web-services/examples/client-login/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices 
datawave-ws-examples-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-examples-client-login ejb diff --git a/web-services/examples/http-client/pom.xml b/web-services/examples/http-client/pom.xml index 3956cf1896e..ca5023c63ae 100644 --- a/web-services/examples/http-client/pom.xml +++ b/web-services/examples/http-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-examples-http-client jar diff --git a/web-services/examples/jms-client/pom.xml b/web-services/examples/jms-client/pom.xml index 61ad2960d0a..dd3ca900b00 100644 --- a/web-services/examples/jms-client/pom.xml +++ b/web-services/examples/jms-client/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-examples-jms-client jar diff --git a/web-services/examples/pom.xml b/web-services/examples/pom.xml index c90c61ce4cb..871772f3dcb 100644 --- a/web-services/examples/pom.xml +++ b/web-services/examples/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-examples-parent pom diff --git a/web-services/examples/query-war/pom.xml b/web-services/examples/query-war/pom.xml index d1c53d16b46..7bb3f5ba60d 100644 --- a/web-services/examples/query-war/pom.xml +++ b/web-services/examples/query-war/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-examples-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-examples-query-war war diff --git a/web-services/map-reduce-embedded/pom.xml b/web-services/map-reduce-embedded/pom.xml index 13b36289a12..488f5dd8df1 100644 --- a/web-services/map-reduce-embedded/pom.xml +++ b/web-services/map-reduce-embedded/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-map-reduce-embedded jar diff --git a/web-services/map-reduce-status/pom.xml b/web-services/map-reduce-status/pom.xml index 9ee0c406dab..311108ca0ab 100644 --- a/web-services/map-reduce-status/pom.xml +++ b/web-services/map-reduce-status/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-map-reduce-status ejb diff --git a/web-services/map-reduce/pom.xml b/web-services/map-reduce/pom.xml index 9a4021ef621..e1ce201a113 100644 --- a/web-services/map-reduce/pom.xml +++ b/web-services/map-reduce/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-map-reduce ejb diff --git a/web-services/metrics/pom.xml b/web-services/metrics/pom.xml index 283fcceee9f..7310f743b7f 100644 --- a/web-services/metrics/pom.xml +++ b/web-services/metrics/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-metrics ejb diff --git a/web-services/model/pom.xml b/web-services/model/pom.xml index 1447beb9833..51dc071ca3e 100644 --- a/web-services/model/pom.xml +++ b/web-services/model/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-model ejb diff --git a/web-services/modification/pom.xml b/web-services/modification/pom.xml index 6126cc290d0..ab5d29308d7 100644 --- a/web-services/modification/pom.xml +++ b/web-services/modification/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-modification ejb diff --git a/web-services/pom.xml b/web-services/pom.xml index 2a4feae2c4e..6a45047963e 100644 --- a/web-services/pom.xml +++ 
b/web-services/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave datawave-parent - 7.14.0 + 7.15.0-SNAPSHOT gov.nsa.datawave.webservices datawave-ws-parent diff --git a/web-services/query-websocket/pom.xml b/web-services/query-websocket/pom.xml index a103976c7c0..0e7f2ec0bef 100644 --- a/web-services/query-websocket/pom.xml +++ b/web-services/query-websocket/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-query-websocket war diff --git a/web-services/query/pom.xml b/web-services/query/pom.xml index 1c316d12d8f..ab8e41e3a24 100644 --- a/web-services/query/pom.xml +++ b/web-services/query/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-query ejb diff --git a/web-services/rest-api/pom.xml b/web-services/rest-api/pom.xml index 2fb85df0ccf..3d80982681c 100644 --- a/web-services/rest-api/pom.xml +++ b/web-services/rest-api/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-rest-api war diff --git a/web-services/security/pom.xml b/web-services/security/pom.xml index da96be125d2..8d6208bf507 100644 --- a/web-services/security/pom.xml +++ b/web-services/security/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-security ejb diff --git a/web-services/web-root/pom.xml b/web-services/web-root/pom.xml index 97e79929792..f075f5fa5d7 100644 --- a/web-services/web-root/pom.xml +++ b/web-services/web-root/pom.xml @@ -4,7 +4,7 @@ gov.nsa.datawave.webservices datawave-ws-parent - 7.14.0 + 7.15.0-SNAPSHOT datawave-ws-web-root war From a4750b1f4a4e963072d901c87d43e2584631dc5a Mon Sep 17 00:00:00 2001 From: jeff <1583214+jschmidt10@users.noreply.github.com> Date: Tue, 14 Jan 2025 16:55:37 +0000 Subject: [PATCH 09/16] Store active job cache pointer in Zookeeper (#2681) * Store active job cache pointer in Zookeeper * Reuse zookeeper jar function --- properties/default.properties | 2 + warehouse/ingest-core/pom.xml | 13 +++ .../ingest/jobcache/ActiveSetter.java | 54 ++++++++++++ .../jobcache/NonEmptyStringValidator.java | 16 ++++ .../ingest/jobcache/SetActiveCommand.java | 69 +++++++++++++++ .../ingest/jobcache/ActiveSetterTest.java | 83 +++++++++++++++++++ .../src/main/resources/bin/ingest/findJars.sh | 9 +- .../main/resources/bin/ingest/ingest-env.sh | 2 + .../main/resources/bin/ingest/ingest-libs.sh | 1 + .../resources/bin/ingest/load-job-cache.sh | 20 +++++ 10 files changed, 265 insertions(+), 4 deletions(-) create mode 100644 warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/ActiveSetter.java create mode 100644 warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/NonEmptyStringValidator.java create mode 100644 warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/SetActiveCommand.java create mode 100644 warehouse/ingest-core/src/test/java/datawave/ingest/jobcache/ActiveSetterTest.java diff --git a/properties/default.properties b/properties/default.properties index 5f7cbbc4854..104de4164b0 100644 --- a/properties/default.properties +++ b/properties/default.properties @@ -574,6 +574,8 @@ MAP_FILE_LOADER_EXTRA_ARGS=-ingestMetricsDisabled JOB_OBSERVERS= JOB_OBSERVER_EXTRA_OPTS= +ACTIVE_JOB_CACHE_PATH=/datawave/activeJobCache + # These should be set only if deploying on the CDH distro of Accumulo, # otherwise leave them blank WAREHOUSE_ACCUMULO_LIB= diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index e45839c5c09..c9ea5b9d58c 
100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -10,6 +10,10 @@ jar ${project.artifactId} + + com.beust + jcommander + com.clearspring.analytics stream @@ -64,6 +68,10 @@ org.apache.commons commons-jexl3 + + org.apache.curator + curator-client + org.apache.hadoop hadoop-annotations @@ -207,6 +215,11 @@ ${version.accumulo} test + + org.apache.curator + curator-test + test + org.javassist javassist diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/ActiveSetter.java b/warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/ActiveSetter.java new file mode 100644 index 00000000000..1ce6bf99a83 --- /dev/null +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/ActiveSetter.java @@ -0,0 +1,54 @@ +package datawave.ingest.jobcache; + +import java.nio.charset.Charset; + +import org.apache.curator.framework.CuratorFramework; + +/** + * Sets the active job cache. + */ +public class ActiveSetter { + + private final CuratorFramework zkClient; + + public ActiveSetter(CuratorFramework zkClient) { + this.zkClient = zkClient; + } + + /** + * Sets the active job cache in Zookeeper. + * + * @param path + * The ZK node to set + * @param activeJobCache + * The active job cache + * @throws Exception + * if the operation does not succeed + */ + public void set(String path, String activeJobCache) throws Exception { + if (path == null || path.isEmpty()) { + throw new IllegalArgumentException("path cannot be empty"); + } + if (activeJobCache == null || activeJobCache.isEmpty()) { + throw new IllegalArgumentException("activeJobCache cannot be empty"); + } + + if (!zkPathExists(path)) { + createZkPath(path); + } + + updateZkPath(path, activeJobCache); + } + + private boolean zkPathExists(String path) throws Exception { + return zkClient.checkExists().forPath(path) != null; + } + + private void createZkPath(String path) throws Exception { + zkClient.create().creatingParentsIfNeeded().forPath(path); + } + + private void updateZkPath(String path, String value) throws Exception { + zkClient.setData().forPath(path, value.getBytes(Charset.defaultCharset())); + } +} diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/NonEmptyStringValidator.java b/warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/NonEmptyStringValidator.java new file mode 100644 index 00000000000..317c8053c74 --- /dev/null +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/NonEmptyStringValidator.java @@ -0,0 +1,16 @@ +package datawave.ingest.jobcache; + +import com.beust.jcommander.IParameterValidator; +import com.beust.jcommander.ParameterException; + +/** + * Validates a parameter is a non-empty String. 
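+ * JCommander runs this check while parsing, before a value is assigned to any option declared with {@code validateWith = NonEmptyStringValidator.class} (as the SetActiveCommand options below are), rejecting a blank value by aborting the parse with a ParameterException.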
+ */ +public class NonEmptyStringValidator implements IParameterValidator { + @Override + public void validate(String parameter, String value) throws ParameterException { + if (value == null || value.isEmpty()) { + throw new ParameterException(parameter + " must be a non-empty String."); + } + } +} diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/SetActiveCommand.java b/warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/SetActiveCommand.java new file mode 100644 index 00000000000..270b7cae4bf --- /dev/null +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/jobcache/SetActiveCommand.java @@ -0,0 +1,69 @@ +package datawave.ingest.jobcache; + +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.CuratorFrameworkFactory; +import org.apache.curator.retry.ExponentialBackoffRetry; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import com.beust.jcommander.ParameterException; +import com.beust.jcommander.Parameters; + +/** + * Command line tool to set the active job cache in Zookeeper. + */ +@Parameters(commandDescription = "Sets the active job cache in Zookeeper.") +public class SetActiveCommand { + + private static final int ZK_NUM_RETRIES = 3; + private static final int ZK_RETRY_SLEEP_MS = 1000; + + @Parameter(names = {"-z", "--zookeepers"}, description = "The zookeeper servers to update.", required = true, validateWith = NonEmptyStringValidator.class) + private String zookeepers; + + @Parameter(names = {"-p", "--path"}, description = "The zookeeper path where the active job cache will be stored.", required = true, + validateWith = NonEmptyStringValidator.class) + private String zkPath; + + @Parameter(names = {"-j", "--job-cache"}, description = "The full HDFS path to the active job cache (e.g. 'hdfs://ingest/data/jobCacheA').", + required = true, validateWith = NonEmptyStringValidator.class) + private String jobCache; + + @Parameter(names = {"-h", "--help"}, description = "Prints the command usage.", help = true) + private boolean help; + + public void run() { + try (CuratorFramework zkClient = CuratorFrameworkFactory.newClient(zookeepers, new ExponentialBackoffRetry(ZK_RETRY_SLEEP_MS, ZK_NUM_RETRIES))) { + zkClient.start(); + + new ActiveSetter(zkClient).set(zkPath, jobCache); + + } catch (Exception e) { + throw new RuntimeException("Failed to update " + zkPath + " to " + jobCache + ". 
Try again.", e); + } + } + + public boolean isHelp() { + return help; + } + + public static void main(String[] args) { + SetActiveCommand tool = new SetActiveCommand(); + JCommander jcommander = JCommander.newBuilder().addObject(tool).build(); + + try { + jcommander.parse(args); + + if (tool.isHelp()) { + jcommander.usage(); + } else { + tool.run(); + } + + } catch (ParameterException e) { + System.err.println(e.getMessage()); + jcommander.usage(); + System.exit(1); + } + } +} diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/jobcache/ActiveSetterTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/jobcache/ActiveSetterTest.java new file mode 100644 index 00000000000..630111801f2 --- /dev/null +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/jobcache/ActiveSetterTest.java @@ -0,0 +1,83 @@ +package datawave.ingest.jobcache; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.nio.charset.Charset; + +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.CuratorFrameworkFactory; +import org.apache.curator.retry.ExponentialBackoffRetry; +import org.apache.curator.test.TestingServer; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class ActiveSetterTest { + + private static final String ACTIVE_PATH = "/datawave/activeJobCache"; + + private static TestingServer ZK_SERVER; + private static CuratorFramework ZK_CLIENT; + + private ActiveSetter activeSetter; + + @BeforeClass + public static void setupAll() throws Exception { + ZK_SERVER = new TestingServer(); + ZK_SERVER.start(); + + ZK_CLIENT = CuratorFrameworkFactory.newClient(ZK_SERVER.getConnectString(), new ExponentialBackoffRetry(1000, 3)); + ZK_CLIENT.start(); + } + + @Before + public void setup() { + activeSetter = new ActiveSetter(ZK_CLIENT); + } + + @Test + public void shouldSetActive() throws Exception { + String activeJobCache = "/data/jobCacheA"; + + activeSetter.set(ACTIVE_PATH, activeJobCache); + + assertZkData(ACTIVE_PATH, activeJobCache); + } + + @Test + public void shouldOverwrite() throws Exception { + String activeJobCache1 = "/data/jobCacheA"; + String activeJobCache2 = "/data/jobCacheB"; + + activeSetter.set(ACTIVE_PATH, activeJobCache1); + activeSetter.set(ACTIVE_PATH, activeJobCache2); + + assertZkData(ACTIVE_PATH, activeJobCache2); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldNotAcceptEmptyPath() throws Exception { + String emptyJobCache = ""; + + activeSetter.set(ACTIVE_PATH, emptyJobCache); + } + + private void assertZkData(String path, String expectedValue) throws Exception { + String actualValue = new String(ZK_CLIENT.getData().forPath(path), Charset.defaultCharset()); + + assertEquals(expectedValue, actualValue); + } + + @AfterClass + public static void tearDownAll() throws IOException { + if (ZK_CLIENT != null) { + ZK_CLIENT.close(); + } + + if (ZK_SERVER != null) { + ZK_SERVER.stop(); + } + } +} diff --git a/warehouse/ingest-scripts/src/main/resources/bin/ingest/findJars.sh b/warehouse/ingest-scripts/src/main/resources/bin/ingest/findJars.sh index 6afb4f20f71..4b20424ee05 100644 --- a/warehouse/ingest-scripts/src/main/resources/bin/ingest/findJars.sh +++ b/warehouse/ingest-scripts/src/main/resources/bin/ingest/findJars.sh @@ -20,11 +20,11 @@ findAccumuloJar (){ ls -1 $WAREHOUSE_ACCUMULO_LIB/$1-[0-9]*.jar | sort | tail -1 } findZookeeperJar(){ - result=$(ls -1 $ZOOKEEPER_HOME/zookeeper-*.jar 2>/dev/null | 
head -1) - [[ -f $result ]] || result=$(ls -1 $ZOOKEEPER_HOME/lib/zookeeper-*.jar | head -1) + result=$(ls -1 $ZOOKEEPER_HOME/$1-*.jar 2>/dev/null | head -1) + [[ -f $result ]] || result=$(ls -1 $ZOOKEEPER_HOME/lib/$1-*.jar | head -1) + echo $result } - CONF_DIR=../../config DATAWAVE_INDEX_STATS_JAR=$(findJar datawave-index-stats) DATAWAVE_INGEST_CSV_JAR=$(findJar datawave-ingest-csv) @@ -94,7 +94,8 @@ INFINISPAN_CORE_JAR=$(findJar infinispan-core) INFINISPAN_COMMONS_JAR=$(findJar infinispan-commons) JBOSS_LOGGING_JAR=$(findJar jboss-logging) JGROUPS_JAR=$(findJar jgroups) -ZOOKEEPER_JAR=$(findZookeeperJar) +ZOOKEEPER_JAR=$(findZookeeperJar zookeeper) +ZOOKEEPER_JUTE_JAR=$(findZookeeperJar zookeeper-jute) DATAWAVE_QUERY_CORE_JAR=$(findJar datawave-query-core) COMMONS_JEXL_JAR=$(findJar commons-jexl3) PROTOSTUFF_API_JAR=$(findJar protostuff-api) diff --git a/warehouse/ingest-scripts/src/main/resources/bin/ingest/ingest-env.sh b/warehouse/ingest-scripts/src/main/resources/bin/ingest/ingest-env.sh index 422c211baf3..c1a411eef1d 100755 --- a/warehouse/ingest-scripts/src/main/resources/bin/ingest/ingest-env.sh +++ b/warehouse/ingest-scripts/src/main/resources/bin/ingest/ingest-env.sh @@ -195,6 +195,8 @@ HDFS_BASE_DIR="${HDFS_BASE_DIR}" BASE_WORK_DIR="${BASE_WORK_DIR}" BASE_WORK_DIR="${BASE_WORK_DIR:-/datawave/ingest/work}" +ACTIVE_JOB_CACHE_PATH="${ACTIVE_JOB_CACHE_PATH}" + HDFS_MONITOR_ARGS="${HDFS_MONITOR_ARGS}" MONITOR_SERVER_HOST="${MONITOR_SERVER_HOST}" diff --git a/warehouse/ingest-scripts/src/main/resources/bin/ingest/ingest-libs.sh b/warehouse/ingest-scripts/src/main/resources/bin/ingest/ingest-libs.sh index 9cadcff87dc..51a9509791b 100755 --- a/warehouse/ingest-scripts/src/main/resources/bin/ingest/ingest-libs.sh +++ b/warehouse/ingest-scripts/src/main/resources/bin/ingest/ingest-libs.sh @@ -68,6 +68,7 @@ CLASSPATH=${CLASSPATH}:${INFINISPAN_COMMONS_JAR} CLASSPATH=${CLASSPATH}:${JBOSS_LOGGING_JAR} CLASSPATH=${CLASSPATH}:${JGROUPS_JAR} CLASSPATH=${CLASSPATH}:${ZOOKEEPER_JAR} +CLASSPATH=${CLASSPATH}:${ZOOKEEPER_JUTE_JAR} CLASSPATH=${CLASSPATH}:${OPENCSV_JAR} CLASSPATH=${CLASSPATH}:${STREAMLIB} CLASSPATH=${CLASSPATH}:${JCOMMANDER_JAR} diff --git a/warehouse/ingest-scripts/src/main/resources/bin/ingest/load-job-cache.sh b/warehouse/ingest-scripts/src/main/resources/bin/ingest/load-job-cache.sh index 0761f4cb585..85236cc91c6 100755 --- a/warehouse/ingest-scripts/src/main/resources/bin/ingest/load-job-cache.sh +++ b/warehouse/ingest-scripts/src/main/resources/bin/ingest/load-job-cache.sh @@ -12,6 +12,7 @@ THIS_DIR="${THIS_SCRIPT%/*}" cd $THIS_DIR . ../ingest/ingest-env.sh +. ../ingest/ingest-libs.sh . ../ingest/job-cache-env.sh # Check that there are no other instances of this script running @@ -99,6 +100,25 @@ else echo "Warehouse and ingest are one and the same. Assuming the warehouse job cache loading is sufficient" fi +# Update Zookeeper if we have an active job cache path +if [[ -n "${ACTIVE_JOB_CACHE_PATH}" ]]; then + if ! java -cp ${CLASSPATH} datawave.ingest.jobcache.SetActiveCommand \ + --zookeepers ${INGEST_ZOOKEEPERS} \ + --path ${ACTIVE_JOB_CACHE_PATH} \ + --job-cache "${INGEST_HDFS_NAME_NODE}${JOB_CACHE_DIR}"; then + echo "[ERROR] Failed to set active ingest job cache" + fi + + if [[ "$WAREHOUSE_HDFS_NAME_NODE" != "$INGEST_HDFS_NAME_NODE" ]]; then + if ! 
java -cp ${CLASSPATH} datawave.ingest.jobcache.SetActiveCommand \ + --zookeepers ${WAREHOUSE_ZOOKEEPERS} \ + --path ${ACTIVE_JOB_CACHE_PATH} \ + --job-cache "${WAREHOUSE_HDFS_NAME_NODE}${JOB_CACHE_DIR}"; then + echo "[ERROR] Failed to set active warehouse job cache" + fi + fi +fi + # Remove the prepared directory rm -r -f $tmpdir trap - INT TERM EXIT From dc0cf079efd7370e855615f99c0ce392a7103a13 Mon Sep 17 00:00:00 2001 From: Laura Schanno Date: Tue, 14 Jan 2025 14:19:49 -0500 Subject: [PATCH 10/16] Add ability to configure date types for no expansion (#2662) * Add ability to configure date types for no expansion Add the ability to configure date types that will not result in date filter and SHARDS_AND_DAYS hint addition if the query's date type is one that should not be expanded when the query's end date is the current date. Fixes #2636 --- .../query/config/ShardQueryConfiguration.java | 49 ++++- .../query/planner/DefaultQueryPlanner.java | 135 +++++++----- .../query/tables/ShardQueryLogic.java | 8 + .../config/ShardQueryConfigurationTest.java | 6 + .../planner/DefaultQueryPlannerTest.java | 203 ++++++++++++++++++ 5 files changed, 339 insertions(+), 62 deletions(-) create mode 100644 warehouse/query-core/src/test/java/datawave/query/planner/DefaultQueryPlannerTest.java diff --git a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java index b9beb4e21d1..b88c9527ff2 100644 --- a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java +++ b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java @@ -527,6 +527,18 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement */ private double fieldIndexHoleMinThreshold = 1.0d; + /** + * The set of date types that, if the query's end date is the current date, will NOT result in any date range adjustments or the addition of a + * SHARDS_AND_DAYS hint. + */ + private Set noExpansionIfCurrentDateTypes = Collections.emptySet(); + + /** + * Whether the SHARDS_AND_DAYS hint should be allowed for the query. This will be set to false iff the query specified a date type, and the date type is + * present in {@link #noExpansionIfCurrentDateTypes}, and the query's end date is the current date. + */ + private boolean shardsAndDaysHintAllowed = true; + /** * Default constructor */ @@ -768,6 +780,9 @@ public void copyFrom(ShardQueryConfiguration other) { this.setUseQueryTreeScanHintRules(other.isUseQueryTreeScanHintRules()); this.setQueryTreeScanHintRules(other.getQueryTreeScanHintRules()); this.setFieldIndexHoleMinThreshold(other.getFieldIndexHoleMinThreshold()); + this.setNoExpansionIfCurrentDateTypes( other.getNoExpansionIfCurrentDateTypes() == null ? 
null : Sets.newHashSet(other.getNoExpansionIfCurrentDateTypes())); + this.setShardsAndDaysHintAllowed(other.isShardsAndDaysHintAllowed()); } /** @@ -2832,12 +2847,15 @@ public void setCardinalityThreshold(int cardinalityThreshold) { @Override public boolean equals(Object o) { - if (this == o) + if (this == o) { return true; - if (o == null || getClass() != o.getClass()) + } + if (o == null || getClass() != o.getClass()) { return false; - if (!super.equals(o)) + } + if (!super.equals(o)) { return false; + } // @formatter:off ShardQueryConfiguration that = (ShardQueryConfiguration) o; return isTldQuery() == that.isTldQuery() && @@ -3038,7 +3056,10 @@ public boolean equals(Object o) { isSortQueryPreIndexWithFieldCounts() == that.isSortQueryPreIndexWithFieldCounts() && isSortQueryPostIndexWithTermCounts() == that.isSortQueryPostIndexWithTermCounts() && isSortQueryPostIndexWithFieldCounts() == that.isSortQueryPostIndexWithFieldCounts() && - getCardinalityThreshold() == that.getCardinalityThreshold(); + getCardinalityThreshold() == that.getCardinalityThreshold() && + Objects.equals(getNoExpansionIfCurrentDateTypes(), that.getNoExpansionIfCurrentDateTypes()) && + isShardsAndDaysHintAllowed() == that.isShardsAndDaysHintAllowed(); + // @formatter:on } @@ -3244,7 +3265,9 @@ public int hashCode() { isSortQueryPreIndexWithFieldCounts(), isSortQueryPostIndexWithTermCounts(), isSortQueryPostIndexWithFieldCounts(), - getCardinalityThreshold() + getCardinalityThreshold(), + getNoExpansionIfCurrentDateTypes(), + isShardsAndDaysHintAllowed() ); // @formatter:on } @@ -3279,4 +3302,20 @@ public long getMaxAnyFieldScanTimeMillis() { public void setMaxAnyFieldScanTimeMillis(long maxAnyFieldScanTimeMillis) { this.maxAnyFieldScanTimeMillis = maxAnyFieldScanTimeMillis; } + + public Set getNoExpansionIfCurrentDateTypes() { + return noExpansionIfCurrentDateTypes; + } + + public void setNoExpansionIfCurrentDateTypes(Set noExpansionIfCurrentDateTypes) { + this.noExpansionIfCurrentDateTypes = noExpansionIfCurrentDateTypes; + } + + public boolean isShardsAndDaysHintAllowed() { + return shardsAndDaysHintAllowed; + } + + public void setShardsAndDaysHintAllowed(boolean shardsAndDaysHintAllowed) { + this.shardsAndDaysHintAllowed = shardsAndDaysHintAllowed; + } } diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index 28c623e6bf3..5b517da26ed 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -843,15 +843,18 @@ protected ASTJexlScript updateQueryTree(ScannerFactory scannerFactory, MetadataH throw new DatawaveFatalQueryException("Found incorrectly marked bounded ranges"); } - if (optionsMap.containsKey(QueryParameters.SHARDS_AND_DAYS)) { - config.setQueryTree(timedAddShardsAndDaysFromOptions(timers, config.getQueryTree(), optionsMap)); - } else { - // look for the shards and days hint in the query settings - // the shards and days hint cannot always be specified in the query string when using certain query parsers - Parameter parameter = settings.findParameter(QueryParameters.SHARDS_AND_DAYS); - if (StringUtils.isNotBlank(parameter.getParameterValue())) { - optionsMap.put(QueryParameters.SHARDS_AND_DAYS, parameter.getParameterValue()); + // Do not add a SHARDS_AND_DAYS hint if it is specifically not allowed. 
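That is the case when the query's date type appears in noExpansionIfCurrentDateTypes and the query's end date falls on the current day.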
This was checked and updated when timedIncludeDateFilters was called. + if (config.isShardsAndDaysHintAllowed()) { + if (optionsMap.containsKey(QueryParameters.SHARDS_AND_DAYS)) { config.setQueryTree(timedAddShardsAndDaysFromOptions(timers, config.getQueryTree(), optionsMap)); + } else { + // look for the shards and days hint in the query settings + // the shards and days hint cannot always be specified in the query string when using certain query parsers + Parameter parameter = settings.findParameter(QueryParameters.SHARDS_AND_DAYS); + if (StringUtils.isNotBlank(parameter.getParameterValue())) { + optionsMap.put(QueryParameters.SHARDS_AND_DAYS, parameter.getParameterValue()); + config.setQueryTree(timedAddShardsAndDaysFromOptions(timers, config.getQueryTree(), optionsMap)); + } } } @@ -2122,57 +2125,73 @@ public ASTJexlScript addDateFilters(final ASTJexlScript queryTree, ScannerFactor } } - // if we are using something other than the default of EVENT date - // time, then we need to modify the query - if (!dateType.equals(defaultDateType)) { + // Get the set of date types that should not be expanded if the end date is the current date. + // @formatter:off + Set noExpansionIfCurrentDateTypes = config.getNoExpansionIfCurrentDateTypes() == null ? Collections.emptySet() : + config.getNoExpansionIfCurrentDateTypes().stream() + .map(String::trim) + .map(String::toUpperCase) + .collect(Collectors.toSet()); + // @formatter:on - log.info("Using the date index for " + dateType); - // if no date index helper configured, then we are in error - if (dateIndexHelper == null) { - throw new DatawaveQueryException("Requested date range of type " + dateType + " but no date index is configured"); - } - // get all of the fields used for this date type - DateIndexHelper.DateTypeDescription dateIndexData = dateIndexHelper.getTypeDescription(dateType, config.getBeginDate(), config.getEndDate(), - config.getDatatypeFilter()); - if (dateIndexData.getFields().isEmpty()) { - log.warn("The specified date type: " + dateType + " is unknown for the specified data types"); - // If this is the case, then essentially we have no dates to search. Adding the filter function with _NO_FIELD_ will have the desired effect. - // Also it will be understandable from the plan as to why no results were returned. - dateIndexData.getFields().add(Constants.NO_FIELD); - } - log.info("Adding date filters for the following fields: " + dateIndexData.getFields()); - // now for each field, add an expression to filter that date - List andChildren = new ArrayList<>(); - for (int i = 0; i < queryTree.jjtGetNumChildren(); i++) { - if (queryTree.jjtGetChild(i) instanceof ASTAndNode) { - andChildren.add(queryTree.jjtGetChild(i)); + // If the date type is one marked for no expansion if current, and the query's end date is the current date, do not add any date filters, and do not + // allow a SHARDS_AND_DAYS hint to be added later. + if (noExpansionIfCurrentDateTypes.contains(dateType) && DateUtils.isSameDay(new Date(), config.getEndDate())) { + log.info("Query end date equals current date and date type " + dateType + + " is marked for no expansion if current. SHARDS_AND_DAYS hint will be forbidden."); + config.setShardsAndDaysHintAllowed(false); + } else { + // If we are using something other than the default of EVENT date time, then we need to modify the query. 
+ if (!dateType.equals(defaultDateType)) { + log.info("Using the date index for " + dateType); + // if no date index helper configured, then we are in error + if (dateIndexHelper == null) { + throw new DatawaveQueryException("Requested date range of type " + dateType + " but no date index is configured"); + } + // get all of the fields used for this date type + DateIndexHelper.DateTypeDescription dateIndexData = dateIndexHelper.getTypeDescription(dateType, config.getBeginDate(), config.getEndDate(), + config.getDatatypeFilter()); + if (dateIndexData.getFields().isEmpty()) { + log.warn("The specified date type: " + dateType + " is unknown for the specified data types"); + // If this is the case, then essentially we have no dates to search. Adding the filter function with _NO_FIELD_ will have the desired + // effect. + // Also it will be understandable from the plan as to why no results were returned. + dateIndexData.getFields().add(Constants.NO_FIELD); + } + log.info("Adding date filters for the following fields: " + dateIndexData.getFields()); + // now for each field, add an expression to filter that date + List andChildren = new ArrayList<>(); + for (int i = 0; i < queryTree.jjtGetNumChildren(); i++) { + if (queryTree.jjtGetChild(i) instanceof ASTAndNode) { + andChildren.add(queryTree.jjtGetChild(i)); + } else { + andChildren.add(JexlNodeFactory.createExpression(queryTree.jjtGetChild(i))); + } + } + List orChildren = new ArrayList<>(); + for (String field : dateIndexData.getFields()) { + orChildren.add(createDateFilter(dateType, field, config.getBeginDate(), config.getEndDate())); + } + if (orChildren.size() > 1) { + andChildren.add(JexlNodeFactory.createOrNode(orChildren)); } else { - andChildren.add(JexlNodeFactory.createExpression(queryTree.jjtGetChild(i))); + andChildren.addAll(orChildren); } - } - List orChildren = new ArrayList<>(); - for (String field : dateIndexData.getFields()) { - orChildren.add(createDateFilter(dateType, field, config.getBeginDate(), config.getEndDate())); - } - if (orChildren.size() > 1) { - andChildren.add(JexlNodeFactory.createOrNode(orChildren)); + JexlNode andNode = JexlNodeFactory.createAndNode(andChildren); + JexlNodeFactory.setChildren(queryTree, Collections.singleton(andNode)); + + // now lets update the query parameters with the correct start and + // end dates + log.info("Remapped " + dateType + " dates [" + config.getBeginDate() + "," + config.getEndDate() + "] to EVENT dates " + + dateIndexData.getBeginDate() + "," + dateIndexData.getEndDate()); + + // reset the dates in the configuration, no need to reset then in + // the Query settings object + config.setBeginDate(dateIndexData.getBeginDate()); + config.setEndDate(dateIndexData.getEndDate()); } else { - andChildren.addAll(orChildren); + log.info("Date index not needed for this query"); } - JexlNode andNode = JexlNodeFactory.createAndNode(andChildren); - JexlNodeFactory.setChildren(queryTree, Collections.singleton(andNode)); - - // now lets update the query parameters with the correct start and - // end dates - log.info("Remapped " + dateType + " dates [" + config.getBeginDate() + "," + config.getEndDate() + "] to EVENT dates " - + dateIndexData.getBeginDate() + "," + dateIndexData.getEndDate()); - - // reset the dates in the configuration, no need to reset then in - // the Query settings object - config.setBeginDate(dateIndexData.getBeginDate()); - config.setEndDate(dateIndexData.getEndDate()); - } else { - log.info("Date index not needed for this query"); } return queryTree; @@ -2456,16 +2475,18 
@@ public List getShuffledIvaratoCacheDirConfigs(ShardQuery */ protected IteratorSetting getQueryIterator(MetadataHelper metadataHelper, ShardQueryConfiguration config, String queryString, Boolean isFullTable, boolean isPreload) throws DatawaveQueryException { - if (null == settingFuture) + if (null == settingFuture) { settingFuture = loadQueryIterator(metadataHelper, config, isFullTable, isPreload); - if (settingFuture.isDone()) + } + if (settingFuture.isDone()) { try { return settingFuture.get(); } catch (InterruptedException | ExecutionException e) { throw new RuntimeException(e.getCause()); } - else + } else { return null; + } } public void configureTypeMappings(ShardQueryConfiguration config, IteratorSetting cfg, MetadataHelper metadataHelper, boolean compressMappings) diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java index e5c5548d970..d6ff6b93b34 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java @@ -3027,4 +3027,12 @@ public void setFieldIndexHoleMinThreshold(double fieldIndexHoleMinThreshold) { public double getFieldIndexHoleMinThreshold(int fieldIndexHoleMinThreshold) { return getConfig().getFieldIndexHoleMinThreshold(); } + + public Set getNoExpansionIfCurrentDateTypes() { + return getConfig().getNoExpansionIfCurrentDateTypes(); + } + + public void setNoExpansionIfCurrentDateTypes(Set noExpansionIfCurrentDateTypes) { + getConfig().setNoExpansionIfCurrentDateTypes(noExpansionIfCurrentDateTypes); + } } diff --git a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java index f517706b124..7ae3cdccfc3 100644 --- a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java @@ -604,6 +604,12 @@ public void setUp() throws Exception { updatedValues.put("useQueryTreeScanHintRules", true); defaultValues.put("queryTreeScanHintRules", Collections.emptyList()); updatedValues.put("queryTreeScanHintRules", Collections.singletonList(new IvaratorScanHint())); + + defaultValues.put("noExpansionIfCurrentDateTypes", Collections.emptySet()); + updatedValues.put("noExpansionIfCurrentDateTypes", Collections.singleton("EVENT")); + + defaultValues.put("shardsAndDaysHintAllowed", true); + updatedValues.put("shardsAndDaysHintAllowed", false); } private Query createQuery(String query) { diff --git a/warehouse/query-core/src/test/java/datawave/query/planner/DefaultQueryPlannerTest.java b/warehouse/query-core/src/test/java/datawave/query/planner/DefaultQueryPlannerTest.java new file mode 100644 index 00000000000..dd7a5387037 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/planner/DefaultQueryPlannerTest.java @@ -0,0 +1,203 @@ +package datawave.query.planner; + +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Set; + +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import datawave.ingest.mapreduce.handler.dateindex.DateIndexUtil; +import 
datawave.microservice.query.Query; +import datawave.microservice.query.QueryImpl; +import datawave.query.QueryParameters; +import datawave.query.config.ShardQueryConfiguration; +import datawave.query.exceptions.DatawaveQueryException; +import datawave.query.jexl.JexlASTHelper; +import datawave.query.tables.ScannerFactory; +import datawave.query.util.DateIndexHelper; +import datawave.query.util.MetadataHelper; +import datawave.query.util.MockDateIndexHelper; +import datawave.test.JexlNodeAssert; +import datawave.util.time.DateHelper; + +class DefaultQueryPlannerTest { + + /** + * Contains tests for + * {@link DefaultQueryPlanner#addDateFilters(ASTJexlScript, ScannerFactory, MetadataHelper, DateIndexHelper, ShardQueryConfiguration, Query)} + */ + @Nested + class DateFilterTests { + + private final SimpleDateFormat filterFormat = new SimpleDateFormat("yyyyMMdd:HH:mm:ss:SSSZ"); + + private DefaultQueryPlanner planner; + private ShardQueryConfiguration config; + private QueryImpl settings; + private MockDateIndexHelper dateIndexHelper; + private ASTJexlScript queryTree; + + @BeforeEach + void setUp() { + planner = new DefaultQueryPlanner(); + config = new ShardQueryConfiguration(); + settings = new QueryImpl(); + dateIndexHelper = new MockDateIndexHelper(); + } + + /** + * Verify that when the date type is the default date type, and is part of the noExpansionIfCurrentDateTypes types, and the query's end date is the + * current date, that no date filters are added and SHARDS_AND_DAYS hints are forbidden. + */ + @Test + void testDefaultDateTypeMarkedForNoExpansionAndEndDateIsCurrDate() throws Exception { + queryTree = JexlASTHelper.parseJexlQuery("FOO == 'bar'"); + config.setDefaultDateTypeName("EVENT"); + config.setNoExpansionIfCurrentDateTypes(Set.of("EVENT")); + + Date beginDate = DateHelper.parse("20241001"); + config.setBeginDate(beginDate); + Date endDate = new Date(); + config.setEndDate(endDate); + + ASTJexlScript actual = addDateFilters(); + + JexlNodeAssert.assertThat(actual).isEqualTo("FOO == 'bar'"); + Assertions.assertFalse(config.isShardsAndDaysHintAllowed()); + Assertions.assertEquals(beginDate, config.getBeginDate()); + Assertions.assertEquals(endDate, config.getEndDate()); + } + + /** + * Verify that when a date type is given via parameters that is part of the noExpansionIfCurrentDateTypes types, and the query's end date is the current + * date, that no date filters are added and SHARDS_AND_DAYS hints are forbidden. + */ + @Test + void testParamDateTypeMarkedForNoExpansionAndEndDateIsCurrDate() throws Exception { + queryTree = JexlASTHelper.parseJexlQuery("FOO == 'bar'"); + config.setDefaultDateTypeName("EVENT"); + config.setNoExpansionIfCurrentDateTypes(Set.of("SPECIAL_EVENT")); + + Date beginDate = DateHelper.parse("20241001"); + config.setBeginDate(beginDate); + Date endDate = new Date(); + config.setEndDate(endDate); + + settings.addParameter(QueryParameters.DATE_RANGE_TYPE, "SPECIAL_EVENT"); + + ASTJexlScript actual = addDateFilters(); + + JexlNodeAssert.assertThat(actual).isEqualTo("FOO == 'bar'"); + Assertions.assertFalse(config.isShardsAndDaysHintAllowed()); + Assertions.assertEquals(beginDate, config.getBeginDate()); + Assertions.assertEquals(endDate, config.getEndDate()); + } + + /** + * Verify that when the date type is the default date type, and is part of the noExpansionIfCurrentDateTypes types, but the query's end date is not the + * current date, that no date filters are added and SHARDS_AND_DAYS hints are allowed. 
+         */
+        @Test
+        void testDefaultDateTypeMarkedForNoExpansionAndEndDateIsNotCurrDate() throws Exception {
+            queryTree = JexlASTHelper.parseJexlQuery("FOO == 'bar'");
+            config.setDefaultDateTypeName("EVENT");
+            config.setNoExpansionIfCurrentDateTypes(Set.of("EVENT"));
+
+            Date beginDate = DateHelper.parse("20241001");
+            config.setBeginDate(beginDate);
+            Date endDate = DateHelper.parse("20241010");
+            config.setEndDate(endDate);
+
+            ASTJexlScript actual = addDateFilters();
+
+            JexlNodeAssert.assertThat(actual).isEqualTo("FOO == 'bar'");
+            Assertions.assertTrue(config.isShardsAndDaysHintAllowed());
+            Assertions.assertEquals(beginDate, config.getBeginDate());
+            Assertions.assertEquals(endDate, config.getEndDate());
+        }
+
+        /**
+         * Verify that when a date type is given via parameters that is part of the noExpansionIfCurrentDateTypes types, but the query's end date is not the
+         * current date, that date filters are added and SHARDS_AND_DAYS hints are allowed.
+         */
+        @Test
+        void testParamDateTypeMarkedForNoExpansionAndEndDateIsNotCurrDate() throws Exception {
+            queryTree = JexlASTHelper.parseJexlQuery("FOO == 'bar'");
+            config.setDefaultDateTypeName("EVENT");
+            config.setNoExpansionIfCurrentDateTypes(Set.of("SPECIAL_EVENT"));
+            Date beginDate = DateHelper.parse("20241009");
+            config.setBeginDate(beginDate);
+            Date endDate = DateHelper.parse("20241011");
+            config.setEndDate(endDate);
+            settings.addParameter(QueryParameters.DATE_RANGE_TYPE, "SPECIAL_EVENT");
+            dateIndexHelper.addEntry("20241010", "SPECIAL_EVENT", "wiki", "FOO", "20241010_shard");
+
+            ASTJexlScript actual = addDateFilters();
+
+            JexlNodeAssert.assertThat(actual).hasExactQueryString(
+                            "(FOO == 'bar') && filter:betweenDates(FOO, '" + filterFormat.format(beginDate) + "', '" + filterFormat.format(endDate) + "')");
+            Assertions.assertTrue(config.isShardsAndDaysHintAllowed());
+            Assertions.assertEquals(DateIndexUtil.getBeginDate("20241010"), config.getBeginDate());
+            Assertions.assertEquals(DateIndexUtil.getEndDate("20241010"), config.getEndDate());
+        }
+
+        /**
+         * Verify that when the date type is the default date type, and is not part of the noExpansionIfCurrentDateTypes types, and the query's end date is
+         * not the current date, that no date filters are added and SHARDS_AND_DAYS hints are allowed.
+         */
+        @Test
+        void testDefaultDateTypeIsNotMarkedForNoExpansionAndEndDateNotCurrDate() throws Exception {
+            queryTree = JexlASTHelper.parseJexlQuery("FOO == 'bar'");
+            config.setDefaultDateTypeName("EVENT");
+            config.setNoExpansionIfCurrentDateTypes(Set.of("OTHER_EVENT"));
+
+            Date beginDate = DateHelper.parse("20241001");
+            config.setBeginDate(beginDate);
+            Date endDate = DateHelper.parse("20241010");
+            config.setEndDate(endDate);
+
+            ASTJexlScript actual = addDateFilters();
+
+            JexlNodeAssert.assertThat(actual).isEqualTo("FOO == 'bar'");
+            Assertions.assertTrue(config.isShardsAndDaysHintAllowed());
+            Assertions.assertEquals(beginDate, config.getBeginDate());
+            Assertions.assertEquals(endDate, config.getEndDate());
+        }
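+
+        // The cases above all hinge on how DefaultQueryPlanner decides that the query's end date "is the current
+        // date". A minimal sketch of that decision, assuming a simple recency check against the system clock; the
+        // names and the tolerance below are illustrative, not the actual implementation:
+        //
+        //     boolean endDateIsCurrent = System.currentTimeMillis() - config.getEndDate().getTime() < TOLERANCE_MS;
+        //     if (config.getNoExpansionIfCurrentDateTypes().contains(dateType) && endDateIsCurrent) {
+        //         config.setShardsAndDaysHintAllowed(false); // forbid SHARDS_AND_DAYS hints
+        //         return queryTree; // leave the tree and the configured begin/end dates untouched
+        //     }
+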
+        /**
+         * Verify that when a date type is given via parameters that is not part of the noExpansionIfCurrentDateTypes types, and the query's end date is the
+         * current date, that date filters are added and SHARDS_AND_DAYS hints are allowed.
+         */
+        @Test
+        void testParamDateTypeIsNotMarkedForNoExpansionAndEndDateIsCurrDate() throws Exception {
+            queryTree = JexlASTHelper.parseJexlQuery("FOO == 'bar'");
+            config.setDefaultDateTypeName("EVENT");
+            config.setNoExpansionIfCurrentDateTypes(Set.of("OTHER_EVENT"));
+            Date beginDate = DateHelper.parse("20241001");
+            config.setBeginDate(beginDate);
+            Date endDate = new Date();
+            config.setEndDate(endDate);
+
+            settings.addParameter(QueryParameters.DATE_RANGE_TYPE, "SPECIAL_EVENT");
+            dateIndexHelper.addEntry("20241010", "SPECIAL_EVENT", "wiki", "FOO", "20241010_shard");
+
+            ASTJexlScript actual = addDateFilters();
+
+            JexlNodeAssert.assertThat(actual).hasExactQueryString(
+                            "(FOO == 'bar') && filter:betweenDates(FOO, '" + filterFormat.format(beginDate) + "', '" + filterFormat.format(endDate) + "')");
+            Assertions.assertTrue(config.isShardsAndDaysHintAllowed());
+            Assertions.assertEquals(DateIndexUtil.getBeginDate("20241010"), config.getBeginDate());
+            Assertions.assertEquals(DateIndexUtil.getEndDate("20241010"), config.getEndDate());
+        }
+
+        private ASTJexlScript addDateFilters() throws TableNotFoundException, DatawaveQueryException {
+            return planner.addDateFilters(queryTree, null, null, dateIndexHelper, config, settings);
+        }
+    }
+}

From 9d8519613c42400e2d278284fe822d3a5f237767 Mon Sep 17 00:00:00 2001
From: Moriarty <22225248+apmoriarty@users.noreply.github.com>
Date: Wed, 15 Jan 2025 14:05:26 +0000
Subject: [PATCH 11/16] Prevent the IteratorBuildingVisitor from making
 unnecessary source deep copies (#2686)

IteratorBuildingVisitor now tracks the number of source deep copies

The TLD and Ancestor variants now delegate to the source deep copy method
---
 .../AncestorIndexBuildingVisitor.java         |   8 +-
 .../visitors/IteratorBuildingVisitor.java     | 197 ++++++++++--------
 .../query/tld/TLDIndexBuildingVisitor.java    |  66 +++---
 .../query/iterator/QueryIteratorIT.java       |  34 ++-
 .../visitors/IteratorBuildingVisitorTest.java |  24 +++
 5 files changed, 199 insertions(+), 130 deletions(-)

diff --git a/warehouse/query-core/src/main/java/datawave/query/ancestor/AncestorIndexBuildingVisitor.java b/warehouse/query-core/src/main/java/datawave/query/ancestor/AncestorIndexBuildingVisitor.java
index 23b01f6c087..36aef3b9505 100644
--- a/warehouse/query-core/src/main/java/datawave/query/ancestor/AncestorIndexBuildingVisitor.java
+++ b/warehouse/query-core/src/main/java/datawave/query/ancestor/AncestorIndexBuildingVisitor.java
@@ -50,14 +50,14 @@ protected SortedKeyValueIterator getSourceIterator(final ASTEQNode no
         if (limitLookup && !negation) {
             final String identifier = JexlASTHelper.getIdentifier(node);
             if (!disableFiEval && fieldsToAggregate.contains(identifier)) {
-                final SortedKeyValueIterator<Key,Value> baseIterator = source.deepCopy(env);
+                final SortedKeyValueIterator<Key,Value> baseIterator = deepCopySource();
                 kvIter = new AncestorChildExpansionIterator(baseIterator, getMembers(), equality);
                 seekIndexOnlyDocument(kvIter, node);
             } else {
                 kvIter = new IteratorToSortedKeyValueIterator(getNodeEntry(node).iterator());
             }
         } else {
-            kvIter = source.deepCopy(env);
+            kvIter = deepCopySource();
             seekIndexOnlyDocument(kvIter, node);
         }

@@ -105,7 +105,7 @@ private List getMembers() {

         // use the cached tree if available
         if (members == null) {
-            SortedKeyValueIterator<Key,Value> kvIter = source.deepCopy(env);
+            SortedKeyValueIterator<Key,Value> kvIter = deepCopySource();
             members = getMembers(wholeDocRange.getStartKey().getRow().toString(), tld, dataType, kvIter);

             // set the members for later use
@@ -132,7
+132,7 @@ protected Collection> getNodeEntry(ASTEQNode node) { // use the cached tree if available if (members == null) { - SortedKeyValueIterator kvIter = source.deepCopy(env); + SortedKeyValueIterator kvIter = deepCopySource(); members = getMembers(wholeDocRange.getStartKey().getRow().toString(), tld, dataType, kvIter); // set the members for later use diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IteratorBuildingVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IteratorBuildingVisitor.java index 39c42aa116a..fde988e4bf1 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IteratorBuildingVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/IteratorBuildingVisitor.java @@ -219,6 +219,7 @@ public class IteratorBuildingVisitor extends BaseVisitor { protected Set delayedEqNodes = Sets.newHashSet(); protected Map exceededOrEvaluationCache; + private int deepCopiesCalled = 0; public boolean isQueryFullySatisfied() { if (limitLookup) { @@ -235,7 +236,7 @@ public NestedIterator root() { @Override public Object visit(ASTJexlScript node, Object data) { if (limitLookup) { - limitedSource = source.deepCopy(env); + limitedSource = deepCopySource(); limitedMap = Maps.newHashMap(); } Object obj = super.visit(node, data); @@ -467,10 +468,10 @@ LiteralRange buildLiteralRange(ASTNRNode node) { private NestedIterator buildExceededFromTermFrequency(String identifier, JexlNode rootNode, JexlNode sourceNode, LiteralRange range, Object data) { if (limitLookup) { ChainableEventDataQueryFilter wrapped = createWrappedTermFrequencyFilter(sourceNode, attrFilter); - NestedIterator eventFieldIterator = new EventFieldIterator(rangeLimiter, source.deepCopy(env), identifier, - new AttributeFactory(this.typeMetadata), getEventFieldAggregator(identifier, wrapped)); + NestedIterator eventFieldIterator = new EventFieldIterator(rangeLimiter, deepCopySource(), identifier, new AttributeFactory(this.typeMetadata), + getEventFieldAggregator(identifier, wrapped)); TermFrequencyIndexBuilder builder = new TermFrequencyIndexBuilder(); - builder.setSource(source.deepCopy(env)); + builder.setSource(deepCopySource()); builder.setTypeMetadata(typeMetadata); builder.setFieldsToAggregate(fieldsToAggregate); builder.setTimeFilter(timeFilter); @@ -608,26 +609,20 @@ public Object visit(ASTNENode node, Object data) { // We don't support querying only on a negation throw new IllegalStateException("Root node cannot be a negation"); } - IndexIteratorBuilder builder = null; - try { - builder = iteratorBuilderClass.asSubclass(IndexIteratorBuilder.class).getDeclaredConstructor().newInstance(); - } catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) { - throw new RuntimeException(e); - } - builder.setQueryId(queryId); - builder.setSource(source.deepCopy(env)); - builder.setTypeMetadata(typeMetadata); - builder.setFieldsToAggregate(fieldsToAggregate); - builder.setTimeFilter(timeFilter); - builder.setDatatypeFilter(getDatatypeFilter()); - builder.setKeyTransform(getFiAggregator()); - builder.setEnv(env); - builder.setNode(node); + + IndexIteratorBuilder builder = getIteratorBuilder(); node.childrenAccept(this, builder); - // A EQNode may be of the form FIELD == null. The evaluation can - // handle this, so we should just not build an IndexIterator for it. 
-        if (null == builder.getValue()) {
+        // verify that the field exists and is indexed
+        if (builder.getField() == null || isUnindexed(builder.getField())) {
+            if (isQueryFullySatisfied) {
+                log.warn("Determined that isQueryFullySatisfied should be false, but it was not preset to false in the SatisfactionVisitor");
+            }
+            return null;
+        }
+
+        // the evaluation can handle a term in the form 'FIELD == null', however no IndexIterator should be built
+        if (builder.getValue() == null) {
             if (this.indexOnlyFields.contains(builder.getField())) {
                 QueryException qe = new QueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_RETRIEVAL_ERROR,
                                 MessageFormat.format("{0} {1} {2}", "Unable to compare index only field", builder.getField(), "against null"));
@@ -636,6 +631,7 @@ public Object visit(ASTNENode node, Object data) {

             // SatisfactionVisitor should have already initialized this to false
             if (isQueryFullySatisfied) {
+                // note: this is different from the ASTEQ method...
                 log.warn("Determined that isQueryFullySatisfied should be false, but it was not preset to false in the SatisfactionVisitor");
             }
             return null;
@@ -645,6 +641,18 @@ public Object visit(ASTNENode node, Object data) {
         // Add the negated IndexIteratorBuilder to the parent as an *exclude*
         if (!iterators.hasSeen(builder.getField(), builder.getValue()) && includeReferences.contains(builder.getField())
                         && !excludeReferences.contains(builder.getField())) {
+
+            // only deep copy the source once this term is known to be new; terms already seen must not trigger additional copies
+            builder.setQueryId(queryId);
+            builder.setSource(deepCopySource());
+            builder.setTypeMetadata(typeMetadata);
+            builder.setFieldsToAggregate(fieldsToAggregate);
+            builder.setTimeFilter(timeFilter);
+            builder.setDatatypeFilter(getDatatypeFilter());
+            builder.setKeyTransform(getFiAggregator());
+            builder.setEnv(env);
+            builder.setNode(node);
+
             iterators.addExclude(builder.build());
         } else {
             // SatisfactionVisitor should have already initialized this to false
@@ -664,45 +672,21 @@ public Object visit(ASTMethodNode node, Object data) {

     @Override
     public Object visit(ASTEQNode node, Object data) {
-        IndexIteratorBuilder builder = null;
-        try {
-            builder = iteratorBuilderClass.asSubclass(IndexIteratorBuilder.class).getDeclaredConstructor().newInstance();
-        } catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) {
-            throw new RuntimeException(e);
-        }
+        // visit children first to populate the field and value
+        IndexIteratorBuilder builder = getIteratorBuilder();
+        node.childrenAccept(this, builder);

-        /**
-         * If we have an unindexed type enforced, we've been configured to assert whether the field is indexed.
- */ - if (isUnindexed(node)) { + // verify that field exists and is indexed + if (builder.getField() == null || isUnindexed(builder.getField())) { if (isQueryFullySatisfied) { log.warn("Determined that isQueryFullySatisfied should be false, but it was not preset to false in the SatisfactionVisitor"); } return null; } - // boolean to tell us if we've overridden our subtree due to - // a negation or - boolean isNegation = false; - if (data instanceof AbstractIteratorBuilder) { - AbstractIteratorBuilder oib = (AbstractIteratorBuilder) data; - isNegation = oib.isInANot(); - } - builder.setQueryId(queryId); - builder.setSource(getSourceIterator(node, isNegation)); - builder.setTimeFilter(getTimeFilter(node)); - builder.setTypeMetadata(typeMetadata); - builder.setFieldsToAggregate(fieldsToAggregate); - builder.setDatatypeFilter(getDatatypeFilter()); - builder.setKeyTransform(getFiAggregator()); - builder.setEnv(env); - builder.forceDocumentBuild(!limitLookup && this.isQueryFullySatisfied); - builder.setNode(node); - node.childrenAccept(this, builder); - // A EQNode may be of the form FIELD == null. The evaluation can - // handle this, so we should just not build an IndexIterator for it. - if (null == builder.getValue()) { - if (this.indexOnlyFields.contains(builder.getField())) { + // the evaluation can handle a term in the form 'FIELD == null', however no IndexIterator should be built + if (builder.getValue() == null) { + if (indexOnlyFields.contains(builder.getField())) { QueryException qe = new QueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_RETRIEVAL_ERROR, MessageFormat.format("{0} {1} {2}", "Unable to compare index only field", builder.getField(), "against null")); throw new DatawaveFatalQueryException(qe); @@ -710,17 +694,21 @@ public Object visit(ASTEQNode node, Object data) { return null; } + // check to see if there is a mismatch between included and exclude references. + // note: this is a lift and shift of old code and probably doesn't work as intended.. + if (!includeReferences.contains(builder.getField()) && excludeReferences.contains(builder.getField())) { + throw new IllegalStateException(builder.getField() + " is a disallowlisted reference."); + } + // We have no parent already defined if (data == null) { // Make this EQNode the root - if (!includeReferences.contains(builder.getField()) && excludeReferences.contains(builder.getField())) { - throw new IllegalStateException(builder.getField() + " is a disallowlisted reference."); - } else if (builder.getField() != null) { - root = builder.build(); + // load the builder just in time and make it the root of the query (query is a single EQ node) + loadBuilder(builder, data, node); + root = builder.build(); - if (log.isTraceEnabled()) { - log.trace("Build IndexIterator: " + root); - } + if (log.isTraceEnabled()) { + log.trace("Build IndexIterator: " + root); } } else { AbstractIteratorBuilder iterators = (AbstractIteratorBuilder) data; @@ -730,6 +718,7 @@ public Object visit(ASTEQNode node, Object data) { final boolean notExcluded = !excludeReferences.contains(builder.getField()); if (isNew && inclusionReference && notExcluded) { + loadBuilder(builder, data, node); iterators.addInclude(builder.build()); } else { if (isQueryFullySatisfied) { @@ -741,14 +730,44 @@ public Object visit(ASTEQNode node, Object data) { return null; } + /** + * Load an {@link IndexIteratorBuilder} with all requisite components. + *
+ * Note: at this point the method call to {@link #getSourceIterator(ASTEQNode, boolean)} WILL deep copy the source + * + * @param builder + * an IndexIteratorBuilder + * @param data + * an existing AbstractIteratorBuilder, or null if this term is the root of the query + * @param node + * the equality term + */ + protected void loadBuilder(IndexIteratorBuilder builder, Object data, ASTEQNode node) { + // boolean to tell us if we've overridden our subtree due to + // a negation or + boolean isNegation = false; + if (data instanceof AbstractIteratorBuilder) { + AbstractIteratorBuilder oib = (AbstractIteratorBuilder) data; + isNegation = oib.isInANot(); + } + builder.setQueryId(queryId); + builder.setSource(getSourceIterator(node, isNegation)); + builder.setTimeFilter(getTimeFilter(node)); + builder.setTypeMetadata(typeMetadata); + builder.setFieldsToAggregate(fieldsToAggregate); + builder.setDatatypeFilter(getDatatypeFilter()); + builder.setKeyTransform(getFiAggregator()); + builder.setEnv(env); + builder.forceDocumentBuild(!limitLookup && this.isQueryFullySatisfied); + builder.setNode(node); + } + protected TimeFilter getTimeFilter(ASTEQNode node) { final String identifier = JexlASTHelper.getIdentifier(node); if (limitLookup && !limitOverride && !fieldsToAggregate.contains(identifier)) { return TimeFilter.alwaysTrue(); } - return timeFilter; - } protected SortedKeyValueIterator getSourceIterator(final ASTEQNode node, boolean negation) { @@ -759,7 +778,7 @@ protected SortedKeyValueIterator getSourceIterator(final ASTEQNode no if (limitLookup && !negation) { if (!disableFiEval && fieldsToAggregate.contains(identifier)) { - kvIter = source.deepCopy(env); + kvIter = deepCopySource(); seekIndexOnlyDocument(kvIter, node); } else if (disableFiEval && fieldsToAggregate.contains(identifier)) { kvIter = createIndexOnlyKey(node); @@ -770,7 +789,7 @@ protected SortedKeyValueIterator getSourceIterator(final ASTEQNode no } } else { - kvIter = source.deepCopy(env); + kvIter = deepCopySource(); seekIndexOnlyDocument(kvIter, node); } @@ -787,7 +806,7 @@ protected SortedKeyValueIterator createIndexOnlyKey(ASTEQNode node) t IdentifierOpLiteral op = JexlASTHelper.getIdentifierOpLiteral(node); if (null == op || null == op.getLiteralValue()) { // deep copy since this is likely a null literal - return source.deepCopy(env); + return deepCopySource(); } String fn = op.deconstructIdentifier(); @@ -804,7 +823,7 @@ protected SortedKeyValueIterator createIndexOnlyKey(ASTEQNode node) t SortedKeyValueIterator mySource = limitedSource; // if source size > 0, we are free to use up to that number for this query if (source.getSourceSize() > 0) - mySource = source.deepCopy(env); + mySource = deepCopySource(); mySource.seek(new Range(newStartKey, true, newStartKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL), false), Collections.emptyList(), false); @@ -827,11 +846,8 @@ protected SortedKeyValueIterator createIndexOnlyKey(ASTEQNode node) t */ protected void seekIndexOnlyDocument(SortedKeyValueIterator kvIter, ASTEQNode node) throws IOException { if (null != rangeLimiter && limitLookup) { - Key newStartKey = getKey(node); - kvIter.seek(new Range(newStartKey, true, newStartKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL), false), Collections.emptyList(), false); - } } @@ -843,7 +859,6 @@ protected void seekIndexOnlyDocument(SortedKeyValueIterator kvIter, A protected Collection> getNodeEntry(ASTEQNode node) { Key key = getKey(node); return Collections.singleton(Maps.immutableEntry(key, Constants.NULL_VALUE)); - } /** @@ -854,7 
+869,6 @@ protected Collection> getNodeEntry(ASTEQNode node) { * @return a collection of entries */ protected Collection> getExceededEntry(String identifier, LiteralRange range) { - Key key = getIvaratorKey(identifier, range); return Collections.singleton(Maps.immutableEntry(key, Constants.NULL_VALUE)); @@ -910,12 +924,7 @@ protected Key getKey(JexlNode node) { * @return a key iterator */ protected NestedIterator createExceededCheck(String identifier, LiteralRange range, JexlNode rootNode) { - IndexIteratorBuilder builder = null; - try { - builder = iteratorBuilderClass.asSubclass(IndexIteratorBuilder.class).getDeclaredConstructor().newInstance(); - } catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) { - throw new RuntimeException(e); - } + IndexIteratorBuilder builder = getIteratorBuilder(); IteratorToSortedKeyValueIterator kvIter = new IteratorToSortedKeyValueIterator(getExceededEntry(identifier, range).iterator()); builder.setQueryId(queryId); @@ -934,12 +943,7 @@ protected NestedIterator createExceededCheck(String identifier, LiteralRang } protected Object visitDelayedIndexOnly(ASTEQNode node, Object data) { - IndexIteratorBuilder builder = null; - try { - builder = iteratorBuilderClass.asSubclass(IndexIteratorBuilder.class).getDeclaredConstructor().newInstance(); - } catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) { - throw new RuntimeException(e); - } + IndexIteratorBuilder builder = getIteratorBuilder(); /** * If we have an unindexed type enforced, we've been configured to assert whether the field is indexed. @@ -1299,7 +1303,7 @@ public void ivarateFilter(JexlNode rootNode, JexlNode sourceNode, Object data, L */ private void contextRequiredRegex(ASTAndNode and, JexlNode source, Object data) { - SortedKeyValueIterator sourceCopy = this.source.deepCopy(env); + SortedKeyValueIterator sourceCopy = deepCopySource(); String field = JexlASTHelper.getIdentifier(source); String literal = String.valueOf(JexlASTHelper.getLiteralValue(source)); @@ -1331,7 +1335,7 @@ private void contextRequiredRegex(ASTAndNode and, JexlNode source, Object data) */ private void contextRequiredRange(ASTAndNode and, JexlNode source, Object data) { - SortedKeyValueIterator sourceCopy = this.source.deepCopy(env); + SortedKeyValueIterator sourceCopy = deepCopySource(); LiteralRange range = JexlASTHelper.findRange().getRange(source); RangeFilterIterator include = new RangeFilterIterator(); @@ -1510,6 +1514,23 @@ public void ivarate(IvaratorBuilder builder, JexlNode rootNode, JexlNode sourceN } } + public IndexIteratorBuilder getIteratorBuilder() { + try { + return iteratorBuilderClass.asSubclass(IndexIteratorBuilder.class).getDeclaredConstructor().newInstance(); + } catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) { + throw new RuntimeException(e); + } + } + + protected SortedKeyValueIterator deepCopySource() { + deepCopiesCalled++; + return source.deepCopy(env); + } + + public int getDeepCopiesCalled() { + return deepCopiesCalled; + } + /** * Get the DatatypeFilter * @@ -1592,6 +1613,10 @@ protected boolean isUnindexed(ASTIdentifier node) { return unindexedFields.contains(fieldName); } + protected boolean isUnindexed(String fieldName) { + return unindexedFields.contains(fieldName); + } + public IteratorBuildingVisitor setUnindexedFields(Collection unindexedField) { this.unindexedFields.addAll(unindexedField); return this; diff --git 
a/warehouse/query-core/src/main/java/datawave/query/tld/TLDIndexBuildingVisitor.java b/warehouse/query-core/src/main/java/datawave/query/tld/TLDIndexBuildingVisitor.java index 719bee71028..2c20c1d4151 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tld/TLDIndexBuildingVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/tld/TLDIndexBuildingVisitor.java @@ -22,6 +22,7 @@ import datawave.data.type.NoOpType; import datawave.query.Constants; import datawave.query.iterator.builder.AbstractIteratorBuilder; +import datawave.query.iterator.builder.IndexIteratorBuilder; import datawave.query.jexl.JexlASTHelper; import datawave.query.jexl.JexlASTHelper.IdentifierOpLiteral; import datawave.query.jexl.LiteralRange; @@ -44,7 +45,7 @@ public Object visit(ASTNENode node, Object data) { throw new IllegalStateException("Root node cannot be a negation"); } TLDIndexIteratorBuilder builder = new TLDIndexIteratorBuilder(); - builder.setSource(source.deepCopy(env)); + builder.setSource(deepCopySource()); builder.setTypeMetadata(typeMetadata); builder.setDatatypeFilter(getDatatypeFilter()); builder.setFieldsToAggregate(fieldsToAggregate); @@ -94,7 +95,7 @@ protected SortedKeyValueIterator createIndexOnlyKey(ASTEQNode node) t IdentifierOpLiteral op = JexlASTHelper.getIdentifierOpLiteral(node); if (null == op || null == op.getLiteralValue()) { // deep copy since this is likely a null literal - return source.deepCopy(env); + return deepCopySource(); } String fn = op.deconstructIdentifier(); @@ -111,7 +112,7 @@ protected SortedKeyValueIterator createIndexOnlyKey(ASTEQNode node) t SortedKeyValueIterator mySource = limitedSource; // if source size > 0, we are free to use up to that number for this query if (source.getSourceSize() > 0) - mySource = source.deepCopy(env); + mySource = deepCopySource(); mySource.seek(new Range(newStartKey, true, newStartKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL), false), Collections.emptyList(), false); @@ -126,63 +127,50 @@ protected SortedKeyValueIterator createIndexOnlyKey(ASTEQNode node) t @Override public Object visit(ASTEQNode node, Object data) { - /** - * If we have an unindexed type enforced, we've been configured to assert whether the field is indexed. - */ - if (isUnindexed(node)) { - if (isQueryFullySatisfied == true) { + + TLDIndexIteratorBuilder builder = new TLDIndexIteratorBuilder(); + node.childrenAccept(this, builder); + + // verify that the field exists and is indexed + if (builder.getField() == null || isUnindexed(builder.getField())) { + if (isQueryFullySatisfied) { log.warn("Determined that isQueryFullySatisfied should be false, but it was not preset to false in the SatisfactionVisitor"); } return null; } - TLDIndexIteratorBuilder builder = new TLDIndexIteratorBuilder(); - boolean isNegation = false; - if (data instanceof AbstractIteratorBuilder) { - AbstractIteratorBuilder oib = (AbstractIteratorBuilder) data; - isNegation = oib.isInANot(); - } - builder.setSource(getSourceIterator(node, isNegation)); - builder.setTimeFilter(getTimeFilter(node)); - builder.setTypeMetadata(typeMetadata); - builder.setDatatypeFilter(getDatatypeFilter()); - builder.setFieldsToAggregate(fieldsToAggregate); - builder.setKeyTransform(getFiAggregator()); - builder.forceDocumentBuild(!limitLookup && this.isQueryFullySatisfied); - builder.setNode(node); - node.childrenAccept(this, builder); - - // A EQNode may be of the form FIELD == null. The evaluation can - // handle this, so we should just not build an IndexIterator for it. 
- if (null == builder.getValue()) { - if (isQueryFullySatisfied == true) { + // check for the case 'FIELD == null' + if (builder.getValue() == null) { + if (isQueryFullySatisfied) { throw new RuntimeException("Determined that isQueryFullySatisfied should be false, but it was not preset to false by the SatisfactionVisitor"); } return null; } + // check to see if there is a mismatch between included and exclude references. + // note: this is a lift and shift of old code and probably doesn't work as intended.. + if (!includeReferences.contains(builder.getField()) && excludeReferences.contains(builder.getField())) { + throw new IllegalStateException(builder.getField() + " is a disallowlisted reference."); + } + // We have no parent already defined if (data == null) { // Make this EQNode the root - if (!includeReferences.contains(builder.getField()) && excludeReferences.contains(builder.getField())) { - throw new IllegalStateException(builder.getField() + " is a disallowlisted reference."); - } else { - root = builder.build(); - - if (log.isTraceEnabled()) { - log.trace("Build IndexIterator: " + root); - } + loadBuilder(builder, data, node); + root = builder.build(); + if (log.isTraceEnabled()) { + log.trace("Build IndexIterator: " + root); } } else { AbstractIteratorBuilder iterators = (AbstractIteratorBuilder) data; // Add this IndexIterator to the parent if (!iterators.hasSeen(builder.getField(), builder.getValue()) && includeReferences.contains(builder.getField()) && !excludeReferences.contains(builder.getField())) { + loadBuilder(builder, data, node); iterators.addInclude(builder.build()); } else { - if (isQueryFullySatisfied == true) { - throw new RuntimeException( - "Determined that isQueryFullySatisfied should be false, but it was not preset to false by the SatisfactionVisitor"); + if (isQueryFullySatisfied) { + log.warn("Determined that isQueryFullySatisfied should be false, but it was not preset to false by the SatisfactionVisitor"); } } } diff --git a/warehouse/query-core/src/test/java/datawave/query/iterator/QueryIteratorIT.java b/warehouse/query-core/src/test/java/datawave/query/iterator/QueryIteratorIT.java index 244f076503b..13c007d5cf8 100644 --- a/warehouse/query-core/src/test/java/datawave/query/iterator/QueryIteratorIT.java +++ b/warehouse/query-core/src/test/java/datawave/query/iterator/QueryIteratorIT.java @@ -264,6 +264,38 @@ public void indexOnly_shardRange_test() throws IOException { indexOnly_test(seekRange, query, false, Collections.EMPTY_LIST, Collections.EMPTY_LIST); } + @Test + public void indexOnly_documentSpecific_intersection_noExtraSourceDeepCopies_test() throws IOException { + // build the seek range for a document specific pull + Range seekRange = getDocumentRange("123.345.456"); + String query = "INDEX_ONLY_FIELD1 == 'apple' && INDEX_ONLY_FIELD1 == 'apple'"; + indexOnly_test(seekRange, query, false, Collections.emptyList(), Collections.emptyList()); + } + + @Test + public void indexOnly_shardRange_intersection_noExtraSourceDeepCopies_test() throws IOException { + // build the seek range for a document specific pull + Range seekRange = getShardRange(); + String query = "INDEX_ONLY_FIELD1 == 'apple' && INDEX_ONLY_FIELD1 == 'apple'"; + indexOnly_test(seekRange, query, false, Collections.emptyList(), Collections.emptyList()); + } + + @Test + public void indexOnly_documentSpecific_union_noExtraSourceDeepCopies_test() throws IOException { + // build the seek range for a document specific pull + Range seekRange = getDocumentRange("123.345.456"); + String query = 
"INDEX_ONLY_FIELD1 == 'apple' || INDEX_ONLY_FIELD1 == 'apple'"; + indexOnly_test(seekRange, query, false, Collections.emptyList(), Collections.emptyList()); + } + + @Test + public void indexOnly_shardRange_union_noExtraSourceDeepCopies_test() throws IOException { + // build the seek range for a shard + Range seekRange = getShardRange(); + String query = "INDEX_ONLY_FIELD1 == 'apple' || INDEX_ONLY_FIELD1 == 'apple'"; + indexOnly_test(seekRange, query, false, Collections.emptyList(), Collections.emptyList()); + } + @Test public void indexOnly_documentSpecific_hitTerm_test() throws IOException { options.put(JexlEvaluation.HIT_TERM_FIELD, "true"); @@ -276,7 +308,7 @@ public void indexOnly_documentSpecific_hitTerm_test() throws IOException { @Test public void indexOnly_shardRange_hitTerm_test() throws IOException { options.put(JexlEvaluation.HIT_TERM_FIELD, "true"); - // build the seek range for a document specific pull + // build the seek range for a shard Range seekRange = getShardRange(); String query = "INDEX_ONLY_FIELD1 == 'apple'"; indexOnly_test(seekRange, query, false, Collections.EMPTY_LIST, Collections.EMPTY_LIST); diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IteratorBuildingVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IteratorBuildingVisitorTest.java index 8701e24f7a0..a774dd6577f 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IteratorBuildingVisitorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IteratorBuildingVisitorTest.java @@ -59,6 +59,7 @@ public void visitEqNode_nullValueTest() { IteratorBuildingVisitor visitor = getDefault(); Assert.assertEquals(null, node.jjtAccept(visitor, null)); + Assert.assertEquals(0, visitor.getDeepCopiesCalled()); } /** @@ -94,6 +95,7 @@ public void visitNeNode_nullValueTest() throws ParseException { Assert.assertNotEquals(null, nestedIterator); Assert.assertEquals(1, nestedIterator.leaves().size()); Assert.assertTrue(nestedIterator.leaves().iterator().next().toString().contains("FOO")); + Assert.assertEquals(1, visitor.getDeepCopiesCalled()); } /** @@ -112,6 +114,28 @@ public void visitNeNode_nullValueIndexOnlyTest() throws ParseException { Assert.assertFalse(true); } + @Test + public void testIntersectionOfRepeatedTermsDoesNotProduceAdditionalSourceDeepCopies() throws Exception { + String query = "FIELD == 'value' && FIELD == 'value' && FIELD == 'value'"; + ASTJexlScript script = JexlASTHelper.parseAndFlattenJexlQuery(query); + + IteratorBuildingVisitor visitor = getDefault(); + script.jjtAccept(visitor, null); + + Assert.assertEquals(1, visitor.getDeepCopiesCalled()); + } + + @Test + public void testUnionOfRepeatedTermsDoesNotProduceAdditionalSourceDeepCopies() throws Exception { + String query = "FIELD == 'value' || FIELD == 'value' || FIELD == 'value'"; + ASTJexlScript script = JexlASTHelper.parseAndFlattenJexlQuery(query); + + IteratorBuildingVisitor visitor = getDefault(); + script.jjtAccept(visitor, null); + + Assert.assertEquals(1, visitor.getDeepCopiesCalled()); + } + @Test public void buildLiteralRange_trailingWildcardTest() throws ParseException { ASTJexlScript query = JexlASTHelper.parseJexlQuery("FOO =~ 'bar.*'"); From 5dab6cc6f9dc02b69b25bbbb13b2ea1d7823c1d8 Mon Sep 17 00:00:00 2001 From: Ivan Bella <347158+ivakegg@users.noreply.github.com> Date: Wed, 15 Jan 2025 13:30:52 +0000 Subject: [PATCH 12/16] Revert "summaries for multiple views fixup (#2674)" This reverts commit 
67760a51e877cab0d8305a90060b13eeb8ebbf9b. --- .../query/iterator/logic/SummaryCreator.java | 12 ++++++------ .../datawave/query/tables/ShardQueryLogic.java | 4 ++-- .../logic/ContentSummaryIteratorTest.java | 3 ++- .../java/datawave/query/util/SummaryTest.java | 16 ---------------- .../java/datawave/query/util/WiseGuysIngest.java | 1 - 5 files changed, 10 insertions(+), 26 deletions(-) diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/SummaryCreator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/SummaryCreator.java index 4c802b3b271..7a7ec00d501 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/SummaryCreator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/SummaryCreator.java @@ -56,9 +56,9 @@ private static String getEndingWildcardSummary(String currentViewName, Map summaries = new HashMap<>(); for (Map.Entry entry : foundContent.entrySet()) { // first part is view, second part is if compressed still - String[] temp = entry.getKey().split(Constants.COLON); - if (temp[0].startsWith(currentViewName)) { - summaries.put(temp[0], getSummaryForView(entry.getValue(), summarySize, Boolean.parseBoolean(temp[1]))); + String[] s = entry.getKey().split(Constants.COLON); + if (s[0].startsWith(currentViewName)) { + summaries.put(entry.getKey(), getSummaryForView(entry.getValue(), summarySize, Boolean.parseBoolean(s[1]))); } } if (!summaries.isEmpty()) { @@ -76,9 +76,9 @@ private static String getEndingWildcardSummary(String currentViewName, Map foundContent, int summarySize) { for (Map.Entry entry : foundContent.entrySet()) { // first part is view, second part is if compressed still - String[] temp = entry.getKey().split(Constants.COLON); - if (temp[0].equals(currentViewName)) { - return currentViewName + ": " + getSummaryForView(entry.getValue(), summarySize, Boolean.parseBoolean(temp[1])); + String[] s = entry.getKey().split(Constants.COLON); + if (s[0].equals(currentViewName)) { + return currentViewName + ": " + getSummaryForView(entry.getValue(), summarySize, Boolean.parseBoolean(s[1])); } } return null; diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java index d6ff6b93b34..e487962fe46 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java @@ -1549,11 +1549,11 @@ public void setSummaryOptions(SummaryOptions summaryOptions) { getConfig().setSummaryOptions(summaryOptions); } - public String getSummaryIterator() { + public String getSummaryIteratorClassName() { return getConfig().getSummaryIterator().getName(); } - public void setSummaryIterator(String iteratorClass) { + public void setSummaryIteratorClassName(String iteratorClass) { try { getConfig().setSummaryIterator((Class>) Class.forName(iteratorClass)); } catch (Exception e) { diff --git a/warehouse/query-core/src/test/java/datawave/query/iterator/logic/ContentSummaryIteratorTest.java b/warehouse/query-core/src/test/java/datawave/query/iterator/logic/ContentSummaryIteratorTest.java index cf427bbc260..876698d9f1a 100644 --- a/warehouse/query-core/src/test/java/datawave/query/iterator/logic/ContentSummaryIteratorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/iterator/logic/ContentSummaryIteratorTest.java @@ -223,7 +223,8 @@ public void testMatchFoundWithTrailingRegex() throws IOException 
{ Key topKey = iterator.getTopKey(); assertEquals(row, topKey.getRow()); assertEquals(new Text("pdf" + Constants.NULL + "111.222.333"), topKey.getColumnFamily()); - assertEquals(new Text("CONTENT31: test content wildcard matching one\nCONTENT32: test content wildcard matching two"), topKey.getColumnQualifier()); + assertEquals(new Text("CONTENT31:true: test content wildcard matching one\nCONTENT32:true: test content wildcard matching two"), + topKey.getColumnQualifier()); } @Test diff --git a/warehouse/query-core/src/test/java/datawave/query/util/SummaryTest.java b/warehouse/query-core/src/test/java/datawave/query/util/SummaryTest.java index ee4281b1620..9d59f63b0e1 100644 --- a/warehouse/query-core/src/test/java/datawave/query/util/SummaryTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/util/SummaryTest.java @@ -379,20 +379,4 @@ public void testOnlyWithNoOtherOptions() throws Exception { runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); } - - @Test - public void testMultiView() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(SIZE:50/VIEWS:CONTENT*/ONLY)"; - - // not sure why the timestamp and delete flag are present - Set goodResults = new HashSet<>(Set.of("CONTENT_SUMMARY:CONTENT: You can get much farther with a kind word and a gu" - + "\nCONTENT2: A lawyer and his briefcase can steal more than ten: : [] 9223372036854775807 false")); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); - } } diff --git a/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java b/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java index 154665145fb..115926276e6 100644 --- a/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java +++ b/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java @@ -773,7 +773,6 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw addDColumn(datatype, corleoneUID, "CONTENT", "Im gonna make him an offer he cant refuse", bw); addDColumn(datatype, sopranoUID, "CONTENT", "If you can quote the rules then you can obey them", bw); addDColumn(datatype, caponeUID, "CONTENT", "You can get much farther with a kind word and a gun than you can with a kind word alone", bw); - addDColumn(datatype, caponeUID, "CONTENT2", "A lawyer and his briefcase can steal more than ten men with guns.", bw); } finally { if (null != bw) { bw.close(); From 0c28a11eeafd9ea99c876e4e3df52ead3124d609 Mon Sep 17 00:00:00 2001 From: Ivan Bella <347158+ivakegg@users.noreply.github.com> Date: Wed, 15 Jan 2025 13:31:04 +0000 Subject: [PATCH 13/16] Revert "Feature/create summaries (#2649)" This reverts commit 9ca79cb341c92e0adb69d25c7d4a880227d7b73a. 
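The feature removed by this revert was driven by a single '/'-delimited option string (for example, #SUMMARY(SIZE:50/VIEWS:CONTENT*/ONLY) in LUCENE syntax), parsed by the SummaryOptions class deleted below. A minimal standalone sketch of that option grammar, based only on the deleted javadoc and parser (SIZE, ONLY, and VIEWS keys, comma-separated view names, default size 150); the class and method names here are illustrative stand-ins, not DataWave API:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Locale;

    // Illustrative stand-in for the deleted datawave.query.attributes.SummaryOptions parser.
    public class SummaryOptionsSketch {
        static final int DEFAULT_SIZE = 150; // mirrors SummaryOptions.DEFAULT_SIZE

        int summarySize = DEFAULT_SIZE;             // SIZE:<n>, the summary length in characters
        boolean onlyListedViews = false;            // ONLY, restrict summaries to the listed views
        List<String> viewNames = new ArrayList<>(); // VIEWS:<name,...>, uppercased view names

        static SummaryOptionsSketch from(String options) {
            SummaryOptionsSketch parsed = new SummaryOptionsSketch();
            // options are '/'-separated; 'key:value' entries are split on ':'
            for (String part : options.replaceAll("\\s", "").split("/")) {
                String[] kv = part.split(":");
                if (kv[0].equalsIgnoreCase("SIZE")) {
                    parsed.summarySize = Integer.parseInt(kv[1]);
                } else if (kv[0].equalsIgnoreCase("ONLY")) {
                    parsed.onlyListedViews = true;
                } else if (kv[0].equalsIgnoreCase("VIEWS")) {
                    for (String name : kv[1].split(",")) {
                        parsed.viewNames.add(name.toUpperCase(Locale.ROOT));
                    }
                }
            }
            return parsed;
        }

        public static void main(String[] args) {
            SummaryOptionsSketch s = from("SIZE:50/ONLY/VIEWS:CONTENT1,CONTENT2");
            System.out.println(s.summarySize + " " + s.onlyListedViews + " " + s.viewNames); // 50 true [CONTENT1, CONTENT2]
        }
    }

In the deleted SummaryCreator, a view name ending in '*' (as in VIEWS:CONTENT*) appears to have been matched by prefix against the views found for a document; the sketch above omits that wildcard handling.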
--- .../main/java/datawave/query/Constants.java | 2 - .../java/datawave/query/QueryParameters.java | 5 - .../query/attributes/SummaryOptions.java | 229 --------- .../query/config/ShardQueryConfiguration.java | 29 -- .../query/iterator/QueryIterator.java | 24 - .../datawave/query/iterator/QueryOptions.java | 42 -- .../logic/ContentSummaryIterator.java | 348 -------------- .../query/iterator/logic/SummaryCreator.java | 101 ---- .../logic/TermFrequencyExcerptIterator.java | 6 +- .../query/jexl/functions/QueryFunctions.java | 1 - .../functions/QueryFunctionsDescriptor.java | 4 - .../QueryOptionsFromQueryVisitor.java | 8 +- .../functions/jexl/SummaryOptions.java | 57 --- .../parser/lucene/AccumuloSyntaxParser.java | 78 ++- .../parser/lucene/AccumuloSyntaxParser.jj | 4 +- .../AccumuloSyntaxParserTokenManager.java | 444 +++++++++--------- ...java => AcumuloSyntaxParserConstants.java} | 41 +- .../query/planner/DefaultQueryPlanner.java | 9 - .../query/planner/QueryOptionsSwitch.java | 6 +- .../table/parser/ContentKeyValueFactory.java | 30 +- .../query/tables/ShardQueryLogic.java | 29 -- .../query/transformer/SummaryTransform.java | 276 ----------- .../config/ShardQueryConfigurationTest.java | 8 +- .../logic/ContentSummaryIteratorTest.java | 256 ---------- .../java/datawave/query/util/SummaryTest.java | 382 --------------- .../datawave/query/util/WiseGuysIngest.java | 29 -- .../datawave/query/QueryLogicFactory.xml | 1 - 27 files changed, 270 insertions(+), 2179 deletions(-) delete mode 100644 warehouse/query-core/src/main/java/datawave/query/attributes/SummaryOptions.java delete mode 100644 warehouse/query-core/src/main/java/datawave/query/iterator/logic/ContentSummaryIterator.java delete mode 100644 warehouse/query-core/src/main/java/datawave/query/iterator/logic/SummaryCreator.java delete mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/SummaryOptions.java rename warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/{AccumuloSyntaxParserConstants.java => AcumuloSyntaxParserConstants.java} (54%) delete mode 100644 warehouse/query-core/src/main/java/datawave/query/transformer/SummaryTransform.java delete mode 100644 warehouse/query-core/src/test/java/datawave/query/iterator/logic/ContentSummaryIteratorTest.java delete mode 100644 warehouse/query-core/src/test/java/datawave/query/util/SummaryTest.java diff --git a/warehouse/query-core/src/main/java/datawave/query/Constants.java b/warehouse/query-core/src/main/java/datawave/query/Constants.java index a38f36f874f..c1ab9ff4f56 100644 --- a/warehouse/query-core/src/main/java/datawave/query/Constants.java +++ b/warehouse/query-core/src/main/java/datawave/query/Constants.java @@ -79,8 +79,6 @@ public class Constants { // From ingest public static final Text TERM_FREQUENCY_COLUMN_FAMILY = new Text("tf"); - public static final Text D_COLUMN_FAMILY = new Text("d"); - // content functions public static final String TERM_OFFSET_MAP_JEXL_VARIABLE_NAME = ContentFunctions.TERM_OFFSET_MAP_JEXL_VARIABLE_NAME; public static final String CONTENT_FUNCTION_NAMESPACE = ContentFunctions.CONTENT_FUNCTION_NAMESPACE; diff --git a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java index c0e6577ca83..48979701c3d 100644 --- a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java +++ b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java @@ -216,11 +216,6 @@ public class QueryParameters { */ public 
static final String EXCERPT_FIELDS = "excerpt.fields"; - /** - * Used to specify summaries that should be returned. - */ - public static final String SUMMARY_OPTIONS = "summary.options"; - /** * Used to specify model or DB fields that should be treated as lenient (can be skipped if normalization fails) */ diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/SummaryOptions.java b/warehouse/query-core/src/main/java/datawave/query/attributes/SummaryOptions.java deleted file mode 100644 index db9acd3e394..00000000000 --- a/warehouse/query-core/src/main/java/datawave/query/attributes/SummaryOptions.java +++ /dev/null @@ -1,229 +0,0 @@ -package datawave.query.attributes; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Objects; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonValue; - -import datawave.query.Constants; -import datawave.query.postprocessing.tf.PhraseIndexes; - -/** - * Represents options for a summary that have been specified within an #SUMMARY_SIZE function. An instance of {@link SummaryOptions} can easily be captured as a - * parameter string using {@link SummaryOptions#toString()}, and transformed back into a {@link SummaryOptions} instance via - * {@link SummaryOptions#from(String)}. - */ -public class SummaryOptions implements Serializable { - - private static final long serialVersionUID = 6769159729743311079L; - - private static final Logger log = LoggerFactory.getLogger(SummaryOptions.class); - - private static final String SIZE_PARAMETER = "SIZE"; - private static final String VIEWS_PARAMETER = "VIEWS"; - private static final String ONLY_PARAMETER = "ONLY"; - - public static final int DEFAULT_SIZE = 150; - - private int summarySize; - private ArrayList viewNamesList; - /** - * When set, we will only use the view names passed in to the function to attempt to make summaries from (clears out default list). - *
- * When this is set, the user should also pass in a list of view names or else there is no chance of summaries being returned. - */ - private boolean onlyListedViews; - - public SummaryOptions() { - summarySize = 0; - viewNamesList = new ArrayList<>(); - onlyListedViews = false; - } - - /** - * Returns a new {@link SummaryOptions} parsed from the string. The provided string is expected to have the format returned by - * {@link SummaryOptions#toString()}. - *
- * <ul>
- * <li>Given null, null will be returned.</li>
- * <li>Given an empty or blank string, a {@link SummaryOptions} with a size of DEFAULT_SIZE (currently 150) will be returned.</li>
- * <li>Given {@code SIZE:50/ONLY/VIEWS:CONTENT1,CONTENT2}, an {@link SummaryOptions} will be returned with a size of 50 (size is number of characters), only
- * using the specified view names, and list of view names of (CONTENT1, CONTENT2).</li>
- * <li>Given malformed input, will return an empty {@link SummaryOptions}.</li>
- * </ul>
- * - * @param string - * the string to parse - * @return the parsed {@link SummaryOptions} - */ - @JsonCreator - public static SummaryOptions from(String string) { - if (string == null) { - return null; - } - // Strip whitespaces. - string = PhraseIndexes.whitespacePattern.matcher(string).replaceAll(""); - - SummaryOptions summaryOptions = new SummaryOptions(); - - // if passed no parameters, return of summary of default size - if (string.isBlank()) { - summaryOptions.summarySize = DEFAULT_SIZE; - return summaryOptions; - } - - try { - // split on / to get the separate options - String[] parameterParts = string.split(Constants.FORWARD_SLASH); - - // go through each option and try to set them - for (String parameterPart : parameterParts) { - // for options that are "key:value", split on colon to get the key - String[] parts = parameterPart.split(Constants.COLON); - // if we have the "size" option... - if (parts[0].equalsIgnoreCase(SIZE_PARAMETER)) { - int size = Integer.parseInt(parts[1]); - if (size == 0) { - return new SummaryOptions(); - } - summaryOptions.summarySize = size; - } - // if we have the "only" option... - else if (parts[0].equalsIgnoreCase(ONLY_PARAMETER)) { - summaryOptions.onlyListedViews = true; - } - // if we have the "views" option... - else if (parts[0].equalsIgnoreCase(VIEWS_PARAMETER)) { - // the view names are split by commas. split them, uppercase them, then add the to the list. - String[] names = parts[1].split(Constants.COMMA); - for (String name : names) { - summaryOptions.viewNamesList.add(name.toUpperCase()); - } - } - } - // if size was not specified, make it DEFAULT_SIZE - if (summaryOptions.summarySize == 0) { - summaryOptions.summarySize = DEFAULT_SIZE; - } - } catch (NumberFormatException e) { - log.warn("Unable to parse summary size string, returning empty SummaryOptions: {}", string, e); - return new SummaryOptions(); - } - - return summaryOptions; - } - - /** - * Returns a copy of the given {@link SummaryOptions} - * - * @param other - * the instance to copy - * @return the copy - */ - public static SummaryOptions copyOf(SummaryOptions other) { - if (other == null) { - return null; - } - SummaryOptions summaryOptions = new SummaryOptions(); - summaryOptions.summarySize = other.summarySize; - summaryOptions.viewNamesList = new ArrayList<>(other.viewNamesList); - summaryOptions.onlyListedViews = other.onlyListedViews; - return summaryOptions; - } - - public int getSummarySize() { - return summarySize; - } - - public boolean onlyListedViews() { - return onlyListedViews; - } - - /** - * Replace a view name with another view name - * - * @param viewName - * the one to replace - * @param replacement - * the one to replace the other - */ - public void replace(String viewName, String replacement) { - int index = viewNamesList.indexOf(viewName); - if (index != -1) { - viewNamesList.set(index, replacement); - } - } - - /** - * Return whether this {@link SummaryOptions} view names list is empty. 
- * - * @return true if empty, or false otherwise - */ - public boolean isEmpty() { - return viewNamesList.isEmpty(); - } - - public String viewNamesListToString() { - if (viewNamesList.isEmpty()) { - return ""; - } - - StringBuilder sb = new StringBuilder(); - for (String viewName : viewNamesList) { - sb.append(viewName).append(Constants.COMMA); - } - return sb.substring(0, sb.length() - 1); - } - - public static String[] viewNamesListFromString(String string) { - return string.split(Constants.COMMA); - } - - /** - * Returns this {@link SummaryOptions} as a formatted string that can later be parsed back into a {@link SummaryOptions} using - * {@link SummaryOptions#from(String)}. This is also what will be used when serializing a {@link SummaryOptions} to JSON/XML. The string will have the - * format {@code SIZE:size/[only]/[NAMES:contentName1, contentName2, ....]}. - * - * @return a formatted string - */ - @JsonValue - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(SIZE_PARAMETER).append(":").append(summarySize); - if (onlyListedViews) { - sb.append("/").append(ONLY_PARAMETER); - } - if (!viewNamesList.isEmpty()) { - sb.append("/").append(VIEWS_PARAMETER).append(":"); - for (String viewName : viewNamesList) { - sb.append(viewName).append(Constants.COMMA); - } - return sb.substring(0, sb.length() - 1); - } - return sb.toString(); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - SummaryOptions that = (SummaryOptions) o; - return Objects.equals(summarySize, that.summarySize) && Objects.equals(viewNamesList, that.viewNamesList) - && Objects.equals(onlyListedViews, that.onlyListedViews); - } - - @Override - public int hashCode() { - return Objects.hash(summarySize, viewNamesList, onlyListedViews); - } -} diff --git a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java index b88c9527ff2..a6dd448a4ec 100644 --- a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java +++ b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java @@ -47,13 +47,11 @@ import datawave.query.DocumentSerialization.ReturnType; import datawave.query.QueryParameters; import datawave.query.attributes.ExcerptFields; -import datawave.query.attributes.SummaryOptions; import datawave.query.attributes.UniqueFields; import datawave.query.common.grouping.GroupFields; import datawave.query.function.DocumentPermutation; import datawave.query.iterator.QueryIterator; import datawave.query.iterator.ivarator.IvaratorCacheDirConfig; -import datawave.query.iterator.logic.ContentSummaryIterator; import datawave.query.iterator.logic.TermFrequencyExcerptIterator; import datawave.query.jexl.JexlASTHelper; import datawave.query.jexl.visitors.JexlStringBuildingVisitor; @@ -439,11 +437,6 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement // The class for the excerpt iterator private Class> excerptIterator = TermFrequencyExcerptIterator.class; - private SummaryOptions summaryOptions = new SummaryOptions(); - - // The class for the summary iterator - private Class> summaryIterator = ContentSummaryIterator.class; - /** * A bloom filter to avoid duplicate results if needed */ @@ -756,8 +749,6 @@ public void copyFrom(ShardQueryConfiguration other) { 
 this.setStrictFields(other.getStrictFields());
 this.setExcerptFields(ExcerptFields.copyOf(other.getExcerptFields()));
 this.setExcerptIterator(other.getExcerptIterator());
- this.setSummaryOptions(SummaryOptions.copyOf(other.getSummaryOptions()));
- this.setSummaryIterator(other.getSummaryIterator());
 this.setFiFieldSeek(other.getFiFieldSeek());
 this.setFiNextSeek(other.getFiNextSeek());
 this.setEventFieldSeek(other.getEventFieldSeek());
@@ -2631,24 +2622,6 @@ public void setExcerptIterator(Class<? extends SortedKeyValueIterator<Key,Value>> excerptIterator) {
 this.excerptIterator = excerptIterator;
 }
- public SummaryOptions getSummaryOptions() {
- return summaryOptions;
- }
-
- public void setSummaryOptions(SummaryOptions summaryOptions) {
- if (summaryOptions != null) {
- this.summaryOptions = summaryOptions;
- }
- }
-
- public Class<? extends SortedKeyValueIterator<Key,Value>> getSummaryIterator() {
- return summaryIterator;
- }
-
- public void setSummaryIterator(Class<? extends SortedKeyValueIterator<Key,Value>> summaryIterator) {
- this.summaryIterator = summaryIterator;
- }
-
 public int getFiFieldSeek() {
 return fiFieldSeek;
 }
@@ -3038,7 +3011,6 @@ public boolean equals(Object o) {
 Objects.equals(getLenientFields(), that.getLenientFields()) &&
 Objects.equals(getStrictFields(), that.getStrictFields()) &&
 Objects.equals(getExcerptFields(), that.getExcerptFields()) &&
- Objects.equals(getSummaryOptions(), that.getSummaryOptions()) &&
 getFiFieldSeek() == that.getFiFieldSeek() &&
 getFiNextSeek() == that.getFiNextSeek() &&
 getEventFieldSeek() == that.getEventFieldSeek() &&
@@ -3247,7 +3219,6 @@ public int hashCode() {
 getLenientFields(),
 getStrictFields(),
 getExcerptFields(),
- getSummaryOptions(),
 getFiFieldSeek(),
 getFiNextSeek(),
 getEventFieldSeek(),
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java
index f03e1d6f310..b36d1467667 100644
--- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java
+++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java
@@ -114,7 +114,6 @@
 import datawave.query.tracking.ActiveQuery;
 import datawave.query.tracking.ActiveQueryLog;
 import datawave.query.transformer.ExcerptTransform;
-import datawave.query.transformer.SummaryTransform;
 import datawave.query.transformer.UniqueTransform;
 import datawave.query.util.EmptyContext;
 import datawave.query.util.EntryToTuple;
@@ -200,8 +199,6 @@ public class QueryIterator extends QueryOptions implements YieldingKeyValueItera
 protected ExcerptTransform excerptTransform = null;
- protected SummaryTransform summaryTransform = null;
-
 protected RangeProvider rangeProvider;
 public QueryIterator() {}
@@ -820,11 +817,6 @@ public Entry apply(@Nullable Entry input) {
 documents = excerptTransform.getIterator(documents);
 }
- SummaryTransform summaryTransform = getSummaryTransform();
- if (summaryTransform != null) {
- documents = summaryTransform.getIterator(documents);
- }
-
 // a hook to allow mapping the document such as with the TLD or Parent
 // query logics
 // or if the document was not aggregated in the first place because the
@@ -1621,22 +1613,6 @@ protected ExcerptTransform getExcerptTransform() {
 return excerptTransform;
 }
- protected SummaryTransform getSummaryTransform() {
- if (summaryTransform == null && getSummaryOptions() != null && getSummaryOptions().getSummarySize() != 0) {
- synchronized (getSummaryOptions()) {
- if (summaryTransform == null) {
- try {
- summaryTransform = new SummaryTransform(summaryOptions, myEnvironment, sourceForDeepCopies.deepCopy(myEnvironment),
- summaryIterator.getDeclaredConstructor().newInstance());
- } catch (NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) {
- throw new RuntimeException("Could not create summary transform", e);
- }
- }
- }
- return summaryTransform;
- }
-
 /**
 * Get a default implementation of a {@link RangeProvider}
 *
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java
index b1f601c7a48..01a6bcb51f2 100644
--- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java
+++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryOptions.java
@@ -61,7 +61,6 @@
 import datawave.query.DocumentSerialization;
 import datawave.query.attributes.Document;
 import datawave.query.attributes.ExcerptFields;
-import datawave.query.attributes.SummaryOptions;
 import datawave.query.attributes.UniqueFields;
 import datawave.query.common.grouping.GroupFields;
 import datawave.query.composite.CompositeMetadata;
@@ -81,7 +80,6 @@
 import datawave.query.iterator.filter.KeyIdentity;
 import datawave.query.iterator.filter.StringToText;
 import datawave.query.iterator.ivarator.IvaratorCacheDirConfig;
-import datawave.query.iterator.logic.ContentSummaryIterator;
 import datawave.query.iterator.logic.IndexIterator;
 import datawave.query.iterator.logic.TermFrequencyExcerptIterator;
 import datawave.query.jexl.DefaultArithmetic;
@@ -263,10 +261,6 @@ public class QueryOptions implements OptionDescriber {
 public static final String EXCERPT_ITERATOR = "excerpt.iterator.class";
- public static final String SUMMARY_OPTIONS = "summary.options";
-
- public static final String SUMMARY_ITERATOR = "summary.iterator.class";
-
 // field and next thresholds before a seek is issued
 public static final String FI_FIELD_SEEK = "fi.field.seek";
 public static final String FI_NEXT_SEEK = "fi.next.seek";
@@ -441,10 +435,6 @@ public class QueryOptions implements OptionDescriber {
 protected Class<? extends SortedKeyValueIterator<Key,Value>> excerptIterator = TermFrequencyExcerptIterator.class;
- protected SummaryOptions summaryOptions;
-
- protected Class<? extends SortedKeyValueIterator<Key,Value>> summaryIterator = ContentSummaryIterator.class;
-
 // off by default, controls when to issue a seek
 private int fiFieldSeek = -1;
 private int fiNextSeek = -1;
@@ -566,8 +556,6 @@ public void deepCopy(QueryOptions other) {
 this.excerptFields = other.excerptFields;
 this.excerptFieldsNoHitCallout = other.excerptFieldsNoHitCallout;
 this.excerptIterator = other.excerptIterator;
- this.summaryOptions = other.summaryOptions;
- this.summaryIterator = other.summaryIterator;
 this.fiFieldSeek = other.fiFieldSeek;
 this.fiNextSeek = other.fiNextSeek;
@@ -1281,22 +1269,6 @@ public void setExcerptIterator(Class<? extends SortedKeyValueIterator<Key,Value>> excerptIterator) {
 this.excerptIterator = excerptIterator;
 }
- public SummaryOptions getSummaryOptions() {
- return summaryOptions;
- }
-
- public void setSummaryOptions(SummaryOptions summaryOptions) {
- this.summaryOptions = summaryOptions;
- }
-
- public Class<? extends SortedKeyValueIterator<Key,Value>> getSummaryIterator() {
- return summaryIterator;
- }
-
- public void setSummaryIterator(Class<? extends SortedKeyValueIterator<Key,Value>> summaryIterator) {
- this.summaryIterator = summaryIterator;
- }
-
 @Override
 public IteratorOptions describeOptions() {
 Map<String,String> options = new HashMap<>();
@@ -1390,8 +1362,6 @@ public IteratorOptions describeOptions() {
 options.put(EXCERPT_FIELDS, "excerpt fields");
 options.put(EXCERPT_FIELDS_NO_HIT_CALLOUT, "excerpt fields no hit callout");
 options.put(EXCERPT_ITERATOR, "excerpt iterator class (default datawave.query.iterator.logic.TermFrequencyExcerptIterator");
- options.put(SUMMARY_OPTIONS, "The size of the summary to return with possible options (ONLY) and list of contentNames");
- options.put(SUMMARY_ITERATOR, "summary iterator class (default datawave.query.iterator.logic.ContentSummaryIterator");
 options.put(FI_FIELD_SEEK, "The number of fields traversed by a Field Index data filter or aggregator before a seek is issued");
 options.put(FI_NEXT_SEEK, "The number of next calls made by a Field Index data filter or aggregator before a seek is issued");
 options.put(EVENT_FIELD_SEEK, "The number of fields traversed by an Event data filter or aggregator before a seek is issued");
@@ -1909,18 +1879,6 @@ public boolean validateOptions(Map<String,String> options) {
 }
 }
- if (options.containsKey(SUMMARY_OPTIONS)) {
- setSummaryOptions(SummaryOptions.from(options.get(SUMMARY_OPTIONS)));
- }
-
- if (options.containsKey(SUMMARY_ITERATOR)) {
- try {
- setSummaryIterator((Class<? extends SortedKeyValueIterator<Key,Value>>) Class.forName(options.get(SUMMARY_ITERATOR)));
- } catch (ClassNotFoundException e) {
- throw new RuntimeException("Could not get class for " + options.get(SUMMARY_ITERATOR), e);
- }
- }
-
 return true;
 }
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/ContentSummaryIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/ContentSummaryIterator.java
deleted file mode 100644
index 73747f1f67a..00000000000
--- a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/ContentSummaryIterator.java
+++ /dev/null
@@ -1,348 +0,0 @@
-package datawave.query.iterator.logic;
-
-import static datawave.query.iterator.logic.TermFrequencyExcerptIterator.getDtUid;
-import static datawave.query.iterator.logic.TermFrequencyExcerptIterator.getDtUidFromEventKey;
-import static datawave.query.iterator.logic.TermFrequencyExcerptIterator.getSortedCFs;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.SortedSet;
-
-import org.apache.accumulo.core.data.ArrayByteSequence;
-import org.apache.accumulo.core.data.ByteSequence;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.IteratorEnvironment;
-import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
-import org.apache.hadoop.io.Text;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import datawave.query.Constants;
-import datawave.query.attributes.SummaryOptions;
-import datawave.query.table.parser.ContentKeyValueFactory;
-
-/**
- * This iterator is intended to scan the d column for a specified document. The result will be a summary for each document scanned.
- */
-public class ContentSummaryIterator implements SortedKeyValueIterator<Key,Value> {
- private static final Logger log = LoggerFactory.getLogger(ContentSummaryIterator.class);
- private static final Collection<ByteSequence> D_COLUMN_FAMILY_BYTE_SEQUENCE = Collections
- .singleton(new ArrayByteSequence(Constants.D_COLUMN_FAMILY.getBytes()));
-
- public static final String SUMMARY_SIZE = "summary.size";
-
- public static final String VIEW_NAMES = "view.names";
-
- public static final String ONLY_SPECIFIED = "only.specified";
-
- private static final int MAX_SUMMARY_SIZE = 1500;
-
- // 100 megabytes
- private static final int MAX_CONTENT_SIZE = 100 * 1024 * 1024;
-
- /**
- * A list of view names to potentially create a summary for. The closer to the front in the list, the higher the priority to get a summary for that view
- */
- protected final ArrayList<String> viewSummaryOrder = new ArrayList<>();
-
- /** the size in bytes of the summary to create */
- protected int summarySize;
-
- /** if we will only look at the view names specified in the query */
- protected boolean onlySpecified;
-
- /** the underlying source */
- protected SortedKeyValueIterator<Key,Value> source;
-
- /** The specified dt/uid column families */
- protected SortedSet<String> columnFamilies;
-
- /** inclusive or exclusive dt/uid column families */
- protected boolean inclusive;
-
- /** the underlying D column scan range */
- protected Range scanRange;
-
- /** the top key */
- protected Key tk;
-
- /** the top value */
- protected Value tv;
-
- @Override
- public boolean hasTop() {
- return tk != null;
- }
-
- @Override
- public SortedKeyValueIterator<Key,Value> deepCopy(IteratorEnvironment env) {
- ContentSummaryIterator it = new ContentSummaryIterator();
- it.source = source.deepCopy(env);
- return it;
- }
-
- @Override
- public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException {
- this.source = source;
-
- viewSummaryOrder.add("CONTENT");
-
- if (options.containsKey(SUMMARY_SIZE)) {
- this.summarySize = Math.max(1, Math.min(Integer.parseInt(options.get(SUMMARY_SIZE)), MAX_SUMMARY_SIZE));
- } else {
- this.summarySize = SummaryOptions.DEFAULT_SIZE;
- }
-
- // if "ONLY" we will clear the view names list so that we only use the ones passed in
- if (options.containsKey(ONLY_SPECIFIED)) {
- onlySpecified = Boolean.parseBoolean(options.get(ONLY_SPECIFIED));
- if (onlySpecified) {
- viewSummaryOrder.clear();
- }
- } else {
- onlySpecified = false;
- }
-
- // add the view names to the list in the order specified
- if (options.containsKey(VIEW_NAMES)) {
- String[] nameList = SummaryOptions.viewNamesListFromString(options.get(VIEW_NAMES));
- for (int i = nameList.length - 1; i >= 0; i--) {
- String name = nameList[i];
- viewSummaryOrder.remove(name);
- viewSummaryOrder.add(0, name);
- }
- }
- }
-
- @Override
- public Key getTopKey() {
- return tk;
- }
-
- @Override
- public Value getTopValue() {
- return tv;
- }
-
- @Override
- public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
- if (log.isDebugEnabled()) {
- log.debug("{} seek'ing with requested range {}", this, range);
- }
-
- // capture the column families and the inclusiveness
- this.columnFamilies = columnFamilies != null ?
getSortedCFs(columnFamilies) : Collections.emptySortedSet(); - this.inclusive = inclusive; - - // Determine the start key in the d keys - Key startKey = null; - if (range.getStartKey() != null) { - // get the start document - String dtAndUid = getDtUidFromEventKey(range.getStartKey(), true, range.isStartKeyInclusive()); - // if no start document - if (dtAndUid == null) { - // if no column families or not using these column families inclusively - if (this.columnFamilies.isEmpty() || !this.inclusive) { - // then start at the beginning of the d range - startKey = new Key(range.getStartKey().getRow(), Constants.D_COLUMN_FAMILY); - } else { - // otherwise start at the first document specified - startKey = new Key(range.getStartKey().getRow(), Constants.D_COLUMN_FAMILY, new Text(this.columnFamilies.first() + Constants.NULL)); - } - } else { - // we had a start document specified in the start key, so start there - startKey = new Key(range.getStartKey().getRow(), Constants.D_COLUMN_FAMILY, new Text(dtAndUid)); - } - } - log.debug("{} calling seek to start key: {}", this, startKey); - - // Determine the end key in the d keys - Key endKey = null; - if (range.getEndKey() != null) { - // get the end document - String dtAndUid = getDtUidFromEventKey(range.getEndKey(), false, range.isEndKeyInclusive()); - // if no end document - if (dtAndUid == null) { - // if we do not have column families specified, or they are not inclusive - if (this.columnFamilies.isEmpty() || !this.inclusive) { - // then go to the end of the d keys - endKey = new Key(range.getEndKey().getRow(), Constants.D_COLUMN_FAMILY, new Text(Constants.MAX_UNICODE_STRING)); - } else { - // otherwise end at the last document specified - endKey = new Key(range.getEndKey().getRow(), Constants.D_COLUMN_FAMILY, - new Text(this.columnFamilies.last() + Constants.NULL + Constants.MAX_UNICODE_STRING)); - } - } else { - // we had an end document specified in the end key, so end there - endKey = new Key(range.getStartKey().getRow(), Constants.D_COLUMN_FAMILY, new Text(dtAndUid)); - } - } - log.debug("{} seek'ing to end key: {}", this, endKey); - - // if we have actually exhausted our range, then return with no next key - if (endKey != null && startKey != null && endKey.compareTo(startKey) <= 0) { - this.scanRange = null; - this.tk = null; - this.tv = null; - return; - } - - // set our d keys scan range - this.scanRange = new Range(startKey, false, endKey, false); - - if (log.isDebugEnabled()) { - log.debug("{} seek'ing to: {} from requested range {}", this, this.scanRange, range); - } - - // seek the underlying source - source.seek(this.scanRange, D_COLUMN_FAMILY_BYTE_SEQUENCE, true); - - // get the next key - next(); - } - - @Override - public void next() throws IOException { - tk = null; - tv = null; - - if (log.isTraceEnabled()) { - log.trace("{} calling next on {}", source.hasTop(), scanRange); - } - - // find a valid dt/uid (depends on initial column families set in seek call) - String dtUid = null; - while (source.hasTop() && dtUid == null) { - Key top = source.getTopKey(); - String thisDtUid = getDtUidFromDKey(top); - // if this dt and uid are in the accepted column families... - if (columnFamilies.contains(thisDtUid) == inclusive) { - // we can use this document - dtUid = thisDtUid; - } else { - seekToNextUid(top.getRow(), thisDtUid); - } - } - - // if no more d keys, then we are done. 
- if (!source.hasTop() || dtUid == null) {
- return;
- }
-
- Key top = source.getTopKey();
-
- // this is where we will save all the content found for this document
- final Map<String,byte[]> foundContent = new HashMap<>();
-
- // while we have d keys for the same document
- while (source.hasTop() && dtUid.equals(getDtUidFromDKey(source.getTopKey()))) {
- top = source.getTopKey();
-
- // get the view name
- String currentViewName = getViewName(top);
-
- for (String name : viewSummaryOrder) {
- if (name.endsWith("*")) {
- name = name.substring(0, name.length() - 1);
- if (currentViewName.startsWith(name)) {
- addContentToFound(foundContent, currentViewName, source);
- }
- } else {
- if (currentViewName.equalsIgnoreCase(name)) {
- addContentToFound(foundContent, currentViewName, source);
- }
- }
- }
-
- // get the next d key
- source.next();
- }
-
- // create the summary
- String summary = new SummaryCreator(viewSummaryOrder, foundContent, summarySize).createSummary();
- if (summary != null) {
- tk = new Key(top.getRow(), new Text(dtUid), new Text(summary), top.getColumnVisibility());
- tv = new Value();
- return;
- }
-
- // If we get here, we have not found content to summarize, so return null
- tk = null;
- tv = null;
- }
-
- private static void addContentToFound(Map<String,byte[]> foundContent, String currentViewName, SortedKeyValueIterator<Key,Value> source) {
- // true for compressed, false for uncompressed
- byte[] content = source.getTopValue().get();
- if (content.length < MAX_CONTENT_SIZE) {
- foundContent.put(currentViewName + Constants.COLON + Boolean.TRUE, content);
- } else {
- content = ContentKeyValueFactory.decodeAndDecompressContent(content);
- // pre-truncate big content to MAX_SUMMARY_SIZE
- content = new String(content).substring(0, MAX_SUMMARY_SIZE).getBytes();
- foundContent.put(currentViewName + Constants.COLON + Boolean.FALSE, content);
- }
- }
-
- /**
- * Seek to the dt/uid following the one passed in
- *
- * @param row
- * a row
- * @param dtAndUid
- * the dt and uid string
- * @throws IOException
- * for issues with read/write
- */
- private void seekToNextUid(Text row, String dtAndUid) throws IOException {
- Key startKey = new Key(row, Constants.D_COLUMN_FAMILY, new Text(dtAndUid + Constants.ONE_BYTE));
- this.scanRange = new Range(startKey, false, this.scanRange.getEndKey(), this.scanRange.isEndKeyInclusive());
- if (log.isDebugEnabled()) {
- log.debug("{} seek'ing to next document: {}", this, this.scanRange);
- }
-
- source.seek(this.scanRange, Collections.singleton(new ArrayByteSequence(Constants.D_COLUMN_FAMILY.getBytes())), true);
- }
-
- /**
- * Get the view name from the end of the column qualifier of the d key
- *
- * @param dKey
- * the d key
- * @return the view name
- */
- private static String getViewName(Key dKey) {
- String cq = dKey.getColumnQualifier().toString();
- int index = cq.lastIndexOf(Constants.NULL);
- return cq.substring(index + 1);
- }
-
- /**
- * get the dt and uid from a d key
- *
- * @param dKey
- * the d key
- * @return the dt\x00uid
- */
- private static String getDtUidFromDKey(Key dKey) {
- return getDtUid(dKey.getColumnQualifier().toString());
- }
-
- public void setViewNameList(List<String> viewNameList) {
- viewSummaryOrder.clear();
- viewSummaryOrder.addAll(viewNameList);
- }
-
- @Override
- public String toString() {
- return "DColumnExcerptIterator: " + summarySize + ", " + onlySpecified + ", " + viewSummaryOrder;
- }
-
-}
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/SummaryCreator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/SummaryCreator.java
deleted file mode 100644
index 7a7ec00d501..00000000000
--- a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/SummaryCreator.java
+++ /dev/null
@@ -1,101 +0,0 @@
-package datawave.query.iterator.logic;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import datawave.query.Constants;
-import datawave.query.table.parser.ContentKeyValueFactory;
-
-/**
- * This class contains the functionality to generate summaries.
- *
- *
- * Just need to call "createSummary()" after creation.
- */
-public class SummaryCreator {
- private final List<String> viewSummaryOrder;
- Map<String,byte[]> foundContent;
- int summarySize;
-
- public SummaryCreator(List<String> viewSummaryOrder, Map<String,byte[]> foundContent, int summarySize) {
- this.viewSummaryOrder = viewSummaryOrder;
- this.foundContent = foundContent;
- this.summarySize = summarySize;
- }
-
- /**
- * this method attempts to create a summary out of the found views
- *
- * @return the created summary
- */
- public String createSummary() {
- // check each potential view name we could make summaries for
- for (String name : viewSummaryOrder) {
- if (name.endsWith("*")) {
- // strip wildcard from view name
- name = name.substring(0, name.length() - 1);
-
- String endingWildcardSummary = getEndingWildcardSummary(name, foundContent, summarySize);
- if (endingWildcardSummary != null) {
- return endingWildcardSummary;
- }
- } else {
- String simpleSummary = getSimpleSummary(name, foundContent, summarySize);
- if (simpleSummary != null) {
- return simpleSummary;
- }
- }
- }
- return null;
- }
-
- /** for matching and creating summaries when view names have trailing wildcards */
- private static String getEndingWildcardSummary(String currentViewName, Map<String,byte[]> foundContent, int summarySize) {
- // if we have a view name that matches the list...
- Map<String,String> summaries = new HashMap<>();
- for (Map.Entry<String,byte[]> entry : foundContent.entrySet()) {
- // first part is view, second part is if compressed still
- String[] s = entry.getKey().split(Constants.COLON);
- if (s[0].startsWith(currentViewName)) {
- summaries.put(entry.getKey(), getSummaryForView(entry.getValue(), summarySize, Boolean.parseBoolean(s[1])));
- }
- }
- if (!summaries.isEmpty()) {
- // return the view name and summary separated by a new line character
- StringBuilder sb = new StringBuilder();
- for (Map.Entry<String,String> entry : summaries.entrySet()) {
- sb.append(entry.getKey()).append(": ").append(entry.getValue()).append("\n");
- }
- return sb.toString().trim();
- }
- return null;
- }
-
- /** a straight-up match between view names */
- private static String getSimpleSummary(String currentViewName, Map<String,byte[]> foundContent, int summarySize) {
- for (Map.Entry<String,byte[]> entry : foundContent.entrySet()) {
- // first part is view, second part is if compressed still
- String[] s = entry.getKey().split(Constants.COLON);
- if (s[0].equals(currentViewName)) {
- return currentViewName + ": " + getSummaryForView(entry.getValue(), summarySize, Boolean.parseBoolean(s[1]));
- }
- }
- return null;
- }
-
- private static String getSummaryForView(byte[] content, int summarySize, boolean needsDecompressing) {
- String summary;
- if (needsDecompressing) {
- // decode and decompress the content
- summary = new String(ContentKeyValueFactory.decodeAndDecompressContent(content));
- } else {
- summary = new String(content);
- }
- // if the content is longer than the specified length, truncate it
- if (summary.length() > summarySize) {
- summary = summary.substring(0, summarySize);
- }
- return summary;
- }
-}
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/TermFrequencyExcerptIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/TermFrequencyExcerptIterator.java
index cd0e4279636..f3b098493f4 100644
--- a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/TermFrequencyExcerptIterator.java
+++ b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/TermFrequencyExcerptIterator.java
@@ -868,7 +868,7 @@ private void seekToNextUid(Text row, String dtAndUid) throws IOException {
 * the column families
 * @return a sorted set of column families as Strings
 */
- protected static SortedSet<String> getSortedCFs(Collection<ByteSequence> columnFamilies) {
+ private static SortedSet<String> getSortedCFs(Collection<ByteSequence> columnFamilies) {
 return columnFamilies.stream().map(m -> {
 try {
 return Text.decode(m.getBackingArray(), m.offset(), m.length());
@@ -916,7 +916,7 @@ private static String getDtUidFromTfKey(Key tfKey) {
 * inclusive boolean flag
 * @return the start or end document (cq) for our tf scan range. Null if dt,uid does not exist in the event key
 */
- protected static String getDtUidFromEventKey(Key eventKey, boolean startKey, boolean inclusive) {
+ private static String getDtUidFromEventKey(Key eventKey, boolean startKey, boolean inclusive) {
 // if an infinite end range, or unspecified end document, then no document to specify
 if (eventKey == null || eventKey.getColumnFamily() == null || eventKey.getColumnFamily().getLength() == 0) {
 return null;
@@ -941,7 +941,7 @@ protected static String getDtUidFromEventKey(Key eventKey, boolean startKey, boo
 }
 // get the dt/uid from the beginning of a given string
- protected static String getDtUid(String str) {
+ private static String getDtUid(String str) {
 int index = str.indexOf(Constants.NULL);
 index = str.indexOf(Constants.NULL, index + 1);
 return index == -1 ? str : str.substring(0, index);
diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctions.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctions.java
index 7bf58108dfe..73fe785a3ec 100644
--- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctions.java
+++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctions.java
@@ -28,7 +28,6 @@ public class QueryFunctions {
 public static final String UNIQUE_FUNCTION = "unique";
 public static final String GROUPBY_FUNCTION = "groupby";
 public static final String EXCERPT_FIELDS_FUNCTION = "excerpt_fields";
- public static final String SUMMARY_FUNCTION = "summary";
 public static final String LENIENT_FIELDS_FUNCTION = "lenient";
 public static final String STRICT_FIELDS_FUNCTION = "strict";
 public static final String MATCH_REGEX = "matchRegex";
diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java
index 259c86b0a6b..718cfa7ff50 100644
--- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java
+++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java
@@ -181,8 +181,6 @@ public Set<String> fields(MetadataHelper helper, Set<String> datatypeFilter) {
 }
 }
 break;
- case QueryFunctions.SUMMARY_FUNCTION:
- break;
 case QueryFunctions.MATCH_REGEX:
 case BETWEEN:
 case LENGTH:
@@ -278,8 +276,6 @@ private static void verify(String name, int numArgs) {
 throw new IllegalArgumentException("Expected at least one argument to the " + name + " function");
 }
 break;
- case QueryFunctions.SUMMARY_FUNCTION:
- break;
 default:
 throw new IllegalArgumentException("Unknown Query function: " + name);
 }
diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java
index 324bedd55d3..a9b9bdb8de0 100644
--- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java
+++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java
@@ -59,7 +59,7 @@ public class QueryOptionsFromQueryVisitor extends RebuildingVisitor {
 QueryFunctions.UNIQUE_FUNCTION, UniqueFunction.UNIQUE_BY_DAY_FUNCTION, UniqueFunction.UNIQUE_BY_HOUR_FUNCTION, UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION,
 UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, UniqueFunction.UNIQUE_BY_SECOND_FUNCTION,
 UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION, UniqueFunction.UNIQUE_BY_YEAR_FUNCTION,
- QueryFunctions.GROUPBY_FUNCTION, QueryFunctions.EXCERPT_FIELDS_FUNCTION, QueryFunctions.SUMMARY_FUNCTION, QueryFunctions.NO_EXPANSION,
+ QueryFunctions.GROUPBY_FUNCTION, QueryFunctions.EXCERPT_FIELDS_FUNCTION, QueryFunctions.NO_EXPANSION,
 QueryFunctions.LENIENT_FIELDS_FUNCTION, QueryFunctions.STRICT_FIELDS_FUNCTION, QueryFunctions.SUM, QueryFunctions.MIN, QueryFunctions.MAX,
 QueryFunctions.AVERAGE, QueryFunctions.COUNT, QueryFunctions.RENAME_FUNCTION);
@@ -249,12 +249,6 @@ private Object visit(ASTFunctionNode node, Map<String,String> optionsMap) {
 updateFieldsOption(optionsMap, QueryParameters.EXCERPT_FIELDS, optionsList);
 return null;
 }
- case QueryFunctions.SUMMARY_FUNCTION: {
- List<String> options = new ArrayList<>();
- this.visit(node, options);
- optionsMap.put(QueryParameters.SUMMARY_OPTIONS, JOINER.join(options));
- return null;
- }
 case QueryFunctions.NO_EXPANSION: {
 List<String> optionsList = new ArrayList<>();
 this.visit(node, optionsList);
diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/SummaryOptions.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/SummaryOptions.java
deleted file mode 100644
index 9eea024f1a6..00000000000
--- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/SummaryOptions.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package datawave.query.language.functions.jexl;
-
-import java.text.MessageFormat;
-import java.util.ArrayList;
-
-import datawave.query.jexl.functions.QueryFunctions;
-import datawave.query.language.functions.QueryFunction;
-import datawave.webservice.query.exception.BadRequestQueryException;
-import datawave.webservice.query.exception.DatawaveErrorCode;
-
-/**
- * Function to specify when summaries should be included for results for any hit documents. This function accepts a string in the format
- * {@code size/[only]/[contentName1, contentName2, ....]}. See {@link datawave.query.attributes.SummaryOptions} for additional documentation on supported
- * formatting.
- */
-public class SummaryOptions extends JexlQueryFunction {
-
- public SummaryOptions() {
- super(QueryFunctions.SUMMARY_FUNCTION, new ArrayList<>());
- }
-
- @Override
- public void validate() throws IllegalArgumentException {
- String parameters = this.parameterList.isEmpty() ?
"" : String.join(",", parameterList); - try { - datawave.query.attributes.SummaryOptions.from(parameters); - } catch (Exception e) { - BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, - MessageFormat.format("Unable to parse summary options from arguments for function {0}", this.name)); - throw new IllegalArgumentException(qe); - } - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - - sb.append(QueryFunctions.QUERY_FUNCTION_NAMESPACE).append(':').append(QueryFunctions.SUMMARY_FUNCTION); - if (parameterList.isEmpty()) { - sb.append("()"); - } else { - char separator = '('; - for (String parm : parameterList) { - sb.append(separator).append(escapeString(parm)); - separator = ','; - } - sb.append(')'); - } - - return sb.toString(); - } - - @Override - public QueryFunction duplicate() { - return new SummaryOptions(); - } -} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParser.java b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParser.java index c605d12f271..b9dd081637c 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParser.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParser.java @@ -1,7 +1,11 @@ /* Generated By:JavaCC: Do not edit this line. AccumuloSyntaxParser.java */ package datawave.query.language.parser.lucene; -/* Licensed to the Apache Software Foundation (ASF) under one or more +import java.io.StringReader; +import java.util.Vector; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 @@ -16,9 +20,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -import java.io.StringReader; -import java.util.Vector; - import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages; import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; @@ -41,7 +42,7 @@ import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; @SuppressWarnings("all") -public class AccumuloSyntaxParser implements SyntaxParser, AccumuloSyntaxParserConstants { +public class AccumuloSyntaxParser implements SyntaxParser, AcumuloSyntaxParserConstants { private static final int CONJ_NONE = 0; private static final int CONJ_AND = 2; @@ -604,7 +605,7 @@ final public QueryNode Term(CharSequence field) throws ParseException { throw new Error("Missing return statement in function"); } - private boolean jj_2_1(int xla) { + final private boolean jj_2_1(int xla) { jj_la = xla; jj_lastpos = jj_scanpos = token; try { @@ -616,7 +617,7 @@ private boolean jj_2_1(int xla) { } } - private boolean jj_3_1() { + final private boolean jj_3_1() { if (jj_scan_token(TERM)) return true; if (jj_scan_token(OP_COLON)) @@ -624,30 +625,28 @@ private boolean jj_3_1() { return false; } - /** Generated Token Manager. */ public AccumuloSyntaxParserTokenManager token_source; - /** Current token. */ - public Token token; - /** Next token. 
*/ - public Token jj_nt; + public Token token, jj_nt; private int jj_ntk; private Token jj_scanpos, jj_lastpos; private int jj_la; + public boolean lookingAhead = false; + private boolean jj_semLA; private int jj_gen; final private int[] jj_la1 = new int[21]; static private int[] jj_la1_0; static private int[] jj_la1_1; static { - jj_la1_init_0(); - jj_la1_init_1(); + jj_la1_0(); + jj_la1_1(); } - private static void jj_la1_init_0() { + private static void jj_la1_0() { jj_la1_0 = new int[] {0x6000, 0x6000, 0x8000, 0x4000, 0x2000, 0xf638000, 0x100000, 0xf638000, 0x9410000, 0x800000, 0x800000, 0x100000, 0x6000000, 0x80000000, 0x10000000, 0x80000000, 0x60000000, 0x100000, 0x800000, 0x100000, 0xf610000,}; } - private static void jj_la1_init_1() { + private static void jj_la1_1() { jj_la1_1 = new int[] {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,}; } @@ -655,7 +654,6 @@ private static void jj_la1_init_1() { private boolean jj_rescan = false; private int jj_gc = 0; - /** Constructor with user supplied CharStream. */ public AccumuloSyntaxParser(CharStream stream) { token_source = new AccumuloSyntaxParserTokenManager(stream); token = new Token(); @@ -667,7 +665,6 @@ public AccumuloSyntaxParser(CharStream stream) { jj_2_rtns[i] = new JJCalls(); } - /** Reinitialise. */ public void ReInit(CharStream stream) { token_source.ReInit(stream); token = new Token(); @@ -679,7 +676,6 @@ public void ReInit(CharStream stream) { jj_2_rtns[i] = new JJCalls(); } - /** Constructor with generated Token Manager. */ public AccumuloSyntaxParser(AccumuloSyntaxParserTokenManager tm) { token_source = tm; token = new Token(); @@ -691,7 +687,6 @@ public AccumuloSyntaxParser(AccumuloSyntaxParserTokenManager tm) { jj_2_rtns[i] = new JJCalls(); } - /** Reinitialise. */ public void ReInit(AccumuloSyntaxParserTokenManager tm) { token_source = tm; token = new Token(); @@ -703,7 +698,7 @@ public void ReInit(AccumuloSyntaxParserTokenManager tm) { jj_2_rtns[i] = new JJCalls(); } - private Token jj_consume_token(int kind) throws ParseException { + final private Token jj_consume_token(int kind) throws ParseException { Token oldToken; if ((oldToken = token).next != null) token = token.next; @@ -734,7 +729,7 @@ static private final class LookaheadSuccess extends java.lang.Error {} final private LookaheadSuccess jj_ls = new LookaheadSuccess(); - private boolean jj_scan_token(int kind) { + final private boolean jj_scan_token(int kind) { if (jj_scanpos == jj_lastpos) { jj_la--; if (jj_scanpos.next == null) { @@ -762,7 +757,6 @@ private boolean jj_scan_token(int kind) { return false; } - /** Get the next Token. */ final public Token getNextToken() { if (token.next != null) token = token.next; @@ -773,9 +767,8 @@ final public Token getNextToken() { return token; } - /** Get the specific Token. */ final public Token getToken(int index) { - Token t = token; + Token t = lookingAhead ? 
jj_scanpos : token; for (int i = 0; i < index; i++) { if (t.next != null) t = t.next; @@ -785,14 +778,14 @@ final public Token getToken(int index) { return t; } - private int jj_ntk() { + final private int jj_ntk() { if ((jj_nt = token.next) == null) return (jj_ntk = (token.next = token_source.getNextToken()).kind); else return (jj_ntk = jj_nt.kind); } - private java.util.List jj_expentries = new java.util.ArrayList(); + private java.util.Vector jj_expentries = new java.util.Vector(); private int[] jj_expentry; private int jj_kind = -1; private int[] jj_lasttokens = new int[100]; @@ -808,27 +801,34 @@ private void jj_add_error_token(int kind, int pos) { for (int i = 0; i < jj_endpos; i++) { jj_expentry[i] = jj_lasttokens[i]; } - jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) { - int[] oldentry = (int[]) (it.next()); + boolean exists = false; + for (java.util.Enumeration e = jj_expentries.elements(); e.hasMoreElements();) { + int[] oldentry = (int[]) (e.nextElement()); if (oldentry.length == jj_expentry.length) { + exists = true; for (int i = 0; i < jj_expentry.length; i++) { if (oldentry[i] != jj_expentry[i]) { - continue jj_entries_loop; + exists = false; + break; } } - jj_expentries.add(jj_expentry); - break jj_entries_loop; + if (exists) + break; } } + if (!exists) + jj_expentries.addElement(jj_expentry); if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind; } } - /** Generate ParseException. */ public ParseException generateParseException() { - jj_expentries.clear(); + jj_expentries.removeAllElements(); boolean[] la1tokens = new boolean[33]; + for (int i = 0; i < 33; i++) { + la1tokens[i] = false; + } if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; @@ -849,7 +849,7 @@ public ParseException generateParseException() { if (la1tokens[i]) { jj_expentry = new int[1]; jj_expentry[0] = i; - jj_expentries.add(jj_expentry); + jj_expentries.addElement(jj_expentry); } } jj_endpos = 0; @@ -857,18 +857,16 @@ public ParseException generateParseException() { jj_add_error_token(0, 0); int[][] exptokseq = new int[jj_expentries.size()][]; for (int i = 0; i < jj_expentries.size(); i++) { - exptokseq[i] = jj_expentries.get(i); + exptokseq[i] = (int[]) jj_expentries.elementAt(i); } return new ParseException(token, exptokseq, tokenImage); } - /** Enable tracing. */ final public void enable_tracing() {} - /** Disable tracing. 
*/ final public void disable_tracing() {} - private void jj_rescan_token() { + final private void jj_rescan_token() { jj_rescan = true; for (int i = 0; i < 1; i++) { try { @@ -890,7 +888,7 @@ private void jj_rescan_token() { jj_rescan = false; } - private void jj_save(int index, int xla) { + final private void jj_save(int index, int xla) { JJCalls p = jj_2_rtns[index]; while (p.gen > jj_gen) { if (p.next == null) { diff --git a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParser.jj b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParser.jj index 26ed3a6b65d..9575d25cbc3 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParser.jj +++ b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParser.jj @@ -1,5 +1,5 @@ /** - * Standard file is based on the TextParser.f from lucene 2.3 + * Standard file is based on the TextParser.jj from lucene 2.3 */ options { @@ -127,7 +127,7 @@ PARSER_END(AccumuloSyntaxParser) | | -| )+ "(" (<_FUNCTION_ARG> ( (<_WHITESPACE>)* "," (<_WHITESPACE>)* <_FUNCTION_ARG> )* (<_WHITESPACE>)*)* ")" > +| )+ "(" <_FUNCTION_ARG> ( (<_WHITESPACE>)* "," (<_WHITESPACE>)* <_FUNCTION_ARG> )* (<_WHITESPACE>)* ")" > | | | diff --git a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParserTokenManager.java b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParserTokenManager.java index 2bd08ea15d6..9e422af5be7 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParserTokenManager.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParserTokenManager.java @@ -1,30 +1,20 @@ /* Generated By:JavaCC: Do not edit this line. AccumuloSyntaxParserTokenManager.java */ package datawave.query.language.parser.lucene; -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at +/** + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for + * additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -/** Token Manager. */ -public class AccumuloSyntaxParserTokenManager implements AccumuloSyntaxParserConstants { - - /** Debug output. */ +public class AccumuloSyntaxParserTokenManager implements AcumuloSyntaxParserConstants { public java.io.PrintStream debugStream = System.out; - /** Set debug output. */ public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; } @@ -57,13 +47,24 @@ private final int jjStartNfa_2(int pos, long active0) { return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1); } - private int jjStopAtPos(int pos, int kind) { + private final int jjStopAtPos(int pos, int kind) { jjmatchedKind = kind; jjmatchedPos = pos; return pos + 1; } - private int jjMoveStringLiteralDfa0_2() { + private final int jjStartNfaWithStates_2(int pos, int kind, int state) { + jjmatchedKind = kind; + jjmatchedPos = pos; + try { + curChar = input_stream.readChar(); + } catch (java.io.IOException e) { + return pos + 1; + } + return jjMoveNfa_2(state, pos + 1); + } + + private final int jjMoveStringLiteralDfa0_2() { switch (curChar) { case 40: return jjStopAtPos(0, 17); @@ -91,7 +92,7 @@ private int jjMoveStringLiteralDfa0_2() { } } - private int jjMoveStringLiteralDfa1_2(long active0) { + private final int jjMoveStringLiteralDfa1_2(long active0) { try { curChar = input_stream.readChar(); } catch (java.io.IOException e) { @@ -116,7 +117,7 @@ private int jjMoveStringLiteralDfa1_2(long active0) { return jjStartNfa_2(0, active0); } - private int jjMoveStringLiteralDfa2_2(long old0, long active0) { + private final int jjMoveStringLiteralDfa2_2(long old0, long active0) { if (((active0 &= old0)) == 0L) return jjStartNfa_2(0, old0); try { @@ -142,15 +143,33 @@ private int jjMoveStringLiteralDfa2_2(long old0, long active0) { return jjStartNfa_2(1, active0); } - private int jjStartNfaWithStates_2(int pos, int kind, int state) { - jjmatchedKind = kind; - jjmatchedPos = pos; - try { - curChar = input_stream.readChar(); - } catch (java.io.IOException e) { - return pos + 1; + private final void jjCheckNAdd(int state) { + if (jjrounds[state] != jjround) { + jjstateSet[jjnewStateCnt++] = state; + jjrounds[state] = jjround; } - return jjMoveNfa_2(state, pos + 1); + } + + private final void jjAddStates(int start, int end) { + do { + jjstateSet[jjnewStateCnt++] = jjnextStates[start]; + } while (start++ != end); + } + + private final void jjCheckNAddTwoStates(int state1, int state2) { + jjCheckNAdd(state1); + jjCheckNAdd(state2); + } + + private final void jjCheckNAddStates(int start, int end) { + do { + jjCheckNAdd(jjnextStates[start]); + } while (start++ != end); + } + + private final void jjCheckNAddStates(int start) { + jjCheckNAdd(jjnextStates[start]); + jjCheckNAdd(jjnextStates[start + 1]); } static final long[] jjbitVec0 = {0x1L, 0x0L, 0x0L, 0x0L}; @@ -158,18 +177,19 @@ private int jjStartNfaWithStates_2(int pos, int kind, int state) { static final long[] jjbitVec3 = {0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL}; static final long[] jjbitVec4 = {0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL}; - private int jjMoveNfa_2(int startState, int curPos) { + private final int jjMoveNfa_2(int startState, int curPos) { + int[] nextStates; int startsAt = 0; jjnewStateCnt = 71; int i = 1; jjstateSet[0] = startState; - int kind = 0x7fffffff; + int j, kind = 0x7fffffff; for (;;) { if (++jjround == 0x7fffffff) ReInitRounds(); if (curChar < 64) { long l = 1L << curChar; - do { + MatchLoop: do { switch (jjstateSet[--i]) { case 0: if ((0xfbff7cf2ffffd9ffL 
& l) != 0L) { @@ -207,115 +227,115 @@ else if (curChar == 35) break; case 5: if (curChar == 40) - jjCheckNAddStates(9, 13); + jjAddStates(9, 12); break; case 6: case 7: if (curChar == 34) - jjCheckNAddStates(14, 16); + jjCheckNAddStates(13, 15); break; case 9: if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddStates(14, 16); + jjCheckNAddStates(13, 15); break; case 10: if (curChar == 34) - jjCheckNAddStates(17, 23); + jjCheckNAddStates(16, 18); break; case 11: if (curChar == 44) - jjCheckNAddStates(24, 28); + jjCheckNAddStates(19, 23); break; case 12: if ((0x100002600L & l) != 0L) - jjCheckNAddStates(24, 28); + jjCheckNAddStates(19, 23); break; case 13: case 14: if (curChar == 34) - jjCheckNAddStates(29, 31); + jjCheckNAddStates(24, 26); break; case 16: if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddStates(29, 31); + jjCheckNAddStates(24, 26); break; case 17: if ((0xffffec7affffffffL & l) != 0L) - jjCheckNAddStates(32, 41); + jjCheckNAddStates(27, 32); break; case 18: if ((0xffffecffffffffffL & l) != 0L) - jjCheckNAddStates(42, 44); + jjCheckNAddStates(33, 35); break; case 19: if ((0xffffec7affffffffL & l) != 0L) - jjCheckNAddStates(17, 23); + jjCheckNAddStates(16, 18); break; case 20: if (curChar == 41 && kind > 16) kind = 16; break; case 21: - if ((0xffffec7affffffffL & l) != 0L) - jjCheckNAddStates(45, 54); + if ((0x100002600L & l) != 0L) + jjCheckNAddStates(36, 39); break; case 22: - if ((0xffffecffffffffffL & l) != 0L) - jjCheckNAddStates(55, 57); + if ((0x100002600L & l) != 0L) + jjCheckNAddTwoStates(22, 11); break; - case 24: - jjCheckNAddStates(55, 57); + case 23: + if ((0x100002600L & l) != 0L) + jjCheckNAddTwoStates(23, 20); break; case 25: - jjCheckNAddStates(17, 23); + jjCheckNAddStates(33, 35); break; - case 27: - if (curChar == 39) - jjCheckNAddStates(58, 62); + case 26: + jjCheckNAddStates(16, 18); break; case 28: - if ((0xffffff7fffffffffL & l) != 0L) - jjCheckNAddTwoStates(28, 29); - break; - case 30: if (curChar == 39) - jjCheckNAddTwoStates(28, 29); + jjCheckNAddStates(40, 44); + break; + case 29: + if ((0xffffff7fffffffffL & l) != 0L) + jjCheckNAddTwoStates(29, 30); break; case 31: - case 40: if (curChar == 39) - jjCheckNAddStates(17, 23); + jjCheckNAddTwoStates(29, 30); break; case 32: - if ((0x100002600L & l) != 0L) - jjCheckNAddStates(63, 70); - break; - case 33: - if ((0x100002600L & l) != 0L) - jjCheckNAddTwoStates(33, 11); + case 38: + if (curChar == 39) + jjCheckNAddStates(16, 18); break; case 34: - if ((0x100002600L & l) != 0L) - jjCheckNAddStates(71, 76); - break; - case 36: if (curChar == 39) - jjCheckNAddStates(77, 79); + jjCheckNAddStates(45, 47); break; - case 38: + case 36: if ((0xffffff7fffffffffL & l) != 0L) - jjCheckNAddStates(77, 79); + jjCheckNAddStates(45, 47); break; - case 39: + case 37: case 52: - jjCheckNAdd(40); + jjCheckNAdd(38); + break; + case 39: + if ((0xffffec7affffffffL & l) != 0L) + jjCheckNAddStates(48, 53); + break; + case 40: + if ((0xffffecffffffffffL & l) != 0L) + jjCheckNAddStates(54, 56); break; case 42: - jjCheckNAddStates(42, 44); + jjCheckNAddStates(54, 56); break; case 44: if (curChar == 39) - jjCheckNAddStates(80, 84); + jjCheckNAddStates(57, 61); break; case 45: if ((0xffffff7fffffffffL & l) != 0L) @@ -327,11 +347,11 @@ else if (curChar == 35) break; case 49: if (curChar == 39) - jjCheckNAddStates(85, 87); + jjCheckNAddStates(62, 64); break; case 51: if ((0xffffff7fffffffffL & l) != 0L) - jjCheckNAddStates(85, 87); + jjCheckNAddStates(62, 64); break; case 53: if (curChar == 34) @@ -358,7 +378,7 @@ else if (curChar == 35) 
break; if (kind > 23) kind = 23; - jjAddStates(88, 89); + jjAddStates(65, 66); break; case 64: if (curChar == 46) @@ -390,7 +410,7 @@ else if (curChar == 35) } while (i != startsAt); } else if (curChar < 128) { long l = 1L << (curChar & 077); - do { + MatchLoop: do { switch (jjstateSet[--i]) { case 0: if ((0x97ffffffc7ffffffL & l) != 0L) { @@ -428,80 +448,80 @@ else if (curChar == 35) jjstateSet[jjnewStateCnt++] = 7; break; case 9: - jjCheckNAddStates(14, 16); + jjCheckNAddStates(13, 15); break; case 15: if (curChar == 92) jjstateSet[jjnewStateCnt++] = 14; break; case 16: - jjCheckNAddStates(29, 31); + jjCheckNAddStates(24, 26); break; case 17: - jjCheckNAddStates(32, 41); + jjCheckNAddStates(27, 32); break; case 18: - case 42: - jjCheckNAddStates(42, 44); - break; - case 19: case 25: - jjCheckNAddStates(17, 23); + jjCheckNAddStates(33, 35); break; - case 21: - jjCheckNAddStates(45, 54); + case 19: + case 26: + jjCheckNAddStates(16, 18); break; - case 22: case 24: - jjCheckNAddStates(55, 57); - break; - case 23: - case 26: + case 27: if (curChar == 92) - jjCheckNAddTwoStates(24, 25); - break; - case 28: - jjAddStates(90, 91); + jjCheckNAddTwoStates(25, 26); break; case 29: + jjAddStates(67, 68); + break; + case 30: if (curChar == 92) - jjAddStates(92, 93); + jjAddStates(69, 70); break; - case 35: + case 33: case 48: if (curChar == 92) - jjCheckNAdd(31); + jjCheckNAdd(32); break; - case 37: + case 35: if (curChar == 92) - jjstateSet[jjnewStateCnt++] = 36; + jjstateSet[jjnewStateCnt++] = 34; break; - case 38: - jjAddStates(77, 79); + case 36: + jjAddStates(45, 47); break; - case 39: + case 37: case 52: if ((0xffffffffefffffffL & l) != 0L) - jjCheckNAdd(40); + jjCheckNAdd(38); + break; + case 39: + jjCheckNAddStates(48, 53); + break; + case 40: + case 42: + jjCheckNAddStates(54, 56); break; case 41: case 43: if (curChar == 92) - jjCheckNAddTwoStates(42, 25); + jjCheckNAddTwoStates(42, 26); break; case 45: - jjAddStates(94, 95); + jjAddStates(71, 72); break; case 46: if (curChar == 92) - jjAddStates(96, 97); + jjAddStates(73, 74); break; case 50: if (curChar == 92) jjstateSet[jjnewStateCnt++] = 49; break; case 51: - jjAddStates(85, 87); + jjAddStates(62, 64); break; case 54: if ((0xffffffffefffffffL & l) != 0L) @@ -558,7 +578,7 @@ else if (curChar == 35) long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; long l2 = 1L << (curChar & 077); - do { + MatchLoop: do { switch (jjstateSet[--i]) { case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { @@ -589,71 +609,71 @@ else if (curChar == 35) break; case 9: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(14, 16); + jjCheckNAddStates(13, 15); break; case 12: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(24, 28); + jjAddStates(19, 23); break; case 16: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(29, 31); + jjCheckNAddStates(24, 26); break; case 17: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(32, 41); + jjCheckNAddStates(27, 32); break; case 18: - case 42: + case 25: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(42, 44); + jjCheckNAddStates(33, 35); break; case 19: - case 25: + case 26: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(17, 23); + jjCheckNAddStates(16, 18); break; case 21: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(45, 54); + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjCheckNAddStates(36, 39); break; case 22: - case 24: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(55, 57); - break; - case 28: - if 
(jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(90, 91); - break; - case 32: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(63, 70); + jjCheckNAddTwoStates(22, 11); break; - case 33: + case 23: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(33, 11); + jjCheckNAddTwoStates(23, 20); break; - case 34: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(71, 76); + case 29: + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjAddStates(67, 68); break; - case 38: + case 36: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(77, 79); + jjAddStates(45, 47); break; - case 39: + case 37: case 52: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAdd(40); + jjCheckNAdd(38); + break; + case 39: + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjCheckNAddStates(48, 53); + break; + case 40: + case 42: + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjCheckNAddStates(54, 56); break; case 45: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(94, 95); + jjAddStates(71, 72); break; case 51: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(85, 87); + jjAddStates(62, 64); break; case 54: case 56: @@ -692,29 +712,30 @@ else if (curChar == 35) } } - private int jjMoveStringLiteralDfa0_0() { + private final int jjMoveStringLiteralDfa0_0() { return jjMoveNfa_0(0, 0); } - private int jjMoveNfa_0(int startState, int curPos) { + private final int jjMoveNfa_0(int startState, int curPos) { + int[] nextStates; int startsAt = 0; jjnewStateCnt = 3; int i = 1; jjstateSet[0] = startState; - int kind = 0x7fffffff; + int j, kind = 0x7fffffff; for (;;) { if (++jjround == 0x7fffffff) ReInitRounds(); if (curChar < 64) { long l = 1L << curChar; - do { + MatchLoop: do { switch (jjstateSet[--i]) { case 0: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 27) kind = 27; - jjAddStates(98, 99); + jjAddStates(75, 76); break; case 1: if (curChar == 46) @@ -733,7 +754,7 @@ private int jjMoveNfa_0(int startState, int curPos) { } while (i != startsAt); } else if (curChar < 128) { long l = 1L << (curChar & 077); - do { + MatchLoop: do { switch (jjstateSet[--i]) { default: break; @@ -745,7 +766,7 @@ private int jjMoveNfa_0(int startState, int curPos) { long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; long l2 = 1L << (curChar & 077); - do { + MatchLoop: do { switch (jjstateSet[--i]) { default: break; @@ -785,7 +806,18 @@ private final int jjStartNfa_1(int pos, long active0) { return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1); } - private int jjMoveStringLiteralDfa0_1() { + private final int jjStartNfaWithStates_1(int pos, int kind, int state) { + jjmatchedKind = kind; + jjmatchedPos = pos; + try { + curChar = input_stream.readChar(); + } catch (java.io.IOException e) { + return pos + 1; + } + return jjMoveNfa_1(state, pos + 1); + } + + private final int jjMoveStringLiteralDfa0_1() { switch (curChar) { case 93: return jjStopAtPos(0, 29); @@ -799,7 +831,7 @@ private int jjMoveStringLiteralDfa0_1() { } } - private int jjMoveStringLiteralDfa1_1(long active0) { + private final int jjMoveStringLiteralDfa1_1(long active0) { try { curChar = input_stream.readChar(); } catch (java.io.IOException e) { @@ -818,29 +850,19 @@ private int jjMoveStringLiteralDfa1_1(long active0) { return jjStartNfa_1(0, active0); } - private int jjStartNfaWithStates_1(int pos, int kind, int state) { - jjmatchedKind = kind; - jjmatchedPos = pos; - try { - curChar = input_stream.readChar(); - } catch (java.io.IOException e) { - return pos + 1; - } - return jjMoveNfa_1(state, pos + 1); - } - 
- private int jjMoveNfa_1(int startState, int curPos) { + private final int jjMoveNfa_1(int startState, int curPos) { + int[] nextStates; int startsAt = 0; jjnewStateCnt = 7; int i = 1; jjstateSet[0] = startState; - int kind = 0x7fffffff; + int j, kind = 0x7fffffff; for (;;) { if (++jjround == 0x7fffffff) ReInitRounds(); if (curChar < 64) { long l = 1L << curChar; - do { + MatchLoop: do { switch (jjstateSet[--i]) { case 0: if ((0xfffffffeffffffffL & l) != 0L) { @@ -860,11 +882,11 @@ private int jjMoveNfa_1(int startState, int curPos) { break; case 2: if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddStates(100, 102); + jjCheckNAddStates(77, 79); break; case 3: if (curChar == 34) - jjCheckNAddStates(100, 102); + jjCheckNAddStates(77, 79); break; case 5: if (curChar == 34 && kind > 31) @@ -883,7 +905,7 @@ private int jjMoveNfa_1(int startState, int curPos) { } while (i != startsAt); } else if (curChar < 128) { long l = 1L << (curChar & 077); - do { + MatchLoop: do { switch (jjstateSet[--i]) { case 0: case 6: @@ -894,7 +916,7 @@ private int jjMoveNfa_1(int startState, int curPos) { jjCheckNAdd(6); break; case 2: - jjAddStates(100, 102); + jjAddStates(77, 79); break; case 4: if (curChar == 92) @@ -910,7 +932,7 @@ private int jjMoveNfa_1(int startState, int curPos) { long l1 = 1L << (hiByte & 077); int i2 = (curChar & 0xff) >> 6; long l2 = 1L << (curChar & 077); - do { + MatchLoop: do { switch (jjstateSet[--i]) { case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { @@ -925,7 +947,7 @@ private int jjMoveNfa_1(int startState, int curPos) { break; case 2: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(100, 102); + jjAddStates(77, 79); break; case 6: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) @@ -955,9 +977,9 @@ private int jjMoveNfa_1(int startState, int curPos) { } } - static final int[] jjnextStates = {67, 69, 70, 54, 55, 57, 2, 3, 5, 6, 20, 21, 26, 27, 8, 9, 10, 6, 11, 20, 21, 26, 27, 32, 12, 13, 17, 43, 44, 15, 16, 10, - 6, 11, 18, 19, 20, 21, 26, 27, 32, 41, 18, 19, 41, 22, 19, 6, 11, 20, 21, 26, 27, 32, 23, 22, 19, 23, 28, 37, 38, 39, 29, 6, 33, 11, 34, 20, 21, 26, - 27, 6, 34, 20, 21, 26, 27, 37, 38, 39, 45, 50, 51, 52, 46, 50, 51, 52, 63, 64, 28, 29, 30, 35, 45, 46, 47, 48, 0, 1, 2, 4, 5,}; + static final int[] jjnextStates = {67, 69, 70, 54, 55, 57, 2, 3, 5, 6, 39, 43, 44, 8, 9, 10, 11, 20, 21, 12, 13, 17, 27, 28, 15, 16, 10, 11, 18, 19, 20, 21, + 24, 18, 19, 24, 22, 11, 23, 20, 29, 35, 36, 37, 30, 35, 36, 37, 40, 19, 11, 20, 21, 41, 40, 19, 41, 45, 50, 51, 52, 46, 50, 51, 52, 63, 64, 29, 30, + 31, 33, 45, 46, 47, 48, 0, 1, 2, 4, 5,}; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { switch (hiByte) { @@ -992,14 +1014,9 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo } } - /** Token literal values. */ public static final String[] jjstrLiteralImages = {"", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, "\50", "\51", "\72", "\136", null, null, null, null, "\133", "\173", null, null, "\135", "\175", null, null,}; - - /** Lexer state names. */ public static final String[] lexStateNames = {"Boost", "Range", "DEFAULT",}; - - /** Lex State array. 
*/ public static final int[] jjnewLexState = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, 1, 1, 2, -1, 2, 2, -1, -1,}; static final long[] jjtoToken = {0x1ffffe001L,}; @@ -1009,18 +1026,15 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo private final int[] jjstateSet = new int[142]; protected char curChar; - /** Constructor. */ public AccumuloSyntaxParserTokenManager(CharStream stream) { input_stream = stream; } - /** Constructor. */ public AccumuloSyntaxParserTokenManager(CharStream stream, int lexState) { this(stream); SwitchTo(lexState); } - /** Reinitialise parser. */ public void ReInit(CharStream stream) { jjmatchedPos = jjnewStateCnt = 0; curLexState = defaultLexState; @@ -1028,20 +1042,18 @@ public void ReInit(CharStream stream) { ReInitRounds(); } - private void ReInitRounds() { + private final void ReInitRounds() { int i; jjround = 0x80000001; for (i = 71; i-- > 0;) jjrounds[i] = 0x80000000; } - /** Reinitialise parser. */ public void ReInit(CharStream stream, int lexState) { ReInit(stream); SwitchTo(lexState); } - /** Switch to specified lex state. */ public void SwitchTo(int lexState) { if (lexState >= 3 || lexState < 0) throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); @@ -1050,27 +1062,14 @@ public void SwitchTo(int lexState) { } protected Token jjFillToken() { - final Token t; - final String curTokenImage; - final int beginLine; - final int endLine; - final int beginColumn; - final int endColumn; - String im = jjstrLiteralImages[jjmatchedKind]; - curTokenImage = (im == null) ? input_stream.GetImage() : im; - beginLine = input_stream.getBeginLine(); - beginColumn = input_stream.getBeginColumn(); - endLine = input_stream.getEndLine(); - endColumn = input_stream.getEndColumn(); - t = Token.newToken(jjmatchedKind); + Token t = Token.newToken(jjmatchedKind); t.kind = jjmatchedKind; - t.image = curTokenImage; - - t.beginLine = beginLine; - t.endLine = endLine; - t.beginColumn = beginColumn; - t.endColumn = endColumn; - + String im = jjstrLiteralImages[jjmatchedKind]; + t.image = (im == null) ? input_stream.GetImage() : im; + t.beginLine = input_stream.getBeginLine(); + t.beginColumn = input_stream.getBeginColumn(); + t.endLine = input_stream.getEndLine(); + t.endColumn = input_stream.getEndColumn(); return t; } @@ -1081,8 +1080,9 @@ protected Token jjFillToken() { int jjmatchedPos; int jjmatchedKind; - /** Get the next Token. 
*/ public Token getNextToken() { + int kind; + Token specialToken = null; Token matchedToken; int curPos = 0; @@ -1150,28 +1150,4 @@ public Token getNextToken() { } } - private void jjCheckNAdd(int state) { - if (jjrounds[state] != jjround) { - jjstateSet[jjnewStateCnt++] = state; - jjrounds[state] = jjround; - } - } - - private void jjAddStates(int start, int end) { - do { - jjstateSet[jjnewStateCnt++] = jjnextStates[start]; - } while (start++ != end); - } - - private void jjCheckNAddTwoStates(int state1, int state2) { - jjCheckNAdd(state1); - jjCheckNAdd(state2); - } - - private void jjCheckNAddStates(int start, int end) { - do { - jjCheckNAdd(jjnextStates[start]); - } while (start++ != end); - } - } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParserConstants.java b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParserConstants.java index 94da5e21598..46a2ce83b12 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParserConstants.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/AccumuloSyntaxParserConstants.java @@ -1,84 +1,45 @@ /* Generated By:JavaCC: Do not edit this line. AccumuloSyntaxParserConstants.java */ package datawave.query.language.parser.lucene; -/** - * Token literal values and constants. Generated by org.javacc.parser.OtherFilesGen#start() - */ public interface AccumuloSyntaxParserConstants { - /** End of File. */ int EOF = 0; - /** RegularExpression Id. */ int _NUM_CHAR = 1; - /** RegularExpression Id. */ int _ESCAPED_CHAR = 2; - /** RegularExpression Id. */ int _TERM_START_CHAR = 3; - /** RegularExpression Id. */ int _TERM_CHAR = 4; - /** RegularExpression Id. */ int _FUNCTION_ARG_CHAR = 5; - /** RegularExpression Id. */ int _FUNCTION_ARG_BEGINEND_CHAR = 6; - /** RegularExpression Id. */ int _FUNCTION_ARG_ALL_CHAR1 = 7; - /** RegularExpression Id. */ int _FUNCTION_ARG_ALL_CHAR2 = 8; - /** RegularExpression Id. */ int _FUNCTION_ARG = 9; - /** RegularExpression Id. */ int _WHITESPACE = 10; - /** RegularExpression Id. */ int _QUOTED_CHAR = 11; - /** RegularExpression Id. */ int AND = 13; - /** RegularExpression Id. */ int OR = 14; - /** RegularExpression Id. */ int NOT = 15; - /** RegularExpression Id. */ int FUNCTION = 16; - /** RegularExpression Id. */ int LPAREN = 17; - /** RegularExpression Id. */ int RPAREN = 18; - /** RegularExpression Id. */ int OP_COLON = 19; - /** RegularExpression Id. */ int CARAT = 20; - /** RegularExpression Id. */ int QUOTED = 21; - /** RegularExpression Id. */ int TERM = 22; - /** RegularExpression Id. */ int FUZZY_SLOP = 23; - /** RegularExpression Id. */ int REGEXPTERM = 24; - /** RegularExpression Id. */ int RANGEIN_START = 25; - /** RegularExpression Id. */ int RANGEEX_START = 26; - /** RegularExpression Id. */ int NUMBER = 27; - /** RegularExpression Id. */ int RANGE_TO = 28; - /** RegularExpression Id. */ int RANGEIN_END = 29; - /** RegularExpression Id. */ int RANGEEX_END = 30; - /** RegularExpression Id. */ int RANGE_QUOTED = 31; - /** RegularExpression Id. */ int RANGE_GOOP = 32; - /** Lexical state. */ int Boost = 0; - /** Lexical state. 
*/ int Range = 1; - /** Lexical state. */ int DEFAULT = 2; - /** Literal token values. */ String[] tokenImage = {"", "<_NUM_CHAR>", "<_ESCAPED_CHAR>", "<_TERM_START_CHAR>", "<_TERM_CHAR>", "<_FUNCTION_ARG_CHAR>", "<_FUNCTION_ARG_BEGINEND_CHAR>", "<_FUNCTION_ARG_ALL_CHAR1>", "<_FUNCTION_ARG_ALL_CHAR2>", "<_FUNCTION_ARG>", "<_WHITESPACE>", "<_QUOTED_CHAR>", "", "\"AND\"", "\"OR\"", "\"NOT\"", "", "\"(\"", "\")\"", "\":\"", "\"^\"", "", "", "", diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index 5b517da26ed..f4d053cdd26 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -622,8 +622,6 @@ private void configureIterator(ShardQueryConfiguration config, IteratorSetting c configureExcerpts(config, cfg); - configureSummaries(config, cfg); - addOption(cfg, QueryOptions.LIMIT_FIELDS, config.getLimitFieldsAsString(), false); addOption(cfg, QueryOptions.MATCHING_FIELD_SETS, config.getMatchingFieldSetsAsString(), false); addOption(cfg, QueryOptions.GROUP_FIELDS, config.getGroupFields().toString(), true); @@ -657,13 +655,6 @@ private void configureExcerpts(ShardQueryConfiguration config, IteratorSetting c } } - private void configureSummaries(ShardQueryConfiguration config, IteratorSetting cfg) { - if (config.getSummaryOptions().getSummarySize() != 0) { - addOption(cfg, QueryOptions.SUMMARY_OPTIONS, config.getSummaryOptions().toString(), true); - addOption(cfg, QueryOptions.SUMMARY_ITERATOR, config.getSummaryIterator().getName(), false); - } - } - /* * (non-Javadoc) * diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java index 743fb1abfc1..249b33d2b26 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java @@ -1,6 +1,7 @@ package datawave.query.planner; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -13,7 +14,6 @@ import datawave.query.Constants; import datawave.query.QueryParameters; import datawave.query.attributes.ExcerptFields; -import datawave.query.attributes.SummaryOptions; import datawave.query.attributes.UniqueFields; import datawave.query.common.grouping.GroupFields; import datawave.query.config.ShardQueryConfiguration; @@ -68,10 +68,6 @@ public static void apply(Map optionsMap, ShardQueryConfiguration ExcerptFields excerptFields = ExcerptFields.from(value); config.setExcerptFields(excerptFields); break; - case QueryParameters.SUMMARY_OPTIONS: - SummaryOptions summaryOptions = SummaryOptions.from(value); - config.setSummaryOptions(summaryOptions); - break; case QueryParameters.NO_EXPANSION_FIELDS: config.setNoExpansionFields(new HashSet<>(Arrays.asList(StringUtils.split(value, Constants.PARAM_VALUE_SEP)))); break; diff --git a/warehouse/query-core/src/main/java/datawave/query/table/parser/ContentKeyValueFactory.java b/warehouse/query-core/src/main/java/datawave/query/table/parser/ContentKeyValueFactory.java index 0fd257dbd55..558d7ef2e4a 100644 --- a/warehouse/query-core/src/main/java/datawave/query/table/parser/ContentKeyValueFactory.java +++ 
b/warehouse/query-core/src/main/java/datawave/query/table/parser/ContentKeyValueFactory.java @@ -45,7 +45,19 @@ public static ContentKeyValue parse(Key key, Value value, Authorizations auths, * We are storing 'documents' in this column gzip'd and base64 encoded. Base64.decode detects and handles compression. */ byte[] contents = value.get(); - contents = decodeAndDecompressContent(contents); + try { + contents = decompress(Base64.getMimeDecoder().decode(contents)); + } catch (IOException e) { + log.error("Error decompressing Base64 encoded GZIPInputStream", e); + } catch (Exception e) { + // Thrown when data is not Base64 encoded. Try GZIP + try { + contents = decompress(contents); + } catch (IOException ioe) { + log.error("Error decompressing GZIPInputStream", e); + } + } + c.setContents(contents); } @@ -54,22 +66,6 @@ public static ContentKeyValue parse(Key key, Value value, Authorizations auths, return c; } - public static byte[] decodeAndDecompressContent(byte[] contents) { - try { - contents = decompress(Base64.getMimeDecoder().decode(contents)); - } catch (IOException e) { - log.error("Error decompressing Base64 encoded GZIPInputStream", e); - } catch (Exception e) { - // Thrown when data is not Base64 encoded. Try GZIP - try { - contents = decompress(contents); - } catch (IOException ioe) { - log.error("Error decompressing GZIPInputStream", e); - } - } - return contents; - } - private static boolean isCompressed(byte[] compressed) { return (compressed[0] == (byte) (GZIPInputStream.GZIP_MAGIC)) && (compressed[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8)); } diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java index e487962fe46..bacab5def54 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java @@ -63,7 +63,6 @@ import datawave.query.DocumentSerialization; import datawave.query.QueryParameters; import datawave.query.attributes.ExcerptFields; -import datawave.query.attributes.SummaryOptions; import datawave.query.attributes.UniqueFields; import datawave.query.cardinality.CardinalityConfiguration; import datawave.query.common.grouping.GroupFields; @@ -988,14 +987,6 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting } } - // Get the SUMMARY parameter if given - String summaryParam = settings.findParameter(QueryParameters.SUMMARY_OPTIONS).getParameterValue().trim(); - if (StringUtils.isNotBlank(summaryParam)) { - SummaryOptions summaryOptions = SummaryOptions.from(summaryParam); - this.setSummaryOptions(summaryOptions); - config.setSummaryOptions(summaryOptions); - } - // Get the HIT_LIST parameter if given String hitListString = settings.findParameter(QueryParameters.HIT_LIST).getParameterValue().trim(); if (StringUtils.isNotBlank(hitListString)) { @@ -1541,26 +1532,6 @@ public void setExcerptIterator(String iteratorClass) { } } - public SummaryOptions getSummaryOptions() { - return getConfig().getSummaryOptions(); - } - - public void setSummaryOptions(SummaryOptions summaryOptions) { - getConfig().setSummaryOptions(summaryOptions); - } - - public String getSummaryIteratorClassName() { - return getConfig().getSummaryIterator().getName(); - } - - public void setSummaryIteratorClassName(String iteratorClass) { - try { - getConfig().setSummaryIterator((Class>) Class.forName(iteratorClass)); - } catch (Exception e) { - 
throw new DatawaveFatalQueryException("Illegal content summary iterator class", e); - } - } - public int getFiFieldSeek() { return getConfig().getFiFieldSeek(); } diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/SummaryTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/SummaryTransform.java deleted file mode 100644 index 258ac2a3285..00000000000 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/SummaryTransform.java +++ /dev/null @@ -1,276 +0,0 @@ -package datawave.query.transformer; - -import static datawave.query.iterator.logic.ContentSummaryIterator.ONLY_SPECIFIED; -import static datawave.query.iterator.logic.ContentSummaryIterator.SUMMARY_SIZE; -import static datawave.query.iterator.logic.ContentSummaryIterator.VIEW_NAMES; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.Set; - -import javax.annotation.Nullable; - -import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.data.PartialKey; -import org.apache.accumulo.core.data.Range; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.iterators.IteratorEnvironment; -import org.apache.accumulo.core.iterators.SortedKeyValueIterator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.Iterators; - -import datawave.common.util.ArgumentChecker; -import datawave.query.Constants; -import datawave.query.attributes.Attributes; -import datawave.query.attributes.Content; -import datawave.query.attributes.Document; -import datawave.query.attributes.DocumentKey; -import datawave.query.attributes.SummaryOptions; -import datawave.query.iterator.logic.ContentSummaryIterator; - -/** - * This class is used to add summaries to returned documents when specified. - *
- *
- * An iterator of type "ContentSummaryIterator" is used to do the summary generation using options from a "SummaryOptions" - */ -public class SummaryTransform extends DocumentTransform.DefaultDocumentTransform { - - private static final Logger log = LoggerFactory.getLogger(SummaryTransform.class); - - private static final String SUMMARY_ERROR_MESSAGE = "UNABLE TO GENERATE SUMMARY"; - private static final String SUMMARY_EMPTY_MESSAGE = "NO CONTENT FOUND TO SUMMARIZE"; - private static final Summary ERROR_SUMMARY = new Summary(null, SUMMARY_ERROR_MESSAGE); - private static final Summary EMPTY_SUMMARY = new Summary(null, SUMMARY_EMPTY_MESSAGE); - - private static final String CONTENT_SUMMARY = "CONTENT_SUMMARY"; - - private final ContentSummaryIterator summaryIterator; - private final SummaryOptions summaryOptions; - private final IteratorEnvironment env; - private final SortedKeyValueIterator source; - - public SummaryTransform(SummaryOptions summaryOptions, IteratorEnvironment env, SortedKeyValueIterator source, - SortedKeyValueIterator summaryIterator) { - ArgumentChecker.notNull(summaryOptions); - this.summaryOptions = summaryOptions; - this.env = env; - this.source = source; - this.summaryIterator = (ContentSummaryIterator) summaryIterator; - - } - - @Nullable - @Override - public Entry apply(@Nullable Entry entry) { - if (entry != null) { - Document document = entry.getValue(); - // Do not bother adding summaries to transient documents. - if (document.isToKeep()) { - ArrayList documentKeys = getEventIds(document); - if (!documentKeys.isEmpty()) { - if (log.isTraceEnabled()) { - log.trace("Fetching summaries {} for document {}", summaryOptions, document.getMetadata()); - } - Set summaries = getSummaries(documentKeys); - addSummariesToDocument(summaries, document); - } else { - if (log.isTraceEnabled()) { - log.trace("document keys were not added to document {}, skipping", document.getMetadata()); - } - } - } - } - return entry; - } - - /** - * Retrieve the eventIds in the document. - * - * @param document - * the document - * @return a list of the eventIds - */ - private static ArrayList getEventIds(Document document) { - ArrayList eventIds = new ArrayList<>(); - if (document.containsKey("RECORD_ID")) { - eventIds.add((DocumentKey) document.get("RECORD_ID")); - } else { - Key key = document.getMetadata(); - String[] cf = key.getColumnFamily().toString().split(Constants.NULL); - eventIds.add(new DocumentKey(key.getRow().toString(), cf[0], cf[1], document.isToKeep())); - } - - return eventIds; - } - - /** - * Add the summaries to the document as part of {@value #CONTENT_SUMMARY}. - * - * @param summaries - * the summaries to add - * @param document - * the document - */ - private static void addSummariesToDocument(Set summaries, Document document) { - Attributes summaryAttribute = new Attributes(true); - - for (Summary summary : summaries) { - if (!summary.isEmpty()) { - Content contentSummary = new Content(summary.getSummary(), summary.getSource(), true); - summaryAttribute.add(contentSummary); - } - } - - document.put(CONTENT_SUMMARY, summaryAttribute); - } - - /** - * Get the summaries. - * - * @param documentKeys - * the pre-identified document keys - * @return the summaries - */ - private Set getSummaries(final ArrayList documentKeys) { - if (documentKeys.isEmpty()) { - return Collections.emptySet(); - } - - // Fetch the summaries. 
- Set summaries = new HashSet<>(); - for (DocumentKey documentKey : documentKeys) { - if (log.isTraceEnabled()) { - log.trace("Fetching summary for document {}", - documentKey.getShardId() + Constants.NULL + documentKey.getDataType() + Constants.NULL + documentKey.getUid()); - } - - // Construct the required range for this document. - Key startKey = new Key(documentKey.getShardId(), documentKey.getDataType() + Constants.NULL + documentKey.getUid()); - Key endKey = startKey.followingKey(PartialKey.ROW_COLFAM); - Range range = new Range(startKey, true, endKey, false); - - Summary summary = getSummary(range, summaryOptions); - // Only retain non-blank summaries. - if (!summary.isEmpty()) { - summaries.add(summary); - } else { - if (log.isTraceEnabled()) { - log.trace("Failed to find summary for document {}", - documentKey.getShardId() + Constants.NULL + documentKey.getDataType() + Constants.NULL + documentKey.getUid()); - } - } - } - return summaries; - } - - /** - * Get the summary - * - * @param range - * the range to use when seeking - * @param summaryOptions - * the object with our summary specifications - * @return the summary - */ - private Summary getSummary(Range range, SummaryOptions summaryOptions) { - // get the options out of the SummaryOptions object - final Map summaryIteratorOptions = new HashMap<>(); - summaryIteratorOptions.put(SUMMARY_SIZE, String.valueOf(summaryOptions.getSummarySize())); - if (!summaryOptions.isEmpty()) { - summaryIteratorOptions.put(VIEW_NAMES, summaryOptions.viewNamesListToString()); - } - summaryIteratorOptions.put(ONLY_SPECIFIED, String.valueOf(summaryOptions.onlyListedViews())); - - try { - // set all of our options for the iterator - summaryIterator.init(source, summaryIteratorOptions, env); - - // run the iterator - summaryIterator.seek(range, Collections.emptyList(), false); - - // if a summary is returned... - if (summaryIterator.hasTop()) { - // the excerpt will be in the column qualifier of the top key - String summary = summaryIterator.getTopKey().getColumnQualifier().toString(); - // The column qualifier has the summary/summaries in it. - // make sure the summary is not blank... - if (summary.isBlank()) { - if (log.isErrorEnabled()) { - log.error("{} returned top key with blank column qualifier in key: {} when scanning for summary within range {}", - ContentSummaryIterator.class.getSimpleName(), summaryIterator.getTopKey(), range); - } - return ERROR_SUMMARY; - } - // return our summary - return new Summary(range.getStartKey(), summary); - } - } catch (IOException e) { - throw new RuntimeException("Failed to scan for summary within range " + range, e); - } - - // when working correctly, it should always return from inside the loop so if this is reached something went very wrong - return EMPTY_SUMMARY; - } - - /** - * Add summaries to the documents from the given iterator. - * - * @param in - * the iterator source - * @return an iterator that will supply the enriched documents - */ - public Iterator> getIterator(final Iterator> in) { - return Iterators.transform(in, this); - } - - /** - * A class that holds the info for one summary. 
- */ - private static class Summary { - private final String summary; - private final Key source; - - public Summary(Key source, String summary) { - this.source = source; - this.summary = summary; - } - - public String getSummary() { - return summary; - } - - public Key getSource() { - return source; - } - - public boolean isEmpty() { - return summary.isEmpty(); - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - Summary summary1 = (Summary) o; - return (summary.equals(summary1.summary) && source.equals(summary1.source)); - } - - @Override - public int hashCode() { - return Objects.hash(summary, source); - } - } - -} diff --git a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java index 7ae3cdccfc3..ae9f64a1a84 100644 --- a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java @@ -39,11 +39,9 @@ import datawave.microservice.query.QueryImpl; import datawave.query.DocumentSerialization; import datawave.query.attributes.ExcerptFields; -import datawave.query.attributes.SummaryOptions; import datawave.query.attributes.UniqueFields; import datawave.query.common.grouping.GroupFields; import datawave.query.iterator.ivarator.IvaratorCacheDirConfig; -import datawave.query.iterator.logic.ContentSummaryIterator; import datawave.query.iterator.logic.TermFrequencyExcerptIterator; import datawave.query.iterator.logic.TermFrequencyIndexIterator; import datawave.query.jexl.JexlASTHelper; @@ -53,7 +51,7 @@ public class ShardQueryConfigurationTest { - public static final Map,Class> primitiveMap = new HashMap<>(); + public final static Map,Class> primitiveMap = new HashMap<>(); static { primitiveMap.put(Boolean.class, boolean.class); primitiveMap.put(Byte.class, byte.class); @@ -469,10 +467,6 @@ public void setUp() throws Exception { updatedValues.put("excerptFields", ExcerptFields.from("FIELD_E/10,FIELD_F/11")); defaultValues.put("excerptIterator", TermFrequencyExcerptIterator.class); updatedValues.put("excerptIterator", TermFrequencyIndexIterator.class); - defaultValues.put("summaryOptions", new SummaryOptions()); - updatedValues.put("summaryOptions", SummaryOptions.from(String.valueOf(SummaryOptions.DEFAULT_SIZE))); - defaultValues.put("summaryIterator", ContentSummaryIterator.class); - updatedValues.put("summaryIterator", ContentSummaryIterator.class); defaultValues.put("fiFieldSeek", -1); updatedValues.put("fiFieldSeek", 10); defaultValues.put("fiNextSeek", -1); diff --git a/warehouse/query-core/src/test/java/datawave/query/iterator/logic/ContentSummaryIteratorTest.java b/warehouse/query-core/src/test/java/datawave/query/iterator/logic/ContentSummaryIteratorTest.java deleted file mode 100644 index 876698d9f1a..00000000000 --- a/warehouse/query-core/src/test/java/datawave/query/iterator/logic/ContentSummaryIteratorTest.java +++ /dev/null @@ -1,256 +0,0 @@ -package datawave.query.iterator.logic; - -import static datawave.query.iterator.logic.ContentSummaryIterator.ONLY_SPECIFIED; -import static datawave.query.iterator.logic.ContentSummaryIterator.SUMMARY_SIZE; -import static datawave.query.iterator.logic.ContentSummaryIterator.VIEW_NAMES; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static 
org.junit.Assert.assertTrue; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.AbstractMap; -import java.util.ArrayList; -import java.util.Base64; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.zip.GZIPOutputStream; - -import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.data.PartialKey; -import org.apache.accumulo.core.data.Range; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.iterators.IteratorEnvironment; -import org.apache.accumulo.core.security.ColumnVisibility; -import org.apache.hadoop.io.Text; -import org.easymock.EasyMockRunner; -import org.easymock.EasyMockSupport; -import org.easymock.Mock; -import org.junit.After; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; - -import datawave.ingest.mapreduce.handler.ExtendedDataTypeHandler; -import datawave.query.Constants; -import datawave.query.iterator.SortedListKeyValueIterator; - -@RunWith(EasyMockRunner.class) -public class ContentSummaryIteratorTest extends EasyMockSupport { - - private static final Text row = new Text("20220115_1"); - private static final Text colf = new Text(ExtendedDataTypeHandler.FULL_CONTENT_COLUMN_FAMILY); - - @Mock - private IteratorEnvironment env; - private static final List> source = new ArrayList<>(); - private final Map options = new HashMap<>(); - private final ContentSummaryIterator iterator = new ContentSummaryIterator(); - - @BeforeClass - public static void beforeClass() throws IOException { - givenData("email", "123.456.789", "CONTENT1", "test content"); - givenData("email", "987.654.321", "CONTENT1", "test content two first"); - givenData("email", "987.654.321", "CONTENT2", "test content two second"); - givenData("pdf", "111.222.333", "CONTENT2", "this is a test of a longer content compared to the other ones to test trimming"); - givenData("pdf", "111.222.333", "CONTENT31", "test content wildcard matching one"); - givenData("pdf", "111.222.333", "CONTENT32", "test content wildcard matching two"); - } - - private static void givenData(String datatype, String uid, String contentName, String content) throws IOException { - Text colq = new Text(datatype + Constants.NULL + uid + Constants.NULL + contentName); - Key key = new Key(row, colf, colq, new ColumnVisibility("ALL"), new Date().getTime()); - final ByteArrayOutputStream bos = new ByteArrayOutputStream(Math.max(content.getBytes().length / 2, 1024)); - final OutputStream b64s = Base64.getEncoder().wrap(bos); - final GZIPOutputStream gzip = new GZIPOutputStream(b64s); - gzip.write(content.getBytes()); - gzip.close(); - b64s.close(); - bos.close(); - Value value = new Value(bos.toByteArray()); - Map.Entry entry = new AbstractMap.SimpleEntry<>(key, value); - source.add(entry); - } - - @After - public void tearDown() { - options.clear(); - } - - /** - * @param contentNameList - * a comma separated list of content names in order - * @param only - * if we only want to use the content names from the passed in contentNameList - */ - private void givenOptions(String contentNameList, int summarySize, boolean only) { - if (contentNameList != null) { - options.put(VIEW_NAMES, contentNameList); - } - options.put(SUMMARY_SIZE, String.valueOf(summarySize)); - options.put(ONLY_SPECIFIED, String.valueOf(only)); - } - - private void initIterator() throws IOException { - iterator.init(new 
SortedListKeyValueIterator(source), options, env); - } - - @Test - public void testMatchFound1() throws IOException { - givenOptions("CONTENT1", 100, false); - initIterator(); - - Key startKey = new Key(row, new Text("email" + Constants.NULL + "123.456.789")); - Range range = new Range(startKey, true, startKey.followingKey(PartialKey.ROW_COLFAM), false); - - iterator.seek(range, Collections.emptyList(), false); - - assertTrue(iterator.hasTop()); - - Key topKey = iterator.getTopKey(); - assertEquals(row, topKey.getRow()); - assertEquals(new Text("email" + Constants.NULL + "123.456.789"), topKey.getColumnFamily()); - assertEquals(new Text("CONTENT1: test content"), topKey.getColumnQualifier()); - } - - @Test - public void testMatchFound2() throws IOException { - givenOptions("CONTENT2", 100, false); - initIterator(); - - Key startKey = new Key(row, new Text("email" + Constants.NULL + "987.654.321")); - Range range = new Range(startKey, true, startKey.followingKey(PartialKey.ROW_COLFAM), false); - - iterator.seek(range, Collections.emptyList(), false); - - assertTrue(iterator.hasTop()); - - Key topKey = iterator.getTopKey(); - assertEquals(row, topKey.getRow()); - assertEquals(new Text("email" + Constants.NULL + "987.654.321"), topKey.getColumnFamily()); - assertEquals(new Text("CONTENT2: test content two second"), topKey.getColumnQualifier()); - } - - @Test - public void testMatchFoundSpecificContentNotFirstInList() throws IOException { - givenOptions("CONTENT2", 100, false); - iterator.setViewNameList(List.of("CONTENT1", "CONTENT2")); - iterator.init(new SortedListKeyValueIterator(source), options, env); - - Key startKey = new Key(row, new Text("email" + Constants.NULL + "987.654.321")); - Range range = new Range(startKey, true, startKey.followingKey(PartialKey.ROW_COLFAM), false); - - iterator.seek(range, Collections.emptyList(), false); - - assertTrue(iterator.hasTop()); - - Key topKey = iterator.getTopKey(); - assertEquals(row, topKey.getRow()); - assertEquals(new Text("email" + Constants.NULL + "987.654.321"), topKey.getColumnFamily()); - assertEquals(new Text("CONTENT2: test content two second"), topKey.getColumnQualifier()); - } - - @Test - public void testMatchFoundWithTruncatedOutput() throws IOException { - givenOptions("CONTENT2", 30, false); - initIterator(); - - Key startKey = new Key(row, new Text("pdf" + Constants.NULL + "111.222.333")); - Range range = new Range(startKey, true, startKey.followingKey(PartialKey.ROW_COLFAM), false); - - iterator.seek(range, Collections.emptyList(), false); - - assertTrue(iterator.hasTop()); - - Key topKey = iterator.getTopKey(); - assertEquals(row, topKey.getRow()); - assertEquals(new Text("pdf" + Constants.NULL + "111.222.333"), topKey.getColumnFamily()); - assertEquals(new Text("CONTENT2: this is a test of a longer con"), topKey.getColumnQualifier()); - } - - @Test - public void testMatchFoundWithTruncatedMinimumOutput() throws IOException { - givenOptions("CONTENT2", -87, false); - initIterator(); - - Key startKey = new Key(row, new Text("pdf" + Constants.NULL + "111.222.333")); - Range range = new Range(startKey, true, startKey.followingKey(PartialKey.ROW_COLFAM), false); - - iterator.seek(range, Collections.emptyList(), false); - - assertTrue(iterator.hasTop()); - - Key topKey = iterator.getTopKey(); - assertEquals(row, topKey.getRow()); - assertEquals(new Text("pdf" + Constants.NULL + "111.222.333"), topKey.getColumnFamily()); - assertEquals(new Text("CONTENT2: t"), topKey.getColumnQualifier()); - } - - @Test - public void 
testMatchFoundWithSizeOverMax() throws IOException { - givenOptions("CONTENT1", 9000, false); - initIterator(); - - Key startKey = new Key(row, new Text("email" + Constants.NULL + "123.456.789")); - Range range = new Range(startKey, true, startKey.followingKey(PartialKey.ROW_COLFAM), false); - - iterator.seek(range, Collections.emptyList(), false); - - assertTrue(iterator.hasTop()); - - Key topKey = iterator.getTopKey(); - assertEquals(row, topKey.getRow()); - assertEquals(new Text("email" + Constants.NULL + "123.456.789"), topKey.getColumnFamily()); - assertEquals(new Text("CONTENT1: test content"), topKey.getColumnQualifier()); - } - - @Test - public void testMatchFoundWithTrailingRegex() throws IOException { - givenOptions("CONTENT3*", 100, false); - initIterator(); - - Key startKey = new Key(row, new Text("pdf" + Constants.NULL + "111.222.333")); - Range range = new Range(startKey, true, startKey.followingKey(PartialKey.ROW_COLFAM), false); - - iterator.seek(range, Collections.emptyList(), false); - - assertTrue(iterator.hasTop()); - - Key topKey = iterator.getTopKey(); - assertEquals(row, topKey.getRow()); - assertEquals(new Text("pdf" + Constants.NULL + "111.222.333"), topKey.getColumnFamily()); - assertEquals(new Text("CONTENT31:true: test content wildcard matching one\nCONTENT32:true: test content wildcard matching two"), - topKey.getColumnQualifier()); - } - - @Test - public void testNoMatchFoundForDataTypeAndUid() throws IOException { - givenOptions("CONTENT2", 50, false); - initIterator(); - - Key startKey = new Key(row, new Text("other" + Constants.NULL + "111.111.111")); - Range range = new Range(startKey, true, startKey.followingKey(PartialKey.ROW_COLFAM), false); - - iterator.seek(range, Collections.emptyList(), false); - - assertFalse(iterator.hasTop()); - } - - @Test - public void testNoMatchFoundForContentName() throws IOException { - givenOptions("THISWONTBEFOUND", 100, true); - iterator.setViewNameList(List.of("CONTENT1", "CONTENT2", "return", "of", "the", "mack")); - iterator.init(new SortedListKeyValueIterator(source), options, env); - - Key startKey = new Key(row, new Text("email" + Constants.NULL + "987.654.321")); - Range range = new Range(startKey, true, startKey.followingKey(PartialKey.ROW_COLFAM), false); - - iterator.seek(range, Collections.emptyList(), false); - - assertFalse(iterator.hasTop()); - } -} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/SummaryTest.java b/warehouse/query-core/src/test/java/datawave/query/util/SummaryTest.java deleted file mode 100644 index 9d59f63b0e1..00000000000 --- a/warehouse/query-core/src/test/java/datawave/query/util/SummaryTest.java +++ /dev/null @@ -1,382 +0,0 @@ -package datawave.query.util; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.Collection; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.TimeZone; -import java.util.UUID; - -import javax.inject.Inject; - -import org.apache.accumulo.core.client.AccumuloClient; -import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.security.Authorizations; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.jboss.arquillian.container.test.api.Deployment; -import org.jboss.arquillian.junit.Arquillian; -import 
org.jboss.shrinkwrap.api.ShrinkWrap; -import org.jboss.shrinkwrap.api.asset.StringAsset; -import org.jboss.shrinkwrap.api.spec.JavaArchive; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; - -import datawave.configuration.spring.SpringBean; -import datawave.core.query.configuration.GenericQueryConfiguration; -import datawave.helpers.PrintUtility; -import datawave.ingest.data.TypeRegistry; -import datawave.microservice.query.QueryImpl; -import datawave.query.QueryTestTableHelper; -import datawave.query.attributes.Attribute; -import datawave.query.attributes.Attributes; -import datawave.query.attributes.Document; -import datawave.query.function.JexlEvaluation; -import datawave.query.function.deserializer.KryoDocumentDeserializer; -import datawave.query.tables.ShardQueryLogic; -import datawave.query.tables.edge.DefaultEdgeEventQueryLogic; -import datawave.util.TableName; -import datawave.webservice.edgedictionary.RemoteEdgeDictionary; - -public abstract class SummaryTest { - - @RunWith(Arquillian.class) - public static class ShardRange extends SummaryTest { - protected static AccumuloClient connector = null; - - @BeforeClass - public static void setUp() throws Exception { - - QueryTestTableHelper qtth = new QueryTestTableHelper(ShardRange.class.toString(), log); - connector = qtth.client; - WiseGuysIngest.writeItAll(connector, WiseGuysIngest.WhatKindaRange.SHARD); - Authorizations auths = new Authorizations("ALL"); - PrintUtility.printTable(connector, auths, TableName.SHARD); - PrintUtility.printTable(connector, auths, TableName.SHARD_INDEX); - PrintUtility.printTable(connector, auths, QueryTestTableHelper.MODEL_TABLE_NAME); - } - - @Override - protected void runTestQuery(String queryString, Date startDate, Date endDate, Map extraParams, Collection goodResults, - boolean shouldReturnSomething) throws Exception { - super.runTestQuery(connector, queryString, startDate, endDate, extraParams, goodResults, shouldReturnSomething); - } - } - - @RunWith(Arquillian.class) - public static class DocumentRange extends SummaryTest { - protected static AccumuloClient connector = null; - - @BeforeClass - public static void setUp() throws Exception { - - QueryTestTableHelper qtth = new QueryTestTableHelper(DocumentRange.class.toString(), log); - connector = qtth.client; - - WiseGuysIngest.writeItAll(connector, WiseGuysIngest.WhatKindaRange.DOCUMENT); - Authorizations auths = new Authorizations("ALL"); - PrintUtility.printTable(connector, auths, TableName.SHARD); - PrintUtility.printTable(connector, auths, TableName.SHARD_INDEX); - PrintUtility.printTable(connector, auths, QueryTestTableHelper.MODEL_TABLE_NAME); - } - - @Override - protected void runTestQuery(String queryString, Date startDate, Date endDate, Map extraParams, Collection goodResults, - boolean shouldReturnSomething) throws Exception { - super.runTestQuery(connector, queryString, startDate, endDate, extraParams, goodResults, shouldReturnSomething); - } - } - - private static final Logger log = Logger.getLogger(SummaryTest.class); - - protected Authorizations auths = new Authorizations("ALL"); - - protected Set authSet = Set.of(auths); - - @Inject - @SpringBean(name = "EventQuery") - protected ShardQueryLogic logic; - - protected KryoDocumentDeserializer deserializer; - - private final DateFormat format = new SimpleDateFormat("yyyyMMdd"); - - @Deployment - public static JavaArchive createDeployment() throws Exception { - - return 
ShrinkWrap.create(JavaArchive.class) - .addPackages(true, "org.apache.deltaspike", "io.astefanutti.metrics.cdi", "datawave.query", "org.jboss.logging", - "datawave.webservice.query.result.event") - .deleteClass(DefaultEdgeEventQueryLogic.class).deleteClass(RemoteEdgeDictionary.class) - .deleteClass(datawave.query.metrics.QueryMetricQueryLogic.class) - .addAsManifestResource(new StringAsset( - "" + "datawave.query.tables.edge.MockAlternative" + ""), - "beans.xml"); - } - - @AfterClass - public static void teardown() { - TypeRegistry.reset(); - } - - @Before - public void setup() { - TimeZone.setDefault(TimeZone.getTimeZone("GMT")); - log.setLevel(Level.TRACE); - logic.setFullTableScanEnabled(true); - deserializer = new KryoDocumentDeserializer(); - } - - protected abstract void runTestQuery(String queryString, Date startDate, Date endDate, Map extraParams, Collection goodResults, - boolean shouldReturnSomething) throws Exception; - - protected void runTestQuery(AccumuloClient connector, String queryString, Date startDate, Date endDate, Map extraParams, - Collection goodResults, boolean shouldReturnSomething) throws Exception { - - QueryImpl settings = new QueryImpl(); - settings.setBeginDate(startDate); - settings.setEndDate(endDate); - settings.setPagesize(Integer.MAX_VALUE); - settings.setQueryAuthorizations(auths.serialize()); - settings.setQuery(queryString); - settings.setParameters(extraParams); - settings.setId(UUID.randomUUID()); - - log.debug("query: " + settings.getQuery()); - log.debug("logic: " + settings.getQueryLogicName()); - - GenericQueryConfiguration config = logic.initialize(connector, settings, authSet); - logic.setupQuery(config); - - Set docs = new HashSet<>(); - Set unexpectedFields = new HashSet<>(); - for (Map.Entry entry : logic) { - Document d = deserializer.apply(entry).getValue(); - log.trace(entry.getKey() + " => " + d); - docs.add(d); - Map>> dictionary = d.getDictionary(); - - log.debug("dictionary:" + dictionary); - for (Map.Entry>> dictionaryEntry : dictionary.entrySet()) { - - // skip expected generated fields - if (dictionaryEntry.getKey().equals(JexlEvaluation.HIT_TERM_FIELD) || dictionaryEntry.getKey().contains("ORIGINAL_COUNT") - || dictionaryEntry.getKey().equals("RECORD_ID")) { - continue; - } - - Attribute> attribute = dictionaryEntry.getValue(); - if (attribute instanceof Attributes) { - for (Attribute attr : ((Attributes) attribute).getAttributes()) { - String toFind = dictionaryEntry.getKey() + ":" + attr; - boolean found = goodResults.remove(toFind); - if (found) - log.debug("removed " + toFind); - else { - unexpectedFields.add(toFind); - } - } - } else { - - String toFind = dictionaryEntry.getKey() + ":" + dictionaryEntry.getValue(); - - boolean found = goodResults.remove(toFind); - if (found) - log.debug("removed " + toFind); - else { - unexpectedFields.add(toFind); - } - } - - } - } - - assertTrue("unexpected fields returned: " + unexpectedFields, unexpectedFields.isEmpty()); - assertTrue(goodResults + " was not empty", goodResults.isEmpty()); - - if (shouldReturnSomething) { - assertFalse("No docs were returned!", docs.isEmpty()); - } else { - assertTrue("no docs should be returned!", docs.isEmpty()); - } - } - - @Test - public void testWithNoArg() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY()"; - - // not sure 
why the timestamp and delete flag are present - Set goodResults = new HashSet<>(Set.of( - "CONTENT_SUMMARY:CONTENT: You can get much farther with a kind word and a gun than you can with a kind word alone: : [] 9223372036854775807 false")); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); - } - - @Test - public void testWithOnly() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(VIEWS:CONTENT/SIZE:50/ONLY)"; - - // not sure why the timestamp and delete flag are present - Set goodResults = new HashSet<>( - Set.of("CONTENT_SUMMARY:CONTENT: You can get much farther with a kind word and a gu: : [] 9223372036854775807 false")); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); - } - - @Test - public void testWithoutOnly() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(SIZE:50/VIEWS:CONTENT)"; - - // not sure why the timestamp and delete flag are present - Set goodResults = new HashSet<>( - Set.of("CONTENT_SUMMARY:CONTENT: You can get much farther with a kind word and a gu: : [] 9223372036854775807 false")); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); - } - - @Test - public void testSize() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(SIZE:50)"; - - // not sure why the timestamp and delete flag are present - Set goodResults = new HashSet<>( - Set.of("CONTENT_SUMMARY:CONTENT: You can get much farther with a kind word and a gu: : [] 9223372036854775807 false")); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); - } - - @Test - public void testOverMaxSize() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(SIZE:90000)"; - - // not sure why the timestamp and delete flag are present - Set goodResults = new HashSet<>(Set.of( - "CONTENT_SUMMARY:CONTENT: You can get much farther with a kind word and a gun than you can with a kind word alone: : [] 9223372036854775807 false")); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); - } - - @Test - public void testNegativeSize() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(SIZE:-50)"; - - // not sure why the timestamp and delete flag are present - Set goodResults = new 
HashSet<>(Set.of("CONTENT_SUMMARY:CONTENT: Y: : [] 9223372036854775807 false")); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); - } - - @Test - public void testNoContentFound() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(SIZE:50/ONLY/VIEWS:CANTFINDME,ORME)"; - - Set goodResults = new HashSet<>(Set.of("CONTENT_SUMMARY:NO CONTENT FOUND TO SUMMARIZE")); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); - } - - @Test - public void testSizeZero() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(SIZE:0)"; - - // not sure why the timestamp and delete flag are present - Set goodResults = Collections.emptySet(); - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, false); - } - - @Test - public void testNoSizeButOtherOptions() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(VIEWS:TEST1,TEST2)"; - - // not sure why the timestamp and delete flag are present - Set goodResults = new HashSet<>(Set.of( - "CONTENT_SUMMARY:CONTENT: You can get much farther with a kind word and a gun than you can with a kind word alone: : [] 9223372036854775807 false")); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); - } - - @Test - public void testBadOptionsFormat() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(SIZE:notanumber)"; - - Set goodResults = Collections.emptySet(); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, false); - } - - @Test - public void testOnlyWithNoOtherOptions() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - extraParameters.put("return.fields", "CONTENT_SUMMARY"); - extraParameters.put("query.syntax", "LUCENE"); - - String queryString = "QUOTE:(farther) #SUMMARY(ONLY)"; - - Set goodResults = new HashSet<>(Set.of("CONTENT_SUMMARY:NO CONTENT FOUND TO SUMMARIZE")); - - runTestQuery(queryString, format.parse("19000101"), format.parse("20240101"), extraParameters, goodResults, true); - } -} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java b/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java index 115926276e6..9ed431a66d9 100644 --- a/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java +++ b/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java @@ -1,20 +1,13 @@ package datawave.query.util; 
-import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.AbstractMap; -import java.util.Base64; import java.util.Date; import java.util.Map; import java.util.concurrent.TimeUnit; -import java.util.zip.GZIPOutputStream; import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.client.BatchWriter; import org.apache.accumulo.core.client.BatchWriterConfig; import org.apache.accumulo.core.client.MutationsRejectedException; -import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.user.SummingCombiner; @@ -37,7 +30,6 @@ import datawave.ingest.mapreduce.handler.shard.content.TermAndZone; import datawave.ingest.protobuf.TermWeight; import datawave.ingest.protobuf.Uid; -import datawave.query.Constants; import datawave.query.QueryTestTableHelper; import datawave.util.TableName; @@ -769,10 +761,6 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw addFiTfTokens(bw, range, "QUOTE", "Im gonna make him an offer he cant refuse", corleoneUID); addFiTfTokens(bw, range, "QUOTE", "If you can quote the rules then you can obey them", sopranoUID); addFiTfTokens(bw, range, "QUOTE", "You can get much farther with a kind word and a gun than you can with a kind word alone", caponeUID); - - addDColumn(datatype, corleoneUID, "CONTENT", "Im gonna make him an offer he cant refuse", bw); - addDColumn(datatype, sopranoUID, "CONTENT", "If you can quote the rules then you can obey them", bw); - addDColumn(datatype, caponeUID, "CONTENT", "You can get much farther with a kind word and a gun than you can with a kind word alone", bw); } finally { if (null != bw) { bw.close(); @@ -1113,21 +1101,4 @@ private static void addFiTfTokens(BatchWriter bw, WhatKindaRange range, String f } bw.addMutation(fi); } - - private static void addDColumn(String datatype, String uid, String contentName, String content, BatchWriter bw) - throws IOException, MutationsRejectedException { - Mutation d = new Mutation(shard); - - final ByteArrayOutputStream bos = new ByteArrayOutputStream(Math.max(content.getBytes().length / 2, 1024)); - final OutputStream b64s = Base64.getEncoder().wrap(bos); - final GZIPOutputStream gzip = new GZIPOutputStream(b64s); - gzip.write(content.getBytes()); - gzip.close(); - b64s.close(); - bos.close(); - Value value = new Value(bos.toByteArray()); - - d.put("d", datatype + "\u0000" + uid + "\u0000" + contentName, columnVisibility, timeStamp, value); - bw.addMutation(d); - } } diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml index e88918ca636..ed6d1e2345c 100644 --- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml +++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml @@ -46,7 +46,6 @@ - From e57c8553e087ad8c847948fcbe5d37931bbe36f9 Mon Sep 17 00:00:00 2001 From: Moriarty <22225248+apmoriarty@users.noreply.github.com> Date: Wed, 15 Jan 2025 15:18:16 +0000 Subject: [PATCH 14/16] NestedIterator provides native support for a seek method (#2684) * NestedIterator provides native support for a seek method * Update ArrayIterator * Update class level documentation for SeekableIterator interface * Add test to document NestedQueryIterator behavior when multiple nests exist --------- Co-authored-by: alerman --- 
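This patch removes the SeekableIterator indirection: seek() is declared on NestedIterator itself, so query-tree callers can re-position any node without an instanceof check. A minimal caller-side sketch of the resulting pattern; the helper and class names are illustrative, not part of this patch:

    import java.io.IOException;
    import java.util.Collection;
    import org.apache.accumulo.core.data.ByteSequence;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Range;

    final class SeekSupport {
        // Seek every leaf of a query tree uniformly; before this patch each
        // leaf had to be tested with "instanceof SeekableIterator" and cast.
        static void seekAll(Iterable<NestedIterator<Key>> leaves, Range range,
                        Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
            for (NestedIterator<Key> leaf : leaves) {
                leaf.seek(range, columnFamilies, inclusive);
            }
        }
    }

The AccumuloTreeIterable and QueryIterator hunks below adopt exactly this shape.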
.../query/iterator/AccumuloTreeIterable.java | 7 +- .../query/iterator/EmptyTreeIterable.java | 14 +++- .../iterator/EventDataScanNestedIterator.java | 4 +- .../query/iterator/EventFieldIterator.java | 6 ++ .../query/iterator/NestedIterator.java | 17 ++++ .../query/iterator/NestedQueryIterator.java | 25 +++++- .../query/iterator/QueryIterator.java | 3 +- .../query/iterator/SeekableIterator.java | 3 +- .../iterator/SeekableNestedIterator.java | 17 +--- .../query/iterator/logic/AndIterator.java | 56 ++++++------- .../query/iterator/logic/ArrayIterator.java | 17 +++- .../iterator/logic/IndexIteratorBridge.java | 6 +- .../query/iterator/logic/OrIterator.java | 11 +++ .../iterator/logic/RangeFilterIterator.java | 2 +- .../iterator/logic/RegexFilterIterator.java | 2 +- .../jexl/DelayedNonEventIndexContext.java | 5 +- .../iterator/NestedQueryIteratorTest.java | 82 +++++++++++++++++++ .../query/iterator/logic/AndOrIteratorIT.java | 30 +------ .../logic/IndexIteratorBridgeTest.java | 4 +- .../iterator/logic/NegationFilterTest.java | 9 ++ .../query/iterator/logic/OrIteratorIT.java | 48 ++++------- .../visitors/IteratorBuildingVisitorTest.java | 2 +- 22 files changed, 231 insertions(+), 139 deletions(-) create mode 100644 warehouse/query-core/src/test/java/datawave/query/iterator/NestedQueryIteratorTest.java diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/AccumuloTreeIterable.java b/warehouse/query-core/src/main/java/datawave/query/iterator/AccumuloTreeIterable.java index 24d1f83beee..f61d1b9301e 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/AccumuloTreeIterable.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/AccumuloTreeIterable.java @@ -52,11 +52,8 @@ public Iterator> iterator() { } public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { - Iterable> leaves = tree.leaves(); - for (NestedIterator leaf : leaves) { - if (leaf instanceof SeekableIterator) { - ((SeekableIterator) leaf).seek(range, columnFamilies, inclusive); - } + for (NestedIterator leaf : tree.leaves()) { + leaf.seek(range, columnFamilies, inclusive); } seenSeek = true; } diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/EmptyTreeIterable.java b/warehouse/query-core/src/main/java/datawave/query/iterator/EmptyTreeIterable.java index 174a6dc5460..ea170d12fcb 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/EmptyTreeIterable.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/EmptyTreeIterable.java @@ -1,14 +1,17 @@ package datawave.query.iterator; +import java.io.IOException; import java.util.Collection; import java.util.Collections; +import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Range; import datawave.query.attributes.Document; /** - * + * A stub for the NestedIterator, functionally equivalent to {@link Collections#emptyIterator()} */ public class EmptyTreeIterable implements NestedIterator { @@ -22,14 +25,19 @@ public Key move(Key minimum) { return null; } + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + // no-op + } + @Override public Collection> leaves() { - return Collections.EMPTY_SET; + return Collections.emptySet(); } @Override public Collection> children() { - return Collections.EMPTY_SET; + return Collections.emptySet(); } @Override diff --git 
a/warehouse/query-core/src/main/java/datawave/query/iterator/EventDataScanNestedIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/EventDataScanNestedIterator.java index 357877b0cf7..04a23b4338b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/EventDataScanNestedIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/EventDataScanNestedIterator.java @@ -18,9 +18,9 @@ import datawave.query.attributes.Document; /** - * + * This iterator supports a full table scan over the event column */ -public class EventDataScanNestedIterator implements NestedIterator, SeekableIterator { +public class EventDataScanNestedIterator implements NestedIterator { private static final Logger log = Logger.getLogger(EventDataScanNestedIterator.class); protected SortedKeyValueIterator source; protected Key topKey = null; diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/EventFieldIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/EventFieldIterator.java index 3bef76f9587..94ff29196f9 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/EventFieldIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/EventFieldIterator.java @@ -4,6 +4,7 @@ import java.util.Collection; import java.util.Collections; +import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; @@ -78,6 +79,11 @@ public Key move(Key minimum) { return next(); } + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + source.seek(range, columnFamilies, inclusive); + } + @Override public Collection> leaves() { return Collections.emptySet(); diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/NestedIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/NestedIterator.java index 5a0ca99d855..db7c84ce5ad 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/NestedIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/NestedIterator.java @@ -1,8 +1,11 @@ package datawave.query.iterator; +import java.io.IOException; import java.util.Collection; import java.util.Iterator; +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.iterators.IteratorEnvironment; import datawave.query.attributes.Document; @@ -33,6 +36,20 @@ public interface NestedIterator extends Iterator { */ T move(T minimum); + /** + * Hook to allow issuing a seek to the underlying source iterator(s) + * + * @param range + * the seek range + * @param columnFamilies + * the column families + * @param inclusive + * true if range is inclusive + * @throws IOException + * for issues with reads + */ + void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException; + /** * Returns a reference to all of the leaf nodes at or below this. This is useful when we need to call seek on leaf nodes that are * SortedKeyValueIterators. 
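Implementation note: for leaves that wrap an Accumulo source, the new hook is typically a one-line delegation (see the EventFieldIterator change above); purely logical nodes implement it as a no-op (see EmptyTreeIterable), and branch nodes such as OrIterator fan the call out to their children. A minimal sketch of a hypothetical leaf, assuming a source field of type SortedKeyValueIterator<Key,Value> (the class name is illustrative and not part of this patch):

    import java.io.IOException;
    import java.util.Collection;

    import org.apache.accumulo.core.data.ByteSequence;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.iterators.SortedKeyValueIterator;

    public class ExampleLeaf /* would implement NestedIterator<Key> */ {
        private SortedKeyValueIterator<Key,Value> source;

        // delegate the seek straight to the wrapped Accumulo source iterator
        public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
            source.seek(range, columnFamilies, inclusive);
        }
    }
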
diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/NestedQueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/NestedQueryIterator.java index 836636e975e..bf24b77506e 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/NestedQueryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/NestedQueryIterator.java @@ -1,8 +1,10 @@ package datawave.query.iterator; +import java.io.IOException; import java.util.Collection; import java.util.Queue; +import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Range; import org.apache.log4j.Logger; @@ -81,16 +83,15 @@ public T next() { @Override public void remove() { currentNest.remove(); - } @Override public void initialize() { if (null == currentNest) { popNextNest(); - } else + } else { currentNest.initialize(); - + } } @Override @@ -98,6 +99,24 @@ public T move(T minimum) { return currentNest.move(minimum); } + /** + * Seeks the current nest using the provided range. Note: if the range is beyond the current nest it is up to the caller to advance to the next nest via a + * call to {@link #hasNext()} + * + * @param range + * the seek range + * @param columnFamilies + * the column families + * @param inclusive + * true if range is inclusive + * @throws IOException + * if the underlying source has a problem + */ + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + currentNest.seek(range, columnFamilies, inclusive); + } + @Override public Collection> leaves() { return currentNest.leaves(); diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java index b36d1467667..c99cd2d5fcb 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java @@ -97,7 +97,6 @@ import datawave.query.iterator.profile.QuerySpanCollector; import datawave.query.iterator.profile.SourceTrackingIterator; import datawave.query.jexl.DatawaveJexlContext; -import datawave.query.jexl.JexlASTHelper; import datawave.query.jexl.StatefulArithmetic; import datawave.query.jexl.functions.FieldIndexAggregator; import datawave.query.jexl.functions.IdentityAggregator; @@ -636,7 +635,7 @@ protected NestedIterator buildDocumentIterator(Range documentRange, Range s } // Seek() the boolean logic stuff - ((SeekableIterator) docIter).seek(range, columnFamilies, inclusive); + docIter.seek(range, columnFamilies, inclusive); // now lets start off the nested iterator docIter.initialize(); diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/SeekableIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/SeekableIterator.java index 854f9f38cf1..c1ae8f156c5 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/SeekableIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/SeekableIterator.java @@ -7,8 +7,9 @@ import org.apache.accumulo.core.data.Range; /** - * + * See {@link NestedIterator#seek(Range, Collection, boolean)} for examples of how this interface was previously used. 
*/ +@Deprecated(forRemoval = true, since = "7.13.0") public interface SeekableIterator { /** * @see org.apache.accumulo.core.iterators.SortedKeyValueIterator diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/SeekableNestedIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/SeekableNestedIterator.java index 3a48ace5da1..e90a7bc9e6e 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/SeekableNestedIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/SeekableNestedIterator.java @@ -11,11 +11,11 @@ import datawave.query.attributes.Document; /** - * + * This class is a delegate/base class now that the SeekableIterator was merged into the NestedIterator interface */ -public class SeekableNestedIterator implements NestedIterator, SeekableIterator { +public class SeekableNestedIterator implements NestedIterator { private static final Logger log = Logger.getLogger(SeekableNestedIterator.class); - private NestedIterator source; + private final NestedIterator source; protected Range totalRange = null; protected Collection columnFamilies = null; protected boolean inclusive = false; @@ -30,16 +30,7 @@ public void seek(Range range, Collection columnFamilies, boolean i this.totalRange = range; this.columnFamilies = columnFamilies; this.inclusive = inclusive; - if (source instanceof SeekableIterator) { - ((SeekableIterator) source).seek(range, columnFamilies, inclusive); - } else { - Iterable> leaves = source.leaves(); - for (NestedIterator leaf : leaves) { - if (leaf instanceof SeekableIterator) { - ((SeekableIterator) leaf).seek(range, columnFamilies, inclusive); - } - } - } + source.seek(range, columnFamilies, inclusive); } @Override diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/AndIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/AndIterator.java index e15774ab57e..a657b015bcd 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/AndIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/AndIterator.java @@ -24,14 +24,13 @@ import datawave.query.exceptions.DatawaveFatalQueryException; import datawave.query.exceptions.QueryIteratorYieldingException; import datawave.query.iterator.NestedIterator; -import datawave.query.iterator.SeekableIterator; import datawave.query.iterator.Util; import datawave.query.iterator.Util.Transformer; /** * Performs a merge join of the child iterators. It is expected that all child iterators return values in sorted order. */ -public class AndIterator> implements NestedIterator, SeekableIterator { +public class AndIterator> implements NestedIterator { // temporary stores of uninitialized streams of iterators private List> includes, excludes, contextIncludes, contextExcludes; @@ -258,27 +257,23 @@ public void seek(Range range, Collection columnFamilies, boolean i while (include.hasNext()) { NestedIterator child = include.next(); try { - for (NestedIterator itr : child.leaves()) { - if (itr instanceof SeekableIterator) { - try { - ((SeekableIterator) itr).seek(range, columnFamilies, inclusive); - } catch (IterationInterruptedException e2) { - // throw IterationInterrupted exceptions as-is with no modifications so the QueryIterator can handle it - throw e2; - } catch (Exception e2) { - if (itr.isNonEventField()) { - // dropping a non-event term from the query means that the accuracy of the query - // cannot be guaranteed. Thus, a fatal exception. 
- log.error("Lookup of a non-event field failed, failing query"); - throw new DatawaveFatalQueryException("Lookup of non-event field failed", e2); - } - // otherwise we can safely drop this term from the intersection as the field will get re-introduced - // to the context when the event is aggregated - // Note: even though the precision of the query is affected the accuracy is not. i.e., documents that - // would have been defeated at the field index will now be defeated at evaluation time - throw e2; - } + try { + child.seek(range, columnFamilies, inclusive); + } catch (IterationInterruptedException e2) { + // throw IterationInterrupted exceptions as-is with no modifications so the QueryIterator can handle it + throw e2; + } catch (Exception e2) { + if (child.isNonEventField()) { + // dropping a non-event term from the query means that the accuracy of the query + // cannot be guaranteed. Thus, a fatal exception. + log.error("Lookup of a non-event field failed, failing query"); + throw new DatawaveFatalQueryException("Lookup of non-event field failed", e2); } + // otherwise we can safely drop this term from the intersection as the field will get re-introduced + // to the context when the event is aggregated + // Note: even though the precision of the query is affected the accuracy is not. i.e., documents that + // would have been defeated at the field index will now be defeated at evaluation time + throw e2; } } catch (QueryIteratorYieldingException qye) { throw qye; @@ -286,21 +281,20 @@ public void seek(Range range, Collection columnFamilies, boolean i throw iie; } catch (Exception e) { include.remove(); - if (includes.isEmpty() || e instanceof DatawaveFatalQueryException || e instanceof IterationInterruptedException) { + if (includes.isEmpty() || e instanceof DatawaveFatalQueryException) { throw e; } else { log.warn("Lookup of event field failed, precision of query reduced."); } } } - Iterator> exclude = excludes.iterator(); - while (exclude.hasNext()) { - NestedIterator child = exclude.next(); - for (NestedIterator itr : child.leaves()) { - if (itr instanceof SeekableIterator) { - ((SeekableIterator) itr).seek(range, columnFamilies, inclusive); - } - } + + for (NestedIterator contextInclude : contextIncludes) { + contextInclude.seek(range, columnFamilies, inclusive); + } + + for (NestedIterator exclude : excludes) { + exclude.seek(range, columnFamilies, inclusive); } if (isInitialized()) { diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/ArrayIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/ArrayIterator.java index a7b44846e8c..b01e5175eec 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/ArrayIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/ArrayIterator.java @@ -1,21 +1,27 @@ package datawave.query.iterator.logic; +import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.LinkedList; +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.iteratorsImpl.system.SortedMapIterator; + import datawave.query.attributes.Document; import datawave.query.iterator.NestedIterator; /** - * A leaf node in an nested iterator tree. This is supposed to be a sample iterator that returns data from a sorted array. - * - * + * A leaf node in a nested iterator tree. 
This is supposed to be a sample iterator that returns data from a sorted array. + *

+ * This class is deprecated. A suitable replacement is an {@link IndexIteratorBridge} using a {@link SortedMapIterator}. * * @param * the type of the array iterator */ +@Deprecated(since = "7.13.0") public class ArrayIterator> implements NestedIterator { private static final Document doc = new Document(); @@ -58,6 +64,11 @@ public T move(T minimum) { } } + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + // no-op + } + public Collection> leaves() { Collection> c = new LinkedList<>(); c.add(this); diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/IndexIteratorBridge.java b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/IndexIteratorBridge.java index 36f08fdc5d6..9e35ef9aabb 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/IndexIteratorBridge.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/IndexIteratorBridge.java @@ -17,15 +17,11 @@ import datawave.query.attributes.Document; import datawave.query.iterator.DocumentIterator; import datawave.query.iterator.NestedIterator; -import datawave.query.iterator.SeekableIterator; /** * Wraps an Accumulo iterator with a NestedIterator interface. This bridges the gap between an IndexIterator and a NestedIterator. - * - * - * */ -public class IndexIteratorBridge implements SeekableIterator, NestedIterator, Comparable { +public class IndexIteratorBridge implements NestedIterator, Comparable { private static final Logger log = Logger.getLogger(IndexIteratorBridge.class); /* diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/OrIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/OrIterator.java index 45cafa43dd9..9a577874ad0 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/OrIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/OrIterator.java @@ -1,5 +1,6 @@ package datawave.query.iterator.logic; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -13,6 +14,9 @@ import java.util.SortedSet; import java.util.TreeSet; +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Range; + import com.google.common.collect.TreeMultimap; import datawave.query.attributes.Document; @@ -224,6 +228,13 @@ public T move(T minimum) { } } + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + for (NestedIterator child : children()) { + child.seek(range, columnFamilies, inclusive); + } + } + /** * Advances all iterators associated with the supplied key and adds them back into the sorted multimap. If any of the sub-trees returns false, then they are * dropped. diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/RangeFilterIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/RangeFilterIterator.java index cbbc3d1ffa0..053f58d4f6e 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/RangeFilterIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/RangeFilterIterator.java @@ -38,7 +38,7 @@ *

* row fi\x00FIELD : value\x00datatype\x00uid */ -public class RangeFilterIterator implements SeekableIterator, NestedIterator<Key>, Comparable<RangeFilterIterator> { +public class RangeFilterIterator implements NestedIterator<Key>, Comparable<RangeFilterIterator> { private static final Logger log = LoggerFactory.getLogger(RangeFilterIterator.class); diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/RegexFilterIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/RegexFilterIterator.java index 368277f44a8..fcb5fc0d131 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/logic/RegexFilterIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/logic/RegexFilterIterator.java @@ -40,7 +40,7 @@ *

* row fi\x00FIELD : value\x00datatype\x00uid */ -public class RegexFilterIterator implements SeekableIterator, NestedIterator, Comparable { +public class RegexFilterIterator implements NestedIterator, Comparable { private static final Logger log = LoggerFactory.getLogger(RegexFilterIterator.class); diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/DelayedNonEventIndexContext.java b/warehouse/query-core/src/main/java/datawave/query/jexl/DelayedNonEventIndexContext.java index 2f0ab81a2b6..8d303dae478 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/DelayedNonEventIndexContext.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/DelayedNonEventIndexContext.java @@ -21,7 +21,6 @@ import datawave.query.collections.FunctionalSet; import datawave.query.function.Equality; import datawave.query.iterator.NestedIterator; -import datawave.query.iterator.SeekableIterator; import datawave.query.jexl.visitors.IteratorBuildingVisitor; /** @@ -121,9 +120,7 @@ private List fetchOnDemand(String name) throws IOException { for (NestedIterator leaf : leaves) { // init/seek the leaf leaf.initialize(); - if (leaf instanceof SeekableIterator) { - ((SeekableIterator) leaf).seek(docRange, columnFamilies, inclusive); - } + leaf.seek(docRange, columnFamilies, inclusive); // for each value off the leaf add it to the document list as long as equality accepts it while (leaf.hasNext()) { diff --git a/warehouse/query-core/src/test/java/datawave/query/iterator/NestedQueryIteratorTest.java b/warehouse/query-core/src/test/java/datawave/query/iterator/NestedQueryIteratorTest.java new file mode 100644 index 00000000000..de1402a8660 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/iterator/NestedQueryIteratorTest.java @@ -0,0 +1,82 @@ +package datawave.query.iterator; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Range; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import datawave.query.data.parsers.EventKey; +import datawave.query.data.parsers.KeyParser; +import datawave.query.iterator.logic.IndexIteratorBridgeTest; + +public class NestedQueryIteratorTest { + + private static final Logger log = LoggerFactory.getLogger(NestedQueryIteratorTest.class); + + private final KeyParser parser = new EventKey(); + private final SortedSet results = new TreeSet<>(); + + @Test + public void testSingleNest() { + SortedSet uids = new TreeSet<>(List.of("uid-a", "uid-b", "uid-c")); + NestedQuery nestedQuery = createNestedQuery("FIELD_A", uids); + + NestedQueryIterator nestedQueryIterator = new NestedQueryIterator<>(nestedQuery); + drive(nestedQueryIterator); + assertEquals(uids, results); + } + + @Test + public void testMultipleNests() { + SortedSet uidsA = new TreeSet<>(List.of("uid-a", "uid-b", "uid-c")); + NestedQuery nestedQueryA = createNestedQuery("FIELD_A", uidsA); + + SortedSet uidsB = new TreeSet<>(List.of("uid-x", "uid-y", "uid-z")); + NestedQuery nestedQueryB = createNestedQuery("FIELD_A", uidsB); + + Collection> nestedQueries = List.of(nestedQueryA, nestedQueryB); + + NestedQueryIterator nestedQueryIterator = new NestedQueryIterator<>(nestedQueries); + drive(nestedQueryIterator); + + 
SortedSet expected = new TreeSet<>(); + expected.addAll(uidsA); + expected.addAll(uidsB); + assertEquals(expected, results); + } + + private void drive(NestedQueryIterator nestedQueryIterator) { + results.clear(); + while (nestedQueryIterator.hasNext()) { + Key tk = nestedQueryIterator.next(); + parser.parse(tk); + results.add(parser.getUid()); + } + } + + private NestedQuery createNestedQuery(String field, SortedSet uids) { + NestedIterator iter = IndexIteratorBridgeTest.createIndexIteratorBridge(field, uids); + try { + iter.seek(new Range(), Collections.emptySet(), true); + } catch (IOException e) { + fail("failed to initialize nested query"); + throw new RuntimeException(e); + } + + NestedQuery nestedQuery = new NestedQuery<>(); + nestedQuery.setIterator(iter); + return nestedQuery; + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/query/iterator/logic/AndOrIteratorIT.java b/warehouse/query-core/src/test/java/datawave/query/iterator/logic/AndOrIteratorIT.java index c367db8bd2c..f82b45dbf30 100644 --- a/warehouse/query-core/src/test/java/datawave/query/iterator/logic/AndOrIteratorIT.java +++ b/warehouse/query-core/src/test/java/datawave/query/iterator/logic/AndOrIteratorIT.java @@ -108,9 +108,6 @@ void testNestedUnionWithNegatedTermBuildDocument() throws IOException { NestedIterator exclude = IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_C", uidsEven, true); OrIterator union = new OrIterator(Collections.singleton(include), Collections.singleton(exclude)); - OrIteratorIT.seekIterators(Collections.singleton(include)); - OrIteratorIT.seekIterators(Collections.singleton(exclude)); - Set> includes = new HashSet<>(); includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_A", uidsPrime, true)); includes.add(union); @@ -135,8 +132,6 @@ void testNestedUnionOfNegatedTerms() throws IOException { includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_C", uidsOdd)); OrIterator union = new OrIterator(includes); - OrIteratorIT.seekIterators(includes); - NestedIterator include = IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_A", uidsAll); // uids built using DeMorgan's Law @@ -160,10 +155,6 @@ void testSimpleDoubleNestedUnion() throws IOException { rightIncludes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_C", uidsAll)); OrIterator rightUnion = new OrIterator(rightIncludes); - // init unions - OrIteratorIT.seekIterators(leftIncludes); - OrIteratorIT.seekIterators(rightIncludes); - AndIterator itr = new AndIterator(Sets.newHashSet(leftUnion, rightUnion)); driveIterator(itr, uidsAll); } @@ -180,8 +171,6 @@ void testAllRandomUids() throws IOException { unionIncludes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_C", uidsC)); OrIterator union = new OrIterator(unionIncludes); - OrIteratorIT.seekIterators(unionIncludes); - Set> includes = new HashSet<>(); includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_A", uidsA)); includes.add(union); @@ -363,9 +352,6 @@ void testNestedUnionWithNegatedIndexOnlyTermIsInterrupted() { Set> unionExcludes = new HashSet<>(); unionExcludes.add(IndexIteratorBridgeTest.createInterruptibleIndexIteratorBridge("FIELD_C", uidsAll, true, 4)); - OrIteratorIT.seekIterators(unionIncludes); - OrIteratorIT.seekIterators(unionExcludes); - OrIterator union = new OrIterator(unionIncludes, unionExcludes); Set> includes = new HashSet<>(); @@ -374,7 +360,6 @@ void testNestedUnionWithNegatedIndexOnlyTermIsInterrupted() { Map indexOnlyCounts = new HashMap<>(); 
indexOnlyCounts.put("FIELD_A", 1); - // indexOnlyCounts.put("FIELD_B", 2); AndIterator itr = new AndIterator(includes); assertThrows(IterationInterruptedException.class, () -> driveIterator(itr, uidsAll, indexOnlyCounts)); @@ -389,8 +374,6 @@ void testNegatedNestedUnionOfIndexOnlyFieldsIsInterrupted() { unionIncludes.add(IndexIteratorBridgeTest.createInterruptibleIndexIteratorBridge("FIELD_C", uidsOdd, true, 4)); OrIterator union = new OrIterator(unionIncludes); - OrIteratorIT.seekIterators(unionIncludes); - Set> includes = new HashSet<>(); includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_A", uidsAll, true)); @@ -488,8 +471,6 @@ private void driveIntersectionWithSimpleNestedUnion(SortedSet uidsA, Sor unionIncludes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_C", uidsC, true)); OrIterator union = new OrIterator(unionIncludes); - OrIteratorIT.seekIterators(unionIncludes); - Set> includes = new HashSet<>(); includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_A", uidsA, true)); includes.add(union); @@ -509,9 +490,6 @@ private void driveIntersectionWithSimpleNestedUnionWithNegatedTerm(SortedSet> includes = new HashSet<>(); includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_A", uidsA, true)); includes.add(union); @@ -528,7 +506,6 @@ private void driveIntersectionWithNestedUnionOfNegatedTerms(SortedSet ui unionIncludes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_C", uidsC, true)); OrIterator union = new OrIterator(unionIncludes); - OrIteratorIT.seekIterators(unionIncludes); Set> excludes = Collections.singleton(union); Set> includes = new HashSet<>(); @@ -584,9 +561,6 @@ void testCase02() throws IOException { OrIterator union = new OrIterator(unionIncludes, unionExcludes); - OrIteratorIT.seekIterators(unionIncludes); - OrIteratorIT.seekIterators(unionExcludes); - Set> includes = new HashSet<>(); includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_A", uidsA, true)); includes.add(union); @@ -809,8 +783,8 @@ private void driveIterator(AndIterator itr, SortedSet uids, Map uids = new TreeSet<>(Arrays.asList("a", "b", "c", "d", "e")); @@ -73,7 +73,7 @@ private void driveIterator(IndexIteratorBridge itr, String field, SortedSet uids) { + public static IndexIteratorBridge createIndexIteratorBridge(String field, SortedSet uids) { return createIndexIteratorBridge(field, uids, false, -1); } diff --git a/warehouse/query-core/src/test/java/datawave/query/iterator/logic/NegationFilterTest.java b/warehouse/query-core/src/test/java/datawave/query/iterator/logic/NegationFilterTest.java index ee7c0066a1d..6d294d92987 100644 --- a/warehouse/query-core/src/test/java/datawave/query/iterator/logic/NegationFilterTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/iterator/logic/NegationFilterTest.java @@ -5,11 +5,15 @@ import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; +import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Range; import org.junit.Test; import com.google.common.collect.Lists; @@ -153,6 +157,11 @@ public K move(K minimum) { } } + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + throw new UnsupportedEncodingException("Not implemented"); + } + @Override public 
Collection> leaves() { return null; diff --git a/warehouse/query-core/src/test/java/datawave/query/iterator/logic/OrIteratorIT.java b/warehouse/query-core/src/test/java/datawave/query/iterator/logic/OrIteratorIT.java index 13fa0049a6c..810d7f8d804 100644 --- a/warehouse/query-core/src/test/java/datawave/query/iterator/logic/OrIteratorIT.java +++ b/warehouse/query-core/src/test/java/datawave/query/iterator/logic/OrIteratorIT.java @@ -3,6 +3,7 @@ import static datawave.query.iterator.logic.TestUtil.randomUids; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -25,7 +26,6 @@ import datawave.query.attributes.Document; import datawave.query.iterator.NestedIterator; -import datawave.query.iterator.SeekableIterator; class OrIteratorIT { @@ -40,10 +40,7 @@ void testSimpleUnion() { Set> includes = new HashSet<>(); includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_A", uidsEven)); includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_B", uidsOdd)); - - seekIterators(includes); - - OrIterator itr = new OrIterator(includes); + OrIterator itr = new OrIterator<>(includes); driveIterator(itr, new TreeSet<>(uidsAll)); } @@ -53,12 +50,10 @@ void testUnionWithIndexOnlyTerm() { includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_A", uidsEven)); includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_B", uidsOdd, true)); - seekIterators(includes); - Map indexOnlyCounts = new HashMap<>(); indexOnlyCounts.put("FIELD_B", 5); - OrIterator itr = new OrIterator(includes); + OrIterator itr = new OrIterator<>(includes); driveIterator(itr, uidsAll, indexOnlyCounts); } @@ -68,12 +63,10 @@ void testUnionWithIndexOnlyTermThatIsInterrupted() { includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_A", uidsEven)); includes.add(IndexIteratorBridgeTest.createInterruptibleIndexIteratorBridge("FIELD_B", uidsOdd, true, 4)); - seekIterators(includes); - Map indexOnlyCounts = new HashMap<>(); indexOnlyCounts.put("FIELD_B", 3); - OrIterator itr = new OrIterator(includes); + OrIterator itr = new OrIterator<>(includes); assertThrows(IterationInterruptedException.class, () -> driveIterator(itr, uidsAll, indexOnlyCounts)); } @@ -86,13 +79,10 @@ void testUnionWithNegatedIndexOnlyTermThatIsInterrupted() { Set> excludes = new HashSet<>(); excludes.add(IndexIteratorBridgeTest.createInterruptibleIndexIteratorBridge("FIELD_B", uidsOdd, true, 4)); - seekIterators(includes); - seekIterators(excludes); - Map indexOnlyCounts = new HashMap<>(); indexOnlyCounts.put("FIELD_B", 3); - OrIterator itr = new OrIterator(includes, excludes); + OrIterator itr = new OrIterator<>(includes, excludes); assertThrows(IllegalStateException.class, () -> driveIterator(itr, uidsAll, indexOnlyCounts)); } @@ -151,7 +141,6 @@ private void driveUnion(SortedSet uidsA, SortedSet uidsB, Sorted includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_B", uidsB, true)); includes.add(IndexIteratorBridgeTest.createIndexIteratorBridge("FIELD_C", uidsC, true)); - seekIterators(includes); OrIterator orIterator = new OrIterator<>(includes); Map indexOnlyCounts = new HashMap<>(); @@ -181,7 +170,7 @@ private void driveUnion(SortedSet uidsA, SortedSet uidsB, Sorted * @param uids * 
expected uids */ - private void driveIterator(OrIterator itr, SortedSet uids) { + private void driveIterator(OrIterator itr, SortedSet uids) { driveIterator(itr, uids, Collections.emptyMap()); } @@ -195,7 +184,8 @@ private void driveIterator(OrIterator itr, SortedSet uids) { * @param indexOnlyCounts * the expected index only field counts */ - private void driveIterator(OrIterator itr, SortedSet uids, Map indexOnlyCounts) { + private void driveIterator(OrIterator itr, SortedSet uids, Map indexOnlyCounts) { + seekIterator(itr); itr.initialize(); int count = 0; @@ -206,7 +196,7 @@ private void driveIterator(OrIterator itr, SortedSet uids, Map uids, Map> iterators) { - for (NestedIterator iterator : iterators) { - if (iterator instanceof SeekableIterator) { - try { - ((SeekableIterator) iterator).seek(new Range(), Collections.emptyList(), false); - } catch (IOException e) { - fail("Could not seek iterator during test setup"); - } - } + private void seekIterator(NestedIterator iterator) { + try { + iterator.seek(new Range(), Collections.emptyList(), false); + } catch (IOException e) { + fail("Failed to seek iterators", e); } } } diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IteratorBuildingVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IteratorBuildingVisitorTest.java index a774dd6577f..88cceab583e 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IteratorBuildingVisitorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/IteratorBuildingVisitorTest.java @@ -923,7 +923,7 @@ private void eval(ASTJexlScript query, Range docRange, Key docKeyHit, List Date: Wed, 15 Jan 2025 15:55:29 +0000 Subject: [PATCH 15/16] Add FieldExpansionIterator that replaces the ANYFIELD scanner (#2660) * Add FieldExpansionIterator that replaces the ANYFIELD scanner * Correct bad assumption * formatting * Add note about where scanners should be closed * close scanner in case of exception * Normalization is now earlier in query planning Unfielded terms now have every Type applied Unfielded terms are not marked eval only when a normalization exception is thrown Corrected TEXT function's index query to account for differences in normalized index values and non-normalized event values * QueryFieldDatatype updates are now additive to account for index expansion and composite field rewrites * remove leftover print statements and unused import * FieldExpansionIterator supports seeking to start date * Improve FieldExpansionIterator handling of seek/next on datatype miss * Must run node transform rules prior to expansion via normalizers * updates per code review --- .../iterators/FieldExpansionIterator.java | 185 +++++++++++++ .../query/data/parsers/ShardIndexKey.java | 98 +++++++ .../functions/QueryFunctionsDescriptor.java | 91 +++++-- .../query/jexl/lookups/AsyncIndexLookup.java | 3 +- .../jexl/lookups/BoundedRangeIndexLookup.java | 3 +- .../lookups/FieldExpansionIndexLookup.java | 174 ++++++++++++ .../jexl/lookups/FieldNameIndexLookup.java | 1 + .../query/jexl/lookups/IndexLookupMap.java | 1 - .../visitors/ExpandMultiNormalizedTerms.java | 23 +- .../UnfieldedIndexExpansionVisitor.java | 43 ++- .../query/planner/DefaultQueryPlanner.java | 27 +- .../tables/async/event/VisitorFunction.java | 1 - .../iterators/FieldExpansionIteratorTest.java | 249 ++++++++++++++++++ .../datawave/query/AnyFieldQueryTest.java | 14 +- .../datawave/query/TextFunctionQueryTest.java | 41 +-- .../query/UnindexedNumericQueryTest.java | 3 
+- .../query/data/parsers/ShardIndexKeyTest.java | 41 +++ .../QueryFunctionsDescriptorTest.java | 60 +++++ .../ExpandMultiNormalizedTermsTest.java | 54 ++++ .../UnfieldedIndexExpansionVisitorTest.java | 1 + .../jexl/TestLuceneToJexlQueryParser.java | 16 ++ 21 files changed, 1048 insertions(+), 81 deletions(-) create mode 100644 warehouse/query-core/src/main/java/datawave/core/iterators/FieldExpansionIterator.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/data/parsers/ShardIndexKey.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/jexl/lookups/FieldExpansionIndexLookup.java create mode 100644 warehouse/query-core/src/test/java/datawave/core/iterators/FieldExpansionIteratorTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/data/parsers/ShardIndexKeyTest.java diff --git a/warehouse/query-core/src/main/java/datawave/core/iterators/FieldExpansionIterator.java b/warehouse/query-core/src/main/java/datawave/core/iterators/FieldExpansionIterator.java new file mode 100644 index 00000000000..50a99647260 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/core/iterators/FieldExpansionIterator.java @@ -0,0 +1,185 @@ +package datawave.core.iterators; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.PartialKey; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.iterators.IteratorEnvironment; +import org.apache.accumulo.core.iterators.OptionDescriber; +import org.apache.accumulo.core.iterators.SortedKeyValueIterator; +import org.apache.accumulo.core.iterators.user.SeekingFilter; +import org.apache.hadoop.io.Text; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; + +import datawave.query.Constants; +import datawave.query.data.parsers.ShardIndexKey; + +/** + * Attempts to expand an unfielded term into all possible fields + *

+ * Performs date range filtering by default. Optionally applies datatype filtering. + *

+ * Optionally restricts the search space to a set of fields + */ +public class FieldExpansionIterator extends SeekingFilter implements OptionDescriber { + + private static final Logger log = LoggerFactory.getLogger(FieldExpansionIterator.class); + + public static final String START_DATE = "start.date"; + public static final String END_DATE = "end.date"; + public static final String FIELDS = "fields"; + public static final String DATATYPES = "dts"; + + // required + private String startDate; + private String endDate; + + private Set fields; + private TreeSet datatypes; + + // track which fields this iterator has seen and returned. this collection is not persisted between teardown and rebuilds, so unique return values + // are only guaranteed within a single non-interrupted scan session + private final Set found = new HashSet<>(); + + private final ShardIndexKey parser = new ShardIndexKey(); + + @Override + public void init(SortedKeyValueIterator source, Map options, IteratorEnvironment env) throws IOException { + if (!validateOptions(options)) { + throw new IllegalStateException("FieldExpansionIterator not configured with correct options"); + } + + if (options.containsKey(FIELDS)) { + fields = new HashSet<>(Splitter.on(',').splitToList(options.get(FIELDS))); + } + + if (options.containsKey(DATATYPES)) { + datatypes = new TreeSet<>(Splitter.on(',').splitToList(options.get(DATATYPES))); + } + + startDate = options.get(START_DATE); + endDate = options.get(END_DATE) + Constants.MAX_UNICODE_STRING; + + super.init(source, options, env); + } + + @Override + public FilterResult filter(Key k, Value v) { + + // keep it simple for now + if (log.isTraceEnabled()) { + log.trace("tk: {}", k.toStringNoTime()); + } + + parser.parse(k); + + // if field does not match, skip to next field + if ((fields != null && !fields.contains(parser.getField())) || found.contains(parser.getField())) { + log.trace("field not in set of expansion fields, or already seen this field. 
advancing to next field"); + return new FilterResult(false, AdvanceResult.NEXT_CF); + } + + // ensure key falls within the date range + String date = parser.getShard(); + if (date.compareTo(startDate) < 0) { + // advance to start date + log.trace("Key before start date: {} < {}", date, startDate); + return new FilterResult(false, AdvanceResult.USE_HINT); + } + + if (date.compareTo(endDate) > 0) { + // advance to next field + log.trace("Key after end date: {} > {}", date, endDate); + return new FilterResult(false, AdvanceResult.NEXT_CF); + } + + if (datatypes != null && !datatypes.contains(parser.getDatatype())) { + + String lower = datatypes.lower(parser.getDatatype()); + if (lower != null) { + // advance to next field + return new FilterResult(false, AdvanceResult.NEXT_CF); + } + + String higher = datatypes.higher(parser.getDatatype()); + if (higher != null) { + // current datatype sorts before next possible hit, advance via next + return new FilterResult(false, AdvanceResult.NEXT); + } + } + + log.trace("key accepted, advancing to next CF"); + found.add(parser.getField()); + return new FilterResult(true, AdvanceResult.NEXT_CF); + } + + /** + * This method is only called when the top key's date range lies before the configured start date + * + * @param k + * a key + * @param v + * a value + * @return the start key for a seek range + */ + @Override + public Key getNextKeyHint(Key k, Value v) { + String shard = startDate + "_0"; + + Text cq; + if (datatypes == null || datatypes.isEmpty()) { + cq = new Text(shard); + } else { + cq = new Text(shard + '\u0000' + datatypes.first()); + } + + return new Key(k.getRow(), k.getColumnFamily(), cq, k.getTimestamp()); + } + + @Override + public IteratorOptions describeOptions() { + IteratorOptions options = new IteratorOptions(getClass().getSimpleName(), "Iterator that expands unfielded terms using the global index", null, null); + options.addNamedOption(START_DATE, "The start date"); + options.addNamedOption(END_DATE, "The end date"); + options.addNamedOption(FIELDS, "(optional) A comma-delimited set of fields that defines the search space"); + options.addNamedOption(DATATYPES, "(optional) A set of datatypes used to restrict the search space"); + return options; + } + + @Override + public boolean validateOptions(Map options) { + return options.containsKey(START_DATE) && options.containsKey(END_DATE); + } + + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + if (!range.isStartKeyInclusive()) { + Preconditions.checkNotNull(range.getStartKey(), "FieldExpansionIterator expected a non-null start key"); + Preconditions.checkNotNull(range.getStartKey().getColumnFamily(), "FieldExpansionIterator expected a non-null column qualifier"); + // need to skip to next column family + Key skip = range.getStartKey().followingKey(PartialKey.ROW_COLFAM); + if (skip.compareTo(range.getEndKey()) > 0) { + // handles the case where appending a null byte would cause the start key to be greater than the end key + Range skipRange = new Range(range.getEndKey(), true, range.getEndKey(), range.isEndKeyInclusive()); + super.seek(skipRange, columnFamilies, inclusive); + } else { + Range skipRange = new Range(skip, true, range.getEndKey(), range.isEndKeyInclusive()); + super.seek(skipRange, columnFamilies, inclusive); + } + } else { + super.seek(range, columnFamilies, inclusive); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/data/parsers/ShardIndexKey.java 
b/warehouse/query-core/src/main/java/datawave/query/data/parsers/ShardIndexKey.java new file mode 100644 index 00000000000..074daef3cae --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/data/parsers/ShardIndexKey.java @@ -0,0 +1,98 @@ +package datawave.query.data.parsers; + +import org.apache.accumulo.core.data.ByteSequence; +import org.apache.accumulo.core.data.Key; + +public class ShardIndexKey implements KeyParser { + + private Key key; + + private ByteSequence cq; + private int cqSplit; + + private String field; + private String value; + private String datatype; + private String shard; + + @Override + public void parse(Key k) { + clearState(); + this.key = k; + } + + @Override + public void clearState() { + this.cq = null; + this.cqSplit = -1; + + this.field = null; + this.value = null; + this.datatype = null; + this.shard = null; + } + + @Override + public String getDatatype() { + if (datatype == null) { + if (cq == null) { + cq = key.getColumnQualifierData(); + for (int i = cq.length() - 1; i > 0; i--) { + if (cq.byteAt(i) == 0x00) { + cqSplit = i; + break; + } + } + } + datatype = cq.subSequence(cqSplit + 1, cq.length()).toString(); + } + return datatype; + } + + public String getShard() { + if (shard == null) { + if (cq == null) { + cq = key.getColumnQualifierData(); + for (int i = 0; i < cq.length(); i++) { + if (cq.byteAt(i) == 0x00) { + cqSplit = i; + break; + } + } + } + shard = cq.subSequence(0, cqSplit).toString(); + } + return shard; + } + + @Override + public String getUid() { + throw new UnsupportedOperationException(getClass().getSimpleName() + " does not implement this method"); + } + + @Override + public String getRootUid() { + throw new UnsupportedOperationException(getClass().getSimpleName() + " does not implement this method"); + } + + @Override + public String getField() { + if (field == null) { + field = key.getColumnFamily().toString(); + } + return field; + } + + @Override + public String getValue() { + if (value == null) { + value = key.getRow().toString(); + } + return value; + } + + @Override + public Key getKey() { + return key; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java index 718cfa7ff50..5410f038e9e 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java @@ -5,10 +5,12 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.commons.jexl3.parser.ASTEQNode; import org.apache.commons.jexl3.parser.ASTERNode; import org.apache.commons.jexl3.parser.ASTFunctionNode; @@ -20,6 +22,8 @@ import org.apache.commons.jexl3.parser.JexlNodes; import org.apache.commons.jexl3.parser.ParserTreeConstants; +import datawave.data.type.Type; +import datawave.query.Constants; import datawave.query.attributes.AttributeFactory; import datawave.query.attributes.UniqueFields; import datawave.query.config.ShardQueryConfiguration; @@ -73,7 +77,7 @@ public JexlNode getIndexQuery(ShardQueryConfiguration config, MetadataHelper hel return getIndexQuery(); case QueryFunctions.INCLUDE_TEXT: // Return the appropriate index query. 
- return getTextIndexQuery(); + return getTextIndexQuery(helper); default: // Return the true node if unable to parse arguments. return TRUE_NODE; @@ -101,24 +105,75 @@ private JexlNode getIndexQuery() { } } - private JexlNode getTextIndexQuery() { - JexlNode node0 = args.get(0); - final String value = JexlNodes.getIdentifierOrLiteralAsString(args.get(1)); - if (node0 instanceof ASTIdentifier) { - final String field = JexlASTHelper.deconstructIdentifier(((ASTIdentifier) node0).getName()); - return JexlNodeFactory.buildNode((ASTEQNode) null, field, value); - } else { - // node0 is an Or node or an And node - // copy it - JexlNode newParent = JexlNodeFactory.shallowCopy(node0); - int i = 0; - for (ASTIdentifier identifier : JexlASTHelper.getIdentifiers(node0)) { - String field = JexlASTHelper.deconstructIdentifier(identifier.getName()); - JexlNode kid = JexlNodeFactory.buildNode((ASTEQNode) null, field, value); - kid.jjtSetParent(newParent); - newParent.jjtAddChild(kid, i++); + /** + * The index query for a text function MUST normalize the value as the actual value may differ between the event key and the index key + * + * @param helper + * a metadata helper + * @return a JexlNode + */ + private JexlNode getTextIndexQuery(MetadataHelper helper) { + List children = new LinkedList<>(); + + if (args.size() == 2) { + // single field value + createChildren(children, args.get(0), args.get(1), helper); + } else if (args.size() % 2 == 1) { + // dealing with {AND/OR, field, value, field value} + for (int i = 1; i < args.size(); i += 2) { + createChildren(children, args.get(i), args.get(i + 1), helper); } - return newParent; + } + + switch (children.size()) { + case 0: + return null; + case 1: + return children.get(0); + default: + // expand into an OR, unless an intersection is specifically requested + String expansion = JexlASTHelper.getIdentifier(args.get(0)); + if (expansion.equals("AND")) { + return JexlNodeFactory.createAndNode(children); + } else { + return JexlNodeFactory.createOrNode(children); + } + } + } + + private void createChildren(List children, JexlNode fieldName, JexlNode fieldValue, MetadataHelper helper) { + String field = JexlASTHelper.deconstructIdentifier(((ASTIdentifier) fieldName).getName()); + String literal = JexlNodes.getIdentifierOrLiteralAsString(fieldValue); + Set values = getNormalizedValues(field, literal, helper); + for (String value : values) { + children.add(JexlNodeFactory.buildNode((ASTEQNode) null, field, value)); + } + } + + private Set getNormalizedValues(String field, String value, MetadataHelper helper) { + Set values = new HashSet<>(); + values.add(value); // retain original + + Set> types = getTypesForField(field, helper); + for (Type type : types) { + try { + values.add(type.normalize(value)); + } catch (IllegalArgumentException e) { + // failure to normalize is not a problem + } + } + return values; + } + + private Set> getTypesForField(String field, MetadataHelper helper) { + try { + if (field.equals(Constants.ANY_FIELD)) { + return helper.getAllDatatypes(); + } else { + return helper.getDatatypesForField(field); + } + } catch (InstantiationException | TableNotFoundException | IllegalAccessException e) { + throw new RuntimeException(e); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/AsyncIndexLookup.java b/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/AsyncIndexLookup.java index 46b6f94005a..95c4bfd3d5b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/AsyncIndexLookup.java +++ 
b/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/AsyncIndexLookup.java @@ -1,5 +1,6 @@ package datawave.query.jexl.lookups; +import java.util.concurrent.CancellationException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -82,7 +83,7 @@ protected void timedScanWait(Future future, CountDownLatch startedLatch } catch (InterruptedException | ExecutionException e) { throw new RuntimeException(e); - } catch (TimeoutException e) { + } catch (TimeoutException | CancellationException e) { future.cancel(true); try { diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/BoundedRangeIndexLookup.java b/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/BoundedRangeIndexLookup.java index 4300300771a..810e15163a0 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/BoundedRangeIndexLookup.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/BoundedRangeIndexLookup.java @@ -179,8 +179,9 @@ public synchronized void submit() { MessageFormat.format("Table: {0}", config.getIndexTableName())); log.error(qe); throw new DatawaveFatalQueryException(qe); - } + // Note: scanners should never be closed here in a 'finally' block. The lookup() + // method will close the scanner via scannerFactory.close(scanner) } } diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/FieldExpansionIndexLookup.java b/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/FieldExpansionIndexLookup.java new file mode 100644 index 00000000000..02cd93e0b05 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/FieldExpansionIndexLookup.java @@ -0,0 +1,174 @@ +package datawave.query.jexl.lookups; + +import java.util.HashSet; +import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.accumulo.core.client.IteratorSetting; +import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.PartialKey; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.hadoop.io.Text; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; + +import datawave.core.iterators.FieldExpansionIterator; +import datawave.query.config.ShardQueryConfiguration; +import datawave.query.tables.ScannerFactory; +import datawave.util.time.DateHelper; + +/** + * An {@link IndexLookup} that wraps {@link FieldExpansionIterator}. 
+ */ +public class FieldExpansionIndexLookup extends AsyncIndexLookup { + + private static final Logger log = LoggerFactory.getLogger(FieldExpansionIndexLookup.class); + + protected String term; + protected Future timedScanFuture; + protected AtomicLong lookupStartTimeMillis = new AtomicLong(Long.MAX_VALUE); + protected CountDownLatch lookupStartedLatch; + protected CountDownLatch lookupStoppedLatch; + + private Scanner scanner; + + public FieldExpansionIndexLookup(ShardQueryConfiguration config, ScannerFactory scannerFactory, String term, Set fields, + ExecutorService execService) { + super(config, scannerFactory, true, execService); + this.term = term; + this.fields = new HashSet<>(); + if (fields != null) { + this.fields.addAll(fields); + } + } + + @Override + public void submit() { + if (indexLookupMap == null) { + indexLookupMap = new IndexLookupMap(config.getMaxUnfieldedExpansionThreshold(), config.getMaxValueExpansionThreshold()); + + try { + scanner = scannerFactory.newSingleScanner(config.getIndexTableName(), config.getAuthorizations(), config.getQuery()); + + Range range = getScanRange(); + scanner.setRange(range); + + for (String field : fields) { + scanner.fetchColumnFamily(new Text(field)); + } + + IteratorSetting setting = createIteratorSetting(); + scanner.addScanIterator(setting); + + timedScanFuture = execService.submit(createTimedCallable(scanner)); + } catch (Exception e) { + log.error("Error expanding term into discrete fields", e); + // close scanner in case of an exception prior to execution of future + scannerFactory.close(scanner); + throw new RuntimeException(e); + } + // Note: scanners should never be closed here in a 'finally' block. The createTimedCallable + // method will close the scanner via scannerFactory.close(scanner) + } + } + + private Range getScanRange() { + Preconditions.checkNotNull(term); + Key startKey = new Key(term); + return new Range(startKey, true, startKey.followingKey(PartialKey.ROW), false); + } + + private IteratorSetting createIteratorSetting() { + int priority = config.getBaseIteratorPriority() + 24; + IteratorSetting setting = new IteratorSetting(priority, FieldExpansionIterator.class.getSimpleName(), FieldExpansionIterator.class); + + setting.addOption(FieldExpansionIterator.START_DATE, DateHelper.format(config.getBeginDate())); + setting.addOption(FieldExpansionIterator.END_DATE, DateHelper.format(config.getEndDate())); + + if (!config.getDatatypeFilter().isEmpty()) { + setting.addOption(FieldExpansionIterator.DATATYPES, Joiner.on(',').join(config.getDatatypeFilter())); + } + + if (!fields.isEmpty()) { + setting.addOption(FieldExpansionIterator.FIELDS, Joiner.on(',').join(fields)); + } + + return setting; + } + + @Override + public IndexLookupMap lookup() { + try { + timedScanWait(timedScanFuture, lookupStartedLatch, lookupStoppedLatch, lookupStartTimeMillis, config.getMaxAnyFieldScanTimeMillis()); + } finally { + if (scanner != null) { + scannerFactory.close(scanner); + } + } + + return indexLookupMap; + } + + protected Callable createTimedCallable(final Scanner scanner) { + lookupStartedLatch = new CountDownLatch(1); + lookupStoppedLatch = new CountDownLatch(1); + + return () -> { + try { + lookupStartTimeMillis.set(System.currentTimeMillis()); + lookupStartedLatch.countDown(); + + final Text holder = new Text(); + + try { + for (Entry entry : scanner) { + // check for interrupt which may be triggered by closing the batch scanner + if (Thread.interrupted()) { + throw new InterruptedException(); + } + + if (log.isTraceEnabled()) { + 
log.trace("Index entry: {}", entry.getKey()); + } + + entry.getKey().getRow(holder); + String row = holder.toString(); + + entry.getKey().getColumnFamily(holder); + String columnFamily = holder.toString(); + + // We are only returning a mapping of field name to field value, no need to + // determine cardinality and such at this point. + if (log.isTraceEnabled()) { + log.trace("put {}:{}", columnFamily, row); + } + indexLookupMap.put(columnFamily, row); + + // if we passed the term expansion threshold, then simply return + if (indexLookupMap.isKeyThresholdExceeded()) { + break; + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + scannerFactory.close(scanner); + } + + return true; + } finally { + lookupStoppedLatch.countDown(); + } + }; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/FieldNameIndexLookup.java b/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/FieldNameIndexLookup.java index b40001d5fd2..c9992cc4be0 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/FieldNameIndexLookup.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/FieldNameIndexLookup.java @@ -179,6 +179,7 @@ protected Callable createTimedCallable(final Iterator> continue; } } + // We are only returning a mapping of field name to field value, no need to // determine cardinality and such at this point. indexLookupMap.put(colfam, row); diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/IndexLookupMap.java b/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/IndexLookupMap.java index 9b7eafc5e89..c8dfd270c04 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/IndexLookupMap.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/lookups/IndexLookupMap.java @@ -185,7 +185,6 @@ public void removeFields(Collection fieldNamesToRemove) { */ public void setKeyThresholdExceeded() { exceededKeyThreshold = true; - } public void setPatterns(Set patterns) { diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/ExpandMultiNormalizedTerms.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/ExpandMultiNormalizedTerms.java index b4b5b56a511..bc12812e53f 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/ExpandMultiNormalizedTerms.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/ExpandMultiNormalizedTerms.java @@ -18,6 +18,7 @@ import java.util.Map; import java.util.Set; +import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.commons.jexl3.parser.ASTAndNode; import org.apache.commons.jexl3.parser.ASTEQNode; import org.apache.commons.jexl3.parser.ASTERNode; @@ -44,6 +45,7 @@ import datawave.data.type.IpAddressType; import datawave.data.type.OneToManyNormalizerType; import datawave.data.type.Type; +import datawave.query.Constants; import datawave.query.config.ShardQueryConfiguration; import datawave.query.exceptions.DatawaveFatalQueryException; import datawave.query.jexl.JexlASTHelper; @@ -152,7 +154,7 @@ public Object visit(ASTFunctionNode node, Object data) { @Override public Object visit(ASTAndNode node, Object data) { - /** + /* * If we have an exceeded value or term predicate we can safely assume that expansion has occurred in the unfielded expansion along with all types */ QueryPropertyMarker.Instance marker = QueryPropertyMarker.findInstance(node); @@ -310,6 +312,16 @@ protected JexlNode expandNodeForNormalizers(JexlNode 
node, Object data) { Set> dataTypes = Sets.newHashSet(config.getQueryFieldsDatatypes().get(fieldName)); dataTypes.addAll(config.getNormalizedFieldsDatatypes().get(fieldName)); + // all normalizers must be applied to an ANYFIELD term + if (fieldName.equals(Constants.ANY_FIELD)) { + try { + dataTypes.addAll(helper.getAllDatatypes()); + } catch (InstantiationException | IllegalAccessException | TableNotFoundException e) { + log.error("Could not fetch all DataTypes while expanding unfielded term"); + throw new RuntimeException(e); + } + } + // Catch the case of the user entering FIELD == null if (!dataTypes.isEmpty() && null != literal) { try { @@ -372,16 +384,19 @@ protected JexlNode expandNodeForNormalizers(JexlNode node, Object data) { if (log.isTraceEnabled()) { log.trace("Could not normalize " + term + " as cidr notation with: " + normalizer.getClass()); } - failedNormalization = true; + // normalization failures do not matter for ANYFIELD terms + failedNormalization = !fieldName.equals(Constants.ANY_FIELD); } } else { - failedNormalization = true; + // normalization failures do not matter for ANYFIELD terms + failedNormalization = !fieldName.equals(Constants.ANY_FIELD); } } catch (Exception ne) { if (log.isTraceEnabled()) { log.trace("Could not normalize " + term + " using " + normalizer.getClass()); } - failedNormalization = true; + // normalization failures do not matter for ANYFIELD terms + failedNormalization = !fieldName.equals(Constants.ANY_FIELD); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/UnfieldedIndexExpansionVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/UnfieldedIndexExpansionVisitor.java index fc5d131a0bc..012a0a16a90 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/UnfieldedIndexExpansionVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/UnfieldedIndexExpansionVisitor.java @@ -27,14 +27,16 @@ import datawave.query.exceptions.EmptyUnfieldedTermExpansionException; import datawave.query.jexl.JexlASTHelper; import datawave.query.jexl.JexlNodeFactory; +import datawave.query.jexl.lookups.EmptyIndexLookup; +import datawave.query.jexl.lookups.FieldExpansionIndexLookup; import datawave.query.jexl.lookups.IndexLookup; -import datawave.query.jexl.lookups.IndexLookupMap; import datawave.query.jexl.lookups.ShardIndexQueryTableStaticMethods; import datawave.query.jexl.nodes.QueryPropertyMarker; import datawave.query.tables.ScannerFactory; import datawave.query.util.MetadataHelper; import datawave.webservice.query.exception.DatawaveErrorCode; import datawave.webservice.query.exception.NotFoundQueryException; +import jline.internal.Preconditions; /** * Visits a Jexl tree, looks for unfielded terms, and replaces them with fielded terms from the index @@ -148,14 +150,14 @@ public Object visit(ASTAndNode node, Object data) { @Override public Object visit(ASTEQNode node, Object data) { - return buildIndexLookup(node, true, negated, () -> createLookup(node)); + return buildIndexLookup(node, true, negated, () -> createFieldNameIndexLookup(node)); } @Override public Object visit(ASTNENode node, Object data) { toggleNegation(); try { - return buildIndexLookup(node, true, negated, () -> createLookup(node)); + return buildIndexLookup(node, true, negated, () -> createFieldNameIndexLookup(node)); } finally { toggleNegation(); } @@ -178,22 +180,26 @@ public Object visit(ASTNRNode node, Object data) { @Override public Object visit(ASTLTNode node, Object data) { - return 
buildIndexLookup(node, true, negated, () -> createLookup(node)); + // handled by BoundedRangeExpansionIterator + return super.visit(node, data); } @Override public Object visit(ASTLENode node, Object data) { - return buildIndexLookup(node, true, negated, () -> createLookup(node)); + // handled by BoundedRangeExpansionIterator + return super.visit(node, data); } @Override public Object visit(ASTGTNode node, Object data) { - return buildIndexLookup(node, true, negated, () -> createLookup(node)); + // handled by BoundedRangeExpansionIterator + return super.visit(node, data); } @Override public Object visit(ASTGENode node, Object data) { - return buildIndexLookup(node, true, negated, () -> createLookup(node)); + // handled by BoundedRangeExpansionIterator + return super.visit(node, data); } @Override @@ -228,4 +234,27 @@ protected IndexLookup createLookup(JexlNode node) { throw new DatawaveFatalQueryException(e); } } + + protected IndexLookup createFieldNameIndexLookup(JexlNode node) { + String term = (String) JexlASTHelper.getLiteralValue(node); + + Preconditions.checkNotNull(term); + Preconditions.checkNotNull(expansionFields); + + try { + // note: if the system has configured 'exp' fields in the metadata table this method call will verify + // all fields are also indexed. In the event that no expansion fields are configured this will fall back + // to the full set of indexed fields for the provided datatypes + Set fields = ShardIndexQueryTableStaticMethods.getIndexedExpansionFields(expansionFields, false, config.getDatatypeFilter(), helper); + + if (fields.isEmpty()) { + // if no fields match then do not attempt expansion + return new EmptyIndexLookup(config); + } + + return new FieldExpansionIndexLookup(config, scannerFactory, term, fields, executor); + } catch (TableNotFoundException e) { + throw new RuntimeException(e); + } + } } diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index f4d053cdd26..4bc1f729049 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -949,6 +949,9 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard TraceStopwatch stopwatch = null; + // need to fetch field to datatype map first + timedFetchDatatypes(timers, "Fetch Required Datatypes", config.getQueryTree(), config); + if (!disableWhindexFieldMappings) { // apply the value-specific field mappings for GeoWave functions config.setQueryTree(timedApplyWhindexFieldMappings(timers, config.getQueryTree(), config, metadataHelper, settings)); @@ -961,9 +964,15 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard // apply the node transform rules // running it here before any unfielded expansions to enable potentially pushing down terms before index lookups + // need to run this before normalization expansion otherwise nasty regexes could slip through config.setQueryTree(timedApplyNodeTransformRules(timers, "Apply Node Transform Rules - Pre Unfielded Expansions", config.getQueryTree(), config, metadataHelper, getTransformRules())); + // must expand multi-normalized terms after index queries but before index expansion + // for example, f:includeText(_ANYFIELD_, 'value') would get missed by the multi normalizer visitor, but expanding it first allows the EQ node to get + // expanded + 
config.setQueryTree(timedExpandMultiNormalizedTerms(timers, config.getQueryTree(), config, metadataHelper)); + // Find unfielded terms, and fully qualify them with an OR of all fields // found in the index // If the max term expansion is reached, then the original query tree is @@ -989,12 +998,8 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard config.setQueryTree(timedApplyNodeTransformRules(timers, "Apply Node Transform Rules - Pre Regex/Range Expansions", config.getQueryTree(), config, metadataHelper, getTransformRules())); - timedFetchDatatypes(timers, "Fetch Required Datatypes", config.getQueryTree(), config); - config.setQueryTree(timedFixUnindexedNumerics(timers, config.getQueryTree(), config)); - config.setQueryTree(timedExpandMultiNormalizedTerms(timers, config.getQueryTree(), config, metadataHelper)); - // if we have any index holes, then mark em if (!config.getIndexHoles().isEmpty()) { config.setQueryTree(timedMarkIndexHoles(timers, config.getQueryTree(), config, metadataHelper)); @@ -1105,6 +1110,9 @@ protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, Shard } } + // fields may have been added or removed from the query, need to update the field to type map + timedFetchDatatypes(timers, "Fetch Required Datatypes", config.getQueryTree(), config); + return config.getQueryTree(); } @@ -3167,7 +3175,7 @@ protected void setCachedFields(Set indexedFields, Set reverseInd Multimap> normalizedFieldMap, ShardQueryConfiguration config) { config.setIndexedFields(indexedFields); config.setReverseIndexedFields(reverseIndexedFields); - config.setQueryFieldsDatatypes(queryFieldMap); + updateQueryFieldsDatatypes(config, queryFieldMap); config.setNormalizedFieldsDatatypes(normalizedFieldMap); } @@ -3182,7 +3190,9 @@ protected Multimap> configureIndexedAndNormalizedFields(Multimap< log.debug("normalizedFields = " + normalizedFields); - config.setQueryFieldsDatatypes(HashMultimap.create(Multimaps.filterKeys(fieldToDatatypeMap, input -> !normalizedFields.contains(input)))); + Multimap> queryFieldToDatatypeMap = HashMultimap + .create(Multimaps.filterKeys(fieldToDatatypeMap, input -> !normalizedFields.contains(input))); + updateQueryFieldsDatatypes(config, queryFieldToDatatypeMap); log.debug("IndexedFields Datatypes: " + config.getQueryFieldsDatatypes()); config.setNormalizedFieldsDatatypes(HashMultimap.create(Multimaps.filterKeys(fieldToDatatypeMap, normalizedFields::contains))); @@ -3195,7 +3205,12 @@ protected Multimap> configureIndexedAndNormalizedFields(Multimap< } return fieldToDatatypeMap; + } + protected void updateQueryFieldsDatatypes(ShardQueryConfiguration config, Multimap> queryFieldMap) { + Multimap> queryFieldToDatatypeMap = config.getQueryFieldsDatatypes(); + queryFieldToDatatypeMap.putAll(queryFieldMap); + config.setQueryFieldsDatatypes(queryFieldToDatatypeMap); } public void setDisableTestNonExistentFields(boolean disableTestNonExistentFields) { diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/async/event/VisitorFunction.java b/warehouse/query-core/src/main/java/datawave/query/tables/async/event/VisitorFunction.java index 71621d58a5a..081959f8777 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/async/event/VisitorFunction.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/async/event/VisitorFunction.java @@ -29,7 +29,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.Logger; -import org.geotools.data.Join; import 
com.google.common.base.Function; import com.google.common.base.Joiner; diff --git a/warehouse/query-core/src/test/java/datawave/core/iterators/FieldExpansionIteratorTest.java b/warehouse/query-core/src/test/java/datawave/core/iterators/FieldExpansionIteratorTest.java new file mode 100644 index 00000000000..89404969c48 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/core/iterators/FieldExpansionIteratorTest.java @@ -0,0 +1,249 @@ +package datawave.core.iterators; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.accumulo.core.client.AccumuloClient; +import org.apache.accumulo.core.client.BatchWriter; +import org.apache.accumulo.core.client.IteratorSetting; +import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.PartialKey; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; + +import datawave.accumulo.inmemory.InMemoryAccumuloClient; +import datawave.accumulo.inmemory.InMemoryInstance; +import datawave.util.TableName; + +public class FieldExpansionIteratorTest { + + private static AccumuloClient client; + private static final Value value = new Value(); + + private String startDate; + private String endDate; + private final Set fields = new HashSet<>(); + private final Set datatypes = new HashSet<>(); + private final Set expected = new TreeSet<>(); + + @BeforeAll + public static void beforeAll() throws Exception { + InMemoryInstance instance = new InMemoryInstance(); + client = new InMemoryAccumuloClient("user", instance); + client.tableOperations().create(TableName.SHARD_INDEX); + writeData(); + } + + private static void writeData() throws Exception { + try (BatchWriter bw = client.createBatchWriter(TableName.SHARD_INDEX)) { + Mutation m = new Mutation("value"); + // FIELD_A is present in all three days across most datatypes + m.put("FIELD_A", "20241021_0\u0000datatype-a", value); + m.put("FIELD_A", "20241022_0\u0000datatype-a", value); + m.put("FIELD_A", "20241022_0\u0000datatype-b", value); + m.put("FIELD_A", "20241022_0\u0000datatype-c", value); + m.put("FIELD_A", "20241023_0\u0000datatype-a", value); + m.put("FIELD_A", "20241023_0\u0000datatype-b", value); + + // FIELD_B is present in first two days for most datatypes + m.put("FIELD_B", "20241021_0\u0000datatype-a", value); + m.put("FIELD_B", "20241022_0\u0000datatype-a", value); + m.put("FIELD_B", "20241022_0\u0000datatype-b", value); + m.put("FIELD_B", "20241022_0\u0000datatype-c", value); + + // FIELD_C is present in first two days for some datatypes + m.put("FIELD_C", "20241021_0\u0000datatype-a", value); + m.put("FIELD_C", "20241022_0\u0000datatype-a", value); + m.put("FIELD_C", "20241022_0\u0000datatype-b", value); + m.put("FIELD_C", "20241022_0\u0000datatype-c", value); + + // FIELD_D supports verification of seeking by start date + for (int i = 10; i < 30; i++) { + m.put("FIELD_D", "202410" + i + "_0\u0000datatype-a", value); + } + + // FIELD_E tests a specific datatype case + m.put("FIELD_E", 
"20241023_0\u0000datatype-d", value); + + bw.addMutation(m); + } + } + + @BeforeEach + public void beforeEach() { + startDate = null; + endDate = null; + fields.clear(); + datatypes.clear(); + expected.clear(); + } + + @Test + public void testSingleDay_noDatatypes() throws Exception { + withDate("20241021", "20241021"); + withExpected(Set.of("FIELD_A", "FIELD_B", "FIELD_C", "FIELD_D")); + drive(); + } + + @Test + public void testSingleDay_withDatatype() throws Exception { + withDate("20241021", "20241021"); + withDatatypes(Set.of("datatype-a")); + withExpected(Set.of("FIELD_A", "FIELD_B", "FIELD_C", "FIELD_D")); + drive(); + } + + @Test + public void testSingleDay_exclusiveDatatype() throws Exception { + withDate("20241021", "20241021"); + withDatatypes(Set.of("datatype-z")); + drive(); + } + + @Test + public void testAllDays_noDatatypes() throws Exception { + withDate("20241021", "20241023"); + withExpected(Set.of("FIELD_A", "FIELD_B", "FIELD_C", "FIELD_D", "FIELD_E")); + drive(); + } + + @Test + public void testAllDays_allDatatypes() throws Exception { + withDate("20241021", "20241023"); + withDatatypes(Set.of("datatype-a", "datatype-b", "datatype-c")); + withExpected(Set.of("FIELD_A", "FIELD_B", "FIELD_C", "FIELD_D")); + drive(); + } + + @Test + public void testAllDays_someDatatypes() throws Exception { + withDate("20241021", "20241023"); + withDatatypes(Set.of("datatype-b", "datatype-c")); + withExpected(Set.of("FIELD_A", "FIELD_B", "FIELD_C")); + drive(); + } + + @Test + public void testAllDays_exclusiveDatatypes() throws Exception { + withDate("20241021", "20241023"); + withDatatypes(Set.of("datatype-z")); + drive(); + } + + @Test + public void testSingleDay_withFields_noDatatypes() throws Exception { + withDate("20241023", "20241023"); + withFields(Set.of("FIELD_A")); + withExpected(Set.of("FIELD_A")); + drive(); + } + + @Test + public void testSingleDay_exclusiveFields_noDatatypes() throws Exception { + withDate("20241023", "20241023"); + withFields(Set.of("FIELD_B")); // FIELD_B doesn't exist in 2024-10-23 + drive(); + } + + @Test + public void testSingleDay_withFields_withDatatypes() throws Exception { + withDate("20241023", "20241023"); + withDatatypes(Set.of("datatype-b")); // datatype exists in 2024-10-23 + withFields(Set.of("FIELD_A")); + withExpected(Set.of("FIELD_A")); + drive(); + } + + @Test + public void testSingleDay_withFields_exclusiveDatatypes() throws Exception { + withDate("20241023", "20241023"); + withDatatypes(Set.of("datatype-c")); // datatype does not exist in 2024-10-23 + withFields(Set.of("FIELD_A")); + drive(); + } + + @Test + public void testSingleDayWithDataAcrossLargeDateRage() throws Exception { + withDate("20241023", "20241023"); + withDatatypes(Set.of("datatype-a")); + withFields(Set.of("FIELD_A", "FIELD_D")); + withExpected(Set.of("FIELD_A", "FIELD_D")); + drive(); + } + + @Test + public void testDatatypeSortsAfterTopKey() throws Exception { + withDate("20241023", "20241023"); + withDatatypes(Set.of("datatype-e")); + drive(); + } + + public void drive() throws Exception { + Preconditions.checkNotNull(startDate); + Preconditions.checkNotNull(endDate); + + IteratorSetting setting = new IteratorSetting(25, FieldExpansionIterator.class.getSimpleName(), FieldExpansionIterator.class); + setting.addOption(FieldExpansionIterator.START_DATE, startDate); + setting.addOption(FieldExpansionIterator.END_DATE, endDate); + + if (!fields.isEmpty()) { + setting.addOption(FieldExpansionIterator.FIELDS, Joiner.on(',').join(fields)); + } + + if (!datatypes.isEmpty()) { 
+ setting.addOption(FieldExpansionIterator.DATATYPES, Joiner.on(',').join(datatypes)); + } + + Set results = new TreeSet<>(); + try (Scanner scanner = client.createScanner(TableName.SHARD_INDEX)) { + scanner.addScanIterator(setting); + + Key start = new Key("value"); + Range range = new Range(start, true, start.followingKey(PartialKey.ROW), false); + scanner.setRange(range); + + for (Map.Entry keyValueEntry : scanner) { + Key key = keyValueEntry.getKey(); + results.add(key.getColumnFamily().toString()); + } + } + + assertEquals(expected, results); + } + + public void withDate(String startDate, String endDate) { + assertNotNull(startDate); + assertNotNull(endDate); + this.startDate = startDate; + this.endDate = endDate; + } + + public void withDatatypes(Set datatypes) { + assertFalse(datatypes.isEmpty()); + this.datatypes.addAll(datatypes); + } + + public void withFields(Set fields) { + assertFalse(fields.isEmpty()); + this.fields.addAll(fields); + } + + public void withExpected(Set expected) { + assertFalse(expected.isEmpty()); + this.expected.addAll(expected); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/AnyFieldQueryTest.java b/warehouse/query-core/src/test/java/datawave/query/AnyFieldQueryTest.java index 76c962e47d8..132d27c9199 100644 --- a/warehouse/query-core/src/test/java/datawave/query/AnyFieldQueryTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/AnyFieldQueryTest.java @@ -9,14 +9,12 @@ import static datawave.query.testframework.RawDataManager.RN_OP; import static org.junit.Assert.fail; -import java.lang.reflect.InvocationTargetException; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.Map; import java.util.Properties; -import java.util.Set; import java.util.concurrent.TimeUnit; import org.apache.accumulo.core.client.AccumuloClient; @@ -31,7 +29,6 @@ import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.MultiTableBatchWriter; import org.apache.accumulo.core.client.Scanner; -import org.apache.accumulo.core.client.ScannerBase; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.admin.InstanceOperations; import org.apache.accumulo.core.client.admin.NamespaceOperations; @@ -45,7 +42,6 @@ import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.security.Authorizations; import org.apache.hadoop.io.Text; -import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -53,22 +49,14 @@ import com.google.common.collect.Multimap; -import datawave.accumulo.inmemory.InMemoryAccumuloClient; -import datawave.accumulo.inmemory.InMemoryInstance; import datawave.data.ColumnFamilyConstants; import datawave.ingest.data.config.ingest.CompositeIngest; -import datawave.microservice.query.Query; import datawave.query.exceptions.DatawaveFatalQueryException; import datawave.query.exceptions.FullTableScansDisallowedException; import datawave.query.jexl.JexlASTHelper; import datawave.query.planner.DefaultQueryPlanner; import datawave.query.planner.FederatedQueryPlanner; import datawave.query.planner.rules.RegexPushdownTransformRule; -import datawave.query.tables.AnyFieldScanner; -import datawave.query.tables.ResourceQueue; -import datawave.query.tables.ScannerFactory; -import datawave.query.tables.ScannerSession; -import datawave.query.tables.SessionOptions; import datawave.query.testframework.AbstractFunctionalQuery; 
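For context, the mutations written by the FieldExpansionIteratorTest above follow the shard global index key layout: the indexed value is the row, the field name is the column family, and the column qualifier packs the shard id and the datatype separated by a null byte. A minimal illustrative sketch of reading one such entry back, mirroring the test data and the ShardIndexKey parser that appears later in this patch:

    // Illustrative only: one shard index entry shaped like the test data above
    Key key = new Key("value", "FIELD_A", "20241021_0\u0000datatype-a");
    ShardIndexKey parser = new ShardIndexKey();
    parser.parse(key);
    // parser.getValue()    -> "value"      (row: the indexed value)
    // parser.getField()    -> "FIELD_A"    (column family: the field name)
    // parser.getDatatype() -> "datatype-a" (column qualifier, after the null byte)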
import datawave.query.testframework.AccumuloSetup; import datawave.query.testframework.CitiesDataType; @@ -78,7 +66,6 @@ import datawave.query.testframework.FieldConfig; import datawave.query.testframework.FileType; import datawave.query.testframework.GenericCityFields; -import datawave.query.testframework.RawDataManager; public class AnyFieldQueryTest extends AbstractFunctionalQuery { @@ -445,6 +432,7 @@ public void testAndAnd_defaultQueryPlanner() throws Exception { String anyState = this.dataManager.convertAnyField(statePhrase); String anyCont = this.dataManager.convertAnyField(contPhrase); anyQuery = anyCity + AND_OP + anyState + AND_OP + anyCont; + runTest(query, anyQuery); } } diff --git a/warehouse/query-core/src/test/java/datawave/query/TextFunctionQueryTest.java b/warehouse/query-core/src/test/java/datawave/query/TextFunctionQueryTest.java index 0382a96535c..66507cda1e6 100644 --- a/warehouse/query-core/src/test/java/datawave/query/TextFunctionQueryTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/TextFunctionQueryTest.java @@ -50,19 +50,14 @@ public TextFunctionQueryTest() { @Test public void testAnyFieldText() throws Exception { log.info("------ testAnyFieldText ------"); - String code = "europe"; // must be same case as original value in event - String state = "Lazio"; - String phrase = EQ_OP + "'" + state + "'"; - String query = CityField.CONTINENT.name() + ":\"" + code + "\"" + AND_OP + "#TEXT(" + state + ")"; - String expect = CityField.CONTINENT.name() + EQ_OP + "'" + code + "'" + AND_OP + this.dataManager.convertAnyField(phrase); + String query = "CONTINENT:europe and #TEXT(Lazio)"; + String expect = "CONTINENT == 'europe' and (CITY == 'Lazio' or CONTINENT == 'Lazio' or STATE == 'Lazio')"; runTest(query, expect); - // testing that incorrect case misses results - state = "lazio"; - query = CityField.CONTINENT.name() + ":\"" + code + "\"" + AND_OP + "#TEXT(" + state + ")"; - // should return the empty set - runTestQuery(Collections.EMPTY_SET, query); + // testing that incorrect case misses results, query should return an empty set + query = "CONTINENT:\"europe\" and #TEXT(lazio)"; + runTestQuery(Collections.emptySet(), query); } @Test @@ -94,27 +89,19 @@ public void testExplicitAnyFieldText() throws Exception { @Test public void testMultiFieldText() throws Exception { log.info("------ testMultiFieldText ------"); - String code = "europe"; - String state1 = "Lazio"; - String state2 = "London"; - String phrase1 = EQ_OP + "'" + state1 + "'"; - String phrase2 = EQ_OP + "'" + state2 + "'"; - String query = CityField.CONTINENT.name() + ":\"" + code + "\"" + AND_OP + "#TEXT(OR, STATE," + state1 + ", STATE, " + state2 + ")"; - String expect = CityField.CONTINENT.name() + EQ_OP + "'" + code + "'" + AND_OP + "( STATE" + phrase1 + OR_OP + "STATE" + phrase2 + " )"; + + String query = "CONTINENT:europe and #TEXT(OR, STATE, Lazio, STATE, London)"; + String expect = "CONTINENT == 'europe' and ( STATE == 'Lazio' or STATE == 'London' )"; runTest(query, expect); - // testing that incorrect case misses results - state2 = "london"; - query = CityField.CONTINENT.name() + ":\"" + code + "\"" + AND_OP + "#TEXT(OR, STATE," + state1 + ", STATE, " + state2 + ")"; - // should return only the Lazio events, and not the London events - expect = CityField.CONTINENT.name() + EQ_OP + "'" + code + "'" + AND_OP + "STATE" + phrase1; + // lowercase 'london' will fail to return all of those events, leaving only the 'Lazio' events + query = "CONTINENT:europe and #TEXT(OR, STATE, Lazio, STATE, 
london)"; + expect = "CONTINENT == 'europe' and STATE == 'Lazio'"; runTest(query, expect); - // testing that incorrect case misses results - state1 = "lazio"; - query = CityField.CONTINENT.name() + ":\"" + code + "\"" + AND_OP + "#TEXT(OR, STATE," + state1 + ", STATE, " + state2 + ")"; - // should return the empty set - runTestQuery(Collections.EMPTY_SET, query); + // incorrect case for 'lazio' and 'london' will find zero hits + query = "CONTINENT:\"europe\" and #TEXT(OR, STATE,lazio, STATE, london)"; + runTestQuery(Collections.emptySet(), query); } // ============================================ diff --git a/warehouse/query-core/src/test/java/datawave/query/UnindexedNumericQueryTest.java b/warehouse/query-core/src/test/java/datawave/query/UnindexedNumericQueryTest.java index 58d22dc60b4..ab754fec120 100644 --- a/warehouse/query-core/src/test/java/datawave/query/UnindexedNumericQueryTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/UnindexedNumericQueryTest.java @@ -67,8 +67,7 @@ public void testNumericTerm() throws Exception { log.info("------ testNumericTerm ------"); String min = "115"; - String iowa = "'indiana'"; - String query = CityField.STATE.name() + EQ_OP + iowa + AND_OP + CityField.NUM.name() + GT_OP + min; + String query = "STATE == 'indiana' and NUM > 115"; ShardQueryConfiguration config = (ShardQueryConfiguration) setupConfig(query); // verify NUM is NumberType diff --git a/warehouse/query-core/src/test/java/datawave/query/data/parsers/ShardIndexKeyTest.java b/warehouse/query-core/src/test/java/datawave/query/data/parsers/ShardIndexKeyTest.java new file mode 100644 index 00000000000..6260d10a291 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/data/parsers/ShardIndexKeyTest.java @@ -0,0 +1,41 @@ +package datawave.query.data.parsers; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.apache.accumulo.core.data.Key; +import org.junit.jupiter.api.Test; + +public class ShardIndexKeyTest { + + private final Key shardIndexKey = new Key("value", "FIELD", "20241021_0\u0000datatype"); + + private final KeyParser parser = new ShardIndexKey(); + + @Test + public void testShardIndexKeyParse() { + parser.parse(shardIndexKey); + + assertEquals("FIELD", parser.getField()); + assertEquals("value", parser.getValue()); + assertEquals("datatype", parser.getDatatype()); + } + + @Test + public void testUidException() { + parser.parse(shardIndexKey); + assertThrows(UnsupportedOperationException.class, parser::getUid); + } + + @Test + public void testRootUidException() { + parser.parse(shardIndexKey); + assertThrows(UnsupportedOperationException.class, parser::getRootUid); + } + + @Test + public void testKeyEquals() { + parser.parse(shardIndexKey); + assertEquals(shardIndexKey, parser.getKey()); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/functions/QueryFunctionsDescriptorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/functions/QueryFunctionsDescriptorTest.java index 85d00ef7328..9bf282d175b 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/functions/QueryFunctionsDescriptorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/functions/QueryFunctionsDescriptorTest.java @@ -4,16 +4,32 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.fail; +import java.util.Collections; import java.util.Set; import 
org.apache.commons.jexl3.parser.ASTFunctionNode; import org.apache.commons.jexl3.parser.ASTJexlScript; import org.apache.commons.jexl3.parser.JexlNode; import org.apache.commons.jexl3.parser.ParseException; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + +import datawave.data.type.LcNoDiacriticsType; +import datawave.data.type.LcType; +import datawave.data.type.NoOpType; +import datawave.data.type.NumberType; +import datawave.data.type.Type; +import datawave.query.config.ShardQueryConfiguration; import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.visitors.JexlStringBuildingVisitor; import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; +import datawave.query.util.DateIndexHelper; +import datawave.query.util.MockDateIndexHelper; +import datawave.query.util.MockMetadataHelper; /** * Although most query functions are removed from the query by the {@link QueryOptionsFromQueryVisitor}, several functions will persist. These functions may @@ -68,6 +84,25 @@ class QueryFunctionsDescriptorTest { private final QueryFunctionsDescriptor descriptor = new QueryFunctionsDescriptor(); + private ShardQueryConfiguration config; + private MockMetadataHelper helper; + private DateIndexHelper dateIndexHelper; + + @BeforeEach + public void setup() { + config = new ShardQueryConfiguration(); + + Multimap> fieldToTypes = HashMultimap.create(); + fieldToTypes.putAll("FIELD", Sets.newHashSet(new LcNoDiacriticsType(), new LcType(), new NumberType(), new NoOpType())); + fieldToTypes.putAll("FIELD_A", Sets.newHashSet(new LcNoDiacriticsType(), new LcType(), new NumberType(), new NoOpType())); + fieldToTypes.putAll("FIELD_B", Sets.newHashSet(new LcNoDiacriticsType(), new LcType(), new NumberType(), new NoOpType())); + + helper = new MockMetadataHelper(); + helper.setDataTypes(fieldToTypes); + + dateIndexHelper = new MockDateIndexHelper(); + } + @Test void testFields() { assertFields(singleFieldCount, Set.of("FIELD")); @@ -191,4 +226,29 @@ private ASTJexlScript getQuery(String query) { throw new RuntimeException(e); } } + + @Test + public void testIndexQuery() { + // test default functions + assertIndexQuery(include, "FIELD == 'baz'"); + assertIndexQuery(includeAnd, "(FIELD_A == 'bar' && FIELD_B == 'baz')"); + assertIndexQuery(includeOr, "(FIELD_A == 'bar' || FIELD_B == 'baz')"); + + // test fielded normalizations + assertIndexQuery("f:includeText(FIELD, 'abc')", "FIELD == 'abc'"); + assertIndexQuery("f:includeText(FIELD_A, 'BaZ')", "(FIELD_A == 'BaZ' || FIELD_A == 'baz')"); + assertIndexQuery("f:includeText(FIELD_B, '123')", "(FIELD_B == '123' || FIELD_B == '+cE1.23')"); + + // test non-fielded normalizations + assertIndexQuery("f:includeText(_ANYFIELD_, 'abc')", "_ANYFIELD_ == 'abc'"); + assertIndexQuery("f:includeText(_ANYFIELD_, 'BaZ')", "(_ANYFIELD_ == 'BaZ' || _ANYFIELD_ == 'baz')"); + assertIndexQuery("f:includeText(_ANYFIELD_, '123')", "(_ANYFIELD_ == '123' || _ANYFIELD_ == '+cE1.23')"); + } + + private void assertIndexQuery(String query, String expected) { + QueryJexlArgumentDescriptor argDescriptor = getDescriptor(query); + JexlNode expanded = argDescriptor.getIndexQuery(config, helper, dateIndexHelper, Collections.emptySet()); + String result = JexlStringBuildingVisitor.buildQuery(expanded); + assertEquals(expected, result); + } } diff --git 
a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/ExpandMultiNormalizedTermsTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/ExpandMultiNormalizedTermsTest.java index c18c8f53491..8338628d280 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/ExpandMultiNormalizedTermsTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/ExpandMultiNormalizedTermsTest.java @@ -692,6 +692,60 @@ public void testNENodes() throws ParseException { expandTerms(original, expected); } + /** + * For each node type test all lowercase, mixed case, and numeric + * + * @throws ParseException + * if the query fails to parse + */ + @Test + public void testAnyFieldTerms() throws ParseException { + + Multimap> dataTypes = HashMultimap.create(); + dataTypes.putAll("FOO", Sets.newHashSet(new LcNoDiacriticsType(), new LcType(), new NumberType(), new NoOpType())); + helper.setDataTypes(dataTypes); + + // EQ + expandTerms("_ANYFIELD_ == 'anywhere'", "_ANYFIELD_ == 'anywhere'"); + expandTerms("_ANYFIELD_ == 'oHIo'", "_ANYFIELD_ == 'ohio' || _ANYFIELD_ == 'oHIo'"); + expandTerms("_ANYFIELD_ == '123'", "_ANYFIELD_ == '+cE1.23' || _ANYFIELD_ == '123'"); + + // NE + expandTerms("_ANYFIELD_ != 'anywhere'", "_ANYFIELD_ != 'anywhere'"); + expandTerms("_ANYFIELD_ != 'oHIo'", "_ANYFIELD_ != 'ohio' && _ANYFIELD_ != 'oHIo'"); + expandTerms("_ANYFIELD_ != '123'", "_ANYFIELD_ != '+cE1.23' && _ANYFIELD_ != '123'"); + + // ER + expandTerms("_ANYFIELD_ =~ 'anywhere'", "_ANYFIELD_ =~ 'anywhere'"); + expandTerms("_ANYFIELD_ =~ 'oHIo'", "_ANYFIELD_ =~ 'ohio' || _ANYFIELD_ =~ 'oHIo'"); + expandTerms("_ANYFIELD_ =~ '123'", "_ANYFIELD_ =~ '\\+cE1\\.23' || _ANYFIELD_ =~ '123'"); + + // NR + expandTerms("_ANYFIELD_ !~ 'anywhere'", "_ANYFIELD_ !~ 'anywhere'"); + expandTerms("_ANYFIELD_ !~ 'oHIo'", "_ANYFIELD_ !~ 'ohio' && _ANYFIELD_ !~ 'oHIo'"); + expandTerms("_ANYFIELD_ !~ '123'", "_ANYFIELD_ !~ '\\+cE1\\.23' && _ANYFIELD_ !~ '123'"); + + // LT + expandTerms("_ANYFIELD_ < 'anywhere'", "_ANYFIELD_ < 'anywhere'"); + expandTerms("_ANYFIELD_ < 'oHIo'", "_ANYFIELD_ < 'ohio' || _ANYFIELD_ < 'oHIo'"); + expandTerms("_ANYFIELD_ < '123'", "_ANYFIELD_ < '+cE1.23' || _ANYFIELD_ < '123'"); + + // LE + expandTerms("_ANYFIELD_ <= 'anywhere'", "_ANYFIELD_ <= 'anywhere'"); + expandTerms("_ANYFIELD_ <= 'oHIo'", "_ANYFIELD_ <= 'ohio' || _ANYFIELD_ <= 'oHIo'"); + expandTerms("_ANYFIELD_ <= '123'", "_ANYFIELD_ <= '+cE1.23' || _ANYFIELD_ <= '123'"); + + // GT + expandTerms("_ANYFIELD_ > 'anywhere'", "_ANYFIELD_ > 'anywhere'"); + expandTerms("_ANYFIELD_ > 'oHIo'", "_ANYFIELD_ > 'ohio' || _ANYFIELD_ > 'oHIo'"); + expandTerms("_ANYFIELD_ > '123'", "_ANYFIELD_ > '+cE1.23' || _ANYFIELD_ > '123'"); + + // GE + expandTerms("_ANYFIELD_ >= 'anywhere'", "_ANYFIELD_ >= 'anywhere'"); + expandTerms("_ANYFIELD_ >= 'oHIo'", "_ANYFIELD_ >= 'ohio' || _ANYFIELD_ >= 'oHIo'"); + expandTerms("_ANYFIELD_ >= '123'", "_ANYFIELD_ >= '+cE1.23' || _ANYFIELD_ >= '123'"); + } + private void expandTerms(String original, String expected) throws ParseException { ASTJexlScript script = JexlASTHelper.parseJexlQuery(original); ASTJexlScript expanded = ExpandMultiNormalizedTerms.expandTerms(config, helper, script); diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/UnfieldedIndexExpansionVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/UnfieldedIndexExpansionVisitorTest.java index c3f3c7eaf2c..afe3d0659d8 100644 --- 
a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/UnfieldedIndexExpansionVisitorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/UnfieldedIndexExpansionVisitorTest.java @@ -1,5 +1,6 @@ package datawave.query.jexl.visitors; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; diff --git a/warehouse/query-core/src/test/java/datawave/query/language/parser/jexl/TestLuceneToJexlQueryParser.java b/warehouse/query-core/src/test/java/datawave/query/language/parser/jexl/TestLuceneToJexlQueryParser.java index 4779d421239..d5482f403eb 100644 --- a/warehouse/query-core/src/test/java/datawave/query/language/parser/jexl/TestLuceneToJexlQueryParser.java +++ b/warehouse/query-core/src/test/java/datawave/query/language/parser/jexl/TestLuceneToJexlQueryParser.java @@ -656,4 +656,20 @@ public void testSynonymTokenization() throws ParseException { // @formatter:on Assert.assertEquals(expected, parseQuery("TOKFIELD:\"/home/datawave/README.md\"")); } + + @Test + public void testEqAndTextFunction() { + test("CONTINENT == 'europe'", "CONTINENT:europe"); + test("f:includeText(_ANYFIELD_, 'Value')", "#TEXT(Value)"); + test("CONTINENT == 'europe' && f:includeText(_ANYFIELD_, 'Value')", "CONTINENT:europe and #TEXT(Value)"); + } + + private void test(String expected, String query) { + try { + String result = parseQuery(query); + assertEquals(expected, result); + } catch (Exception e) { + fail("Failed for query: " + query); + } + } } From 7f309fdb85c16f3877484b7e4ff33f8e16a1ac54 Mon Sep 17 00:00:00 2001 From: Ivan Bella <347158+ivakegg@users.noreply.github.com> Date: Wed, 15 Jan 2025 13:22:04 -0500 Subject: [PATCH 16/16] Feature/most recent unique (#2570) * Refactored the mostRecentUnique functionality to avoid the ivarator paths * Implemented a file-backed sorted map * Created additional document map for the return cache --- .../map/BulkResultsFileOutputMapper.java | 1 + core/utils/metadata-utils | 2 +- .../input/microservice/QueryLogicFactory.xml | 16 + .../input/webservice/QueryLogicFactory.xml | 12 +- .../microservice/QueryLogicFactory.xml | 16 + pom.xml | 2 +- ...DatawaveFieldIndexCachingIteratorJexl.java | 7 +- .../iterators/filesystem/FileSystemCache.java | 6 +- .../java/datawave/query/QueryParameters.java | 1 + .../datawave/query/attributes/Attribute.java | 15 + .../query/attributes/UniqueFields.java | 139 ++- .../query/common/grouping/GroupFields.java | 2 +- .../query/config/ShardQueryConfiguration.java | 22 +- .../query/iterator/QueryIterator.java | 20 +- .../datawave/query/iterator/QueryOptions.java | 21 +- .../FinalDocumentTrackingIterator.java | 4 +- .../query/jexl/functions/QueryFunctions.java | 2 + .../functions/QueryFunctionsDescriptor.java | 9 + .../QueryOptionsFromQueryVisitor.java | 30 +- .../functions/jexl/MostRecentUnique.java | 72 ++ .../functions/jexl/MostRecentUniqueByDay.java | 22 + .../jexl/MostRecentUniqueByHour.java | 23 + .../jexl/MostRecentUniqueByMinute.java | 23 + .../jexl/MostRecentUniqueByMonth.java | 23 + .../jexl/MostRecentUniqueBySecond.java | 23 + .../jexl/MostRecentUniqueByTenthOfHour.java | 23 + .../jexl/MostRecentUniqueByYear.java | 23 + .../language/functions/jexl/UniqueByDay.java | 4 - .../language/functions/jexl/UniqueByHour.java | 6 +- .../functions/jexl/UniqueByMinute.java | 4 - .../query/planner/DefaultQueryPlanner.java | 5 + .../query/planner/QueryOptionsSwitch.java | 6 + .../query/tables/ShardQueryLogic.java | 56 +-
.../tables/async/event/VisitorFunction.java | 2 +- .../query/transformer/DocumentTransform.java | 1 + .../query/transformer/GroupingTransform.java | 5 +- .../query/transformer/UniqueTransform.java | 448 +++++-- .../BufferedFileBackedSortedMap.java | 545 +++++++++ .../sortedmap/FileByteDocumentSortedMap.java | 207 ++++ .../sortedmap/FileKeyDocumentSortedMap.java | 206 ++++ .../util/sortedmap/FileKeyValueSortedMap.java | 206 ++++ .../sortedmap/FileSerializableSortedMap.java | 289 +++++ .../query/util/sortedmap/FileSortedMap.java | 1073 +++++++++++++++++ .../util/sortedmap/HdfsBackedSortedMap.java | 301 +++++ .../sortedmap/MultiMapBackedSortedMap.java | 407 +++++++ .../util/sortedmap/RewritableSortedMap.java | 19 + .../sortedmap/SortedMapTempFileHandler.java | 63 + .../rfile/KeyValueByteDocumentTransforms.java | 83 ++ .../rfile/RFileByteDocumentInputStream.java | 30 + .../rfile/RFileByteDocumentOutputStream.java | 26 + .../rfile/RFileKeyDocumentInputStream.java | 28 + .../rfile/RFileKeyDocumentOutputStream.java | 22 + .../rfile/RFileKeyValueInputStream.java | 26 + .../rfile/RFileKeyValueInputStreamBase.java | 79 ++ .../rfile/RFileKeyValueOutputStream.java | 20 + .../rfile/RFileKeyValueOutputStreamBase.java | 54 + .../test/java/datawave/query/UniqueTest.java | 96 +- .../query/attributes/UniqueFieldsTest.java | 8 +- .../config/ShardQueryConfigurationTest.java | 2 + .../QueryOptionsFromQueryVisitorTest.java | 103 +- .../UniqueTransformMostRecentTest.java | 86 ++ .../transformer/UniqueTransformTest.java | 363 +++--- .../datawave/query/util/WiseGuysIngest.java | 321 ++--- ...edFileBackedByteDocumentSortedMapTest.java | 71 ++ ...redFileBackedKeyDocumentSortedMapTest.java | 75 ++ ...fferedFileBackedKeyValueSortedMapTest.java | 58 + ...eredFileBackedRewritableSortedMapTest.java | 98 ++ .../BufferedFileBackedSortedMapTest.java | 395 ++++++ .../util/sortedmap/FileSortedMapTest.java | 407 +++++++ .../sortedmap/HdfsBackedSortedMapTest.java | 254 ++++ .../util/sortedmap/MergeSortIteratorTest.java | 241 ++++ .../MultiMapBackedKeyValueSortedMapTest.java | 53 + ...MultiMapBackedRewritableSortedMapTest.java | 89 ++ .../MultiMapBackedSortedMapTest.java | 327 +++++ .../KeyValueByteDocumenTransformsTest.java | 107 ++ .../datawave/query/QueryLogicFactory.xml | 14 +- .../results/cached/CachedRunningQuery.java | 4 +- .../datawave/query/QueryLogicFactory.xml | 12 +- 78 files changed, 7436 insertions(+), 528 deletions(-) create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUnique.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByDay.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByHour.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMinute.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMonth.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueBySecond.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByTenthOfHour.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByYear.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/BufferedFileBackedSortedMap.java create mode 100644 
warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileByteDocumentSortedMap.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileKeyDocumentSortedMap.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileKeyValueSortedMap.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileSerializableSortedMap.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileSortedMap.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/HdfsBackedSortedMap.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/MultiMapBackedSortedMap.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/RewritableSortedMap.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/SortedMapTempFileHandler.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/KeyValueByteDocumentTransforms.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileByteDocumentInputStream.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileByteDocumentOutputStream.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyDocumentInputStream.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyDocumentOutputStream.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueInputStream.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueInputStreamBase.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueOutputStream.java create mode 100644 warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueOutputStreamBase.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformMostRecentTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedByteDocumentSortedMapTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedKeyDocumentSortedMapTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedKeyValueSortedMapTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedRewritableSortedMapTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedSortedMapTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/FileSortedMapTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/HdfsBackedSortedMapTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MergeSortIteratorTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedKeyValueSortedMapTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedRewritableSortedMapTest.java create mode 100644 
warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedSortedMapTest.java create mode 100644 warehouse/query-core/src/test/java/datawave/query/util/sortedmap/rfile/KeyValueByteDocumenTransformsTest.java diff --git a/core/map-reduce/src/main/java/datawave/core/mapreduce/bulkresults/map/BulkResultsFileOutputMapper.java b/core/map-reduce/src/main/java/datawave/core/mapreduce/bulkresults/map/BulkResultsFileOutputMapper.java index 1ce75bf2cd1..62b7f2d403e 100644 --- a/core/map-reduce/src/main/java/datawave/core/mapreduce/bulkresults/map/BulkResultsFileOutputMapper.java +++ b/core/map-reduce/src/main/java/datawave/core/mapreduce/bulkresults/map/BulkResultsFileOutputMapper.java @@ -82,6 +82,7 @@ protected void setup(org.apache.hadoop.mapreduce.Mapper.Con QueryLogic logic = (QueryLogic) super.applicationContext.getBean(logicName); t = logic.getEnrichedTransformer(query); + Assert.notNull(logic.getMarkingFunctions()); Assert.notNull(logic.getResponseObjectFactory()); this.format = SerializationFormat.valueOf(context.getConfiguration().get(RESULT_SERIALIZATION_FORMAT)); diff --git a/core/utils/metadata-utils b/core/utils/metadata-utils index 9b84bb8b5ac..7f05830ac67 160000 --- a/core/utils/metadata-utils +++ b/core/utils/metadata-utils @@ -1 +1 @@ -Subproject commit 9b84bb8b5ac291b2293619f7e1bd294579990a5d +Subproject commit 7f05830ac6788d492a8e38ef99f173d20bbaef65 diff --git a/microservices/configcheck/src/test/resources/input/microservice/QueryLogicFactory.xml b/microservices/configcheck/src/test/resources/input/microservice/QueryLogicFactory.xml index c11b1c258c3..e6233a4fac1 100644 --- a/microservices/configcheck/src/test/resources/input/microservice/QueryLogicFactory.xml +++ b/microservices/configcheck/src/test/resources/input/microservice/QueryLogicFactory.xml @@ -233,7 +233,23 @@ + + + + + + + + + + + + + + + + diff --git a/microservices/configcheck/src/test/resources/input/webservice/QueryLogicFactory.xml b/microservices/configcheck/src/test/resources/input/webservice/QueryLogicFactory.xml index 306770277de..8c40461dcdf 100644 --- a/microservices/configcheck/src/test/resources/input/webservice/QueryLogicFactory.xml +++ b/microservices/configcheck/src/test/resources/input/webservice/QueryLogicFactory.xml @@ -38,15 +38,23 @@ - + - + + + + + + + + + diff --git a/microservices/configcheck/src/test/resources/rendered/microservice/QueryLogicFactory.xml b/microservices/configcheck/src/test/resources/rendered/microservice/QueryLogicFactory.xml index a28dad51d0f..bb07d23456d 100644 --- a/microservices/configcheck/src/test/resources/rendered/microservice/QueryLogicFactory.xml +++ b/microservices/configcheck/src/test/resources/rendered/microservice/QueryLogicFactory.xml @@ -233,7 +233,23 @@ + + + + + + + + + + + + + + + + diff --git a/pom.xml b/pom.xml index b96ba02f806..1a198e3ee98 100644 --- a/pom.xml +++ b/pom.xml @@ -71,7 +71,7 @@ 3.0.0 1.0.0 4.0.7 - 3.0.3 + 3.1.2 1.9.0 5.2.0 2.15.0 diff --git a/warehouse/query-core/src/main/java/datawave/core/iterators/DatawaveFieldIndexCachingIteratorJexl.java b/warehouse/query-core/src/main/java/datawave/core/iterators/DatawaveFieldIndexCachingIteratorJexl.java index f9a7ce19cb8..85980d15a78 100644 --- a/warehouse/query-core/src/main/java/datawave/core/iterators/DatawaveFieldIndexCachingIteratorJexl.java +++ b/warehouse/query-core/src/main/java/datawave/core/iterators/DatawaveFieldIndexCachingIteratorJexl.java @@ -1303,8 +1303,11 @@ protected void setupRowBasedHdfsBackedSet(String row) throws IOException { this.createdRowDir = false; } - 
this.set = new HdfsBackedSortedSet<>(null, hdfsBackedSetBufferSize, ivaratorCacheDirs, row, maxOpenFiles, numRetries, persistOptions, - new FileKeySortedSet.Factory()); + // noinspection unchecked + this.set = (HdfsBackedSortedSet) HdfsBackedSortedSet.builder().withBufferPersistThreshold(hdfsBackedSetBufferSize) + .withIvaratorCacheDirs(ivaratorCacheDirs).withUniqueSubPath(row).withMaxOpenFiles(maxOpenFiles).withNumRetries(numRetries) + .withPersistOptions(persistOptions).withSetFactory(new FileKeySortedSet.Factory()).build(); + this.threadSafeSet = Collections.synchronizedSortedSet(this.set); this.currentRow = row; this.setControl.takeOwnership(row, this); diff --git a/warehouse/query-core/src/main/java/datawave/core/iterators/filesystem/FileSystemCache.java b/warehouse/query-core/src/main/java/datawave/core/iterators/filesystem/FileSystemCache.java index e3fcd9890b5..1796dd4aca8 100644 --- a/warehouse/query-core/src/main/java/datawave/core/iterators/filesystem/FileSystemCache.java +++ b/warehouse/query-core/src/main/java/datawave/core/iterators/filesystem/FileSystemCache.java @@ -25,8 +25,10 @@ public class FileSystemCache { public FileSystemCache(String hdfsSiteConfigs) throws MalformedURLException { conf = new Configuration(); - for (String url : org.apache.commons.lang.StringUtils.split(hdfsSiteConfigs, ',')) { - conf.addResource(new URL(url)); + if (hdfsSiteConfigs != null) { + for (String url : org.apache.commons.lang.StringUtils.split(hdfsSiteConfigs, ',')) { + conf.addResource(new URL(url)); + } } } diff --git a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java index 48979701c3d..7faba47fc2a 100644 --- a/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java +++ b/warehouse/query-core/src/main/java/datawave/query/QueryParameters.java @@ -184,6 +184,7 @@ public class QueryParameters { public static final String GROUP_FIELDS_BATCH_SIZE = "group.fields.batch.size"; public static final String UNIQUE_FIELDS = "unique.fields"; + public static final String MOST_RECENT_UNIQUE = "most.recent.unique"; /** * Used to specify fields which are excluded from QueryModel expansion diff --git a/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java b/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java index b425e6451f0..c27ff50c289 100644 --- a/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java +++ b/warehouse/query-core/src/main/java/datawave/query/attributes/Attribute.java @@ -9,6 +9,7 @@ import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.commons.lang.builder.EqualsBuilder; import org.apache.commons.lang.builder.HashCodeBuilder; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; @@ -228,6 +229,20 @@ public boolean isFromIndex() { return fromIndex; } + @Override + public boolean equals(Object o) { + if (!(o instanceof Attribute)) { + return false; + } + Attribute other = (Attribute) o; + EqualsBuilder equals = new EqualsBuilder().append(this.isMetadataSet(), other.isMetadataSet()); + if (this.isMetadataSet()) { + equals.append(this.getMetadata(), other.getMetadata()); + } + return equals.isEquals(); + } + + @Override public int hashCode() { HashCodeBuilder hcb = new HashCodeBuilder(145, 11); hcb.append(this.isMetadataSet()); diff --git 
a/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java b/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java index 0fab5c560ce..0c861f54e6b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java +++ b/warehouse/query-core/src/main/java/datawave/query/attributes/UniqueFields.java @@ -4,14 +4,17 @@ import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.NavigableSet; import java.util.Objects; import java.util.Set; +import java.util.SortedSet; import org.apache.commons.lang.StringUtils; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonValue; import com.google.common.collect.Multimap; +import com.google.common.collect.Multimaps; import com.google.common.collect.Sets; import com.google.common.collect.SortedSetMultimap; import com.google.common.collect.TreeMultimap; @@ -25,9 +28,11 @@ * captured as a parameter string using {@link UniqueFields#toString()}, and transformed back into a {@link UniqueFields} instance via * {@link UniqueFields#from(String)}. */ -public class UniqueFields implements Serializable { +public class UniqueFields implements Serializable, Cloneable { - private Multimap fieldMap; + private final TreeMultimap fieldMap = TreeMultimap.create(); + private boolean mostRecent = false; + private static String MOST_RECENT_UNIQUE = "_MOST_RECENT_"; /** * Returns a new {@link UniqueFields} parsed from this string. The provided string is expected to have the format returned by @@ -72,8 +77,12 @@ public static UniqueFields from(String string) { if (nextComma == -1 && nextStartBracket == -1) { String field = string.substring(currentIndex); if (!field.isEmpty()) { - // Add the field only if it's not blank. Ignore cases with consecutive trailing commas like field1[ALL],, - uniqueFields.put(field, UniqueGranularity.ALL); + if (field.equals(MOST_RECENT_UNIQUE)) { + uniqueFields.setMostRecent(true); + } else { + // Add the field only if it's not blank. Ignore cases with consecutive trailing commas like field1[ALL],, + uniqueFields.put(field, UniqueGranularity.ALL); + } } break; // There are no more fields to be parsed. } else if (nextComma != -1 && (nextStartBracket == -1 || nextComma < nextStartBracket)) { @@ -87,8 +96,12 @@ public static UniqueFields from(String string) { // Add the field with the ALL granularity. String field = string.substring(currentIndex, nextComma); if (!field.isEmpty()) { - // Add the field only if it's not blank. Ignore cases with consecutive commas like field1,,field2[DAY] - uniqueFields.put(field, UniqueGranularity.ALL); + if (field.equals(MOST_RECENT_UNIQUE)) { + uniqueFields.setMostRecent(true); + } else { + // Add the field only if it's not blank. Ignore cases with consecutive commas like field1,,field2[DAY] + uniqueFields.put(field, UniqueGranularity.ALL); + } } currentIndex = nextComma + 1; // Advance to the start of the next field. } else { @@ -100,14 +113,18 @@ public static UniqueFields from(String string) { String field = string.substring(currentIndex, nextStartBracket); int nextEndBracket = string.indexOf(Constants.BRACKET_END, currentIndex); if (!field.isEmpty()) { - String granularityList = string.substring((nextStartBracket + 1), nextEndBracket); - // An empty granularity list, e.g. field[] is equivalent to field[ALL].
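With the parsing above, the serialized unique-fields string can carry the most-recent flag inline with the field list. A minimal sketch of the intended round trip (the field names are arbitrary examples; DAY and HOUR are granularity names in the style of the field1[DAY] form shown in the comments above):

    // Illustrative: the _MOST_RECENT_ marker toggles a flag instead of adding a field
    UniqueFields unique = UniqueFields.from("_MOST_RECENT_,FIELD_A[DAY,HOUR],FIELD_B");
    // unique.isMostRecent() -> true
    // unique.getFields()    -> [FIELD_A, FIELD_B], FIELD_B defaulting to the ALL granularity
    // unique.toString()     -> writes _MOST_RECENT_ back out first, so the flag survives
    //                          parameter serialization, e.g. across query checkpoints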
- if (granularityList.isEmpty()) { - uniqueFields.put(field, UniqueGranularity.ALL); + if (field.equals(MOST_RECENT_UNIQUE)) { + uniqueFields.setMostRecent(true); } else { - String[] granularities = StringUtils.split(granularityList, Constants.COMMA); - for (String granularity : granularities) { - uniqueFields.put(field, parseGranularity(granularity)); + String granularityList = string.substring((nextStartBracket + 1), nextEndBracket); + // An empty granularity list, e.g. field[] is equivalent to field[ALL]. + if (granularityList.isEmpty()) { + uniqueFields.put(field, UniqueGranularity.ALL); + } else { + String[] granularities = StringUtils.split(granularityList, Constants.COMMA); + for (String granularity : granularities) { + uniqueFields.put(field, parseGranularity(granularity)); + } } } } @@ -128,24 +145,19 @@ private static UniqueGranularity parseGranularity(String granularity) { } /** - * Return a copy of the given {@link UniqueFields}. + * Return a clone of this class * - * @param other - * the other instance to copy * @return the copy */ - public static UniqueFields copyOf(UniqueFields other) { - if (other == null) { - return null; - } - UniqueFields uniqueFields = new UniqueFields(); - uniqueFields.fieldMap = TreeMultimap.create(other.fieldMap); - return uniqueFields; + @Override + public UniqueFields clone() { + UniqueFields newFields = new UniqueFields(); + newFields.fieldMap.putAll(this.fieldMap); + newFields.mostRecent = this.mostRecent; + return newFields; } - public UniqueFields() { - fieldMap = TreeMultimap.create(); - } + public UniqueFields() {} /** * Create a new {@link UniqueFields} with the provided map as the underlying field map. @@ -154,7 +166,24 @@ public UniqueFields() { * the field map to use */ public UniqueFields(SortedSetMultimap fieldMap) { - this.fieldMap = fieldMap; + putAll(fieldMap); + } + + /** + * Clear out the field map + */ + public UniqueFields clear() { + this.fieldMap.clear(); + return this; + } + + /** + * Set the field map + * + * @param fields + */ + public UniqueFields set(Multimap fields) { + return clear().putAll(fields); } /** @@ -165,8 +194,9 @@ public UniqueFields(SortedSetMultimap fieldMap) { * @param uniqueGranularity * the granularity */ - public void put(String field, UniqueGranularity uniqueGranularity) { - fieldMap.put(field, uniqueGranularity); + public UniqueFields put(String field, UniqueGranularity uniqueGranularity) { + fieldMap.put(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), uniqueGranularity); + return this; } /** @@ -175,10 +205,13 @@ public void put(String field, UniqueGranularity uniqueGranularity) { * @param fieldMap * the field map to add entries from */ - public void putAll(Multimap fieldMap) { + public UniqueFields putAll(Multimap fieldMap) { if (fieldMap != null) { - this.fieldMap.putAll(fieldMap); + for (String field : fieldMap.keySet()) { + this.fieldMap.putAll(JexlASTHelper.deconstructIdentifier(field).toUpperCase(), fieldMap.get(field)); + } } + return this; } /** @@ -195,12 +228,12 @@ public void replace(String field, String replacement) { } /** - * Return a copy of the fields within this {@link UniqueFields}. Modifications to this set will not modify the fields in this {@link UniqueFields}. + * Return the fields within this {@link UniqueFields}. Modifications to this set will modify the fields in this {@link UniqueFields}. 
* * @return a copy of the fields */ - public Set getFields() { - return Sets.newHashSet(fieldMap.keySet()); + public NavigableSet getFields() { + return fieldMap.keySet(); } /** @@ -208,26 +241,10 @@ public Set getFields() { * * @return the field map */ - public Multimap getFieldMap() { + public TreeMultimap getFieldMap() { return fieldMap; } - /** - * Replace any identifier fields with their deconstructed version. - */ - public void deconstructIdentifierFields() { - Multimap newFieldMap = TreeMultimap.create(); - for (String field : fieldMap.keySet()) { - String newField = JexlASTHelper.deconstructIdentifier(field); - if (newField.equals(field)) { - newFieldMap.putAll(field, fieldMap.get(field)); - } else { - newFieldMap.putAll(newField, fieldMap.get(field)); - } - } - this.fieldMap = newFieldMap; - } - /** * Remap all fields to include any matches from the provided model. The original field entries will be retained. * @@ -238,12 +255,11 @@ public void remapFields(Multimap model) { Multimap newFieldMap = TreeMultimap.create(fieldMap); for (String field : fieldMap.keySet()) { Collection granularities = fieldMap.get(field); - field = field.toUpperCase(); if (model.containsKey(field)) { model.get(field).forEach((newField) -> newFieldMap.putAll(newField, granularities)); } } - this.fieldMap = newFieldMap; + set(newFieldMap); } /** @@ -305,6 +321,10 @@ public String transformValue(String field, String value) { @Override public String toString() { StringBuilder sb = new StringBuilder(); + if (mostRecent) { + sb.append(MOST_RECENT_UNIQUE); + sb.append(Constants.COMMA); + } Iterator fieldIterator = fieldMap.keySet().iterator(); while (fieldIterator.hasNext()) { // Write the field. @@ -326,6 +346,15 @@ public String toString() { return sb.toString(); } + public boolean isMostRecent() { + return mostRecent; + } + + public UniqueFields setMostRecent(boolean mostRecent) { + this.mostRecent = mostRecent; + return this; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -335,12 +364,12 @@ public boolean equals(Object o) { return false; } UniqueFields that = (UniqueFields) o; - return Objects.equals(fieldMap, that.fieldMap); + return Objects.equals(fieldMap, that.fieldMap) && mostRecent == that.mostRecent; } @Override public int hashCode() { - return Objects.hash(fieldMap); + return Objects.hash(fieldMap, mostRecent); } } diff --git a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java index d51f64cd65c..5f7cd133e2b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java +++ b/warehouse/query-core/src/main/java/datawave/query/common/grouping/GroupFields.java @@ -311,7 +311,7 @@ public void deconstructIdentifiers() { // Return a copy of the given set with all identifiers deconstructed. 
private Set deconstructIdentifiers(Set set) { - return set.stream().map(JexlASTHelper::deconstructIdentifier).collect(Collectors.toSet()); + return set.stream().map(JexlASTHelper::deconstructIdentifier).map(String::toUpperCase).collect(Collectors.toSet()); } /** diff --git a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java index a6dd448a4ec..70b210460b7 100644 --- a/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java +++ b/warehouse/query-core/src/main/java/datawave/query/config/ShardQueryConfiguration.java @@ -357,6 +357,8 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement private List ivaratorCacheDirConfigs = Collections.emptyList(); private String ivaratorFstHdfsBaseURIs = null; private int ivaratorCacheBufferSize = 10000; + + private int uniqueCacheBufferSize = 100; private long ivaratorCacheScanPersistThreshold = 100000L; private long ivaratorCacheScanTimeout = 1000L * 60 * 60; private int maxFieldIndexRangeSplit = 11; @@ -731,7 +733,9 @@ public void copyFrom(ShardQueryConfiguration other) { this.setCompositeFilterFunctionsEnabled(other.isCompositeFilterFunctionsEnabled()); this.setGroupFieldsBatchSize(other.getGroupFieldsBatchSize()); this.setAccrueStats(other.getAccrueStats()); - this.setUniqueFields(UniqueFields.copyOf(other.getUniqueFields())); + this.setUniqueFields(other.getUniqueFields()); + log.info("Checkpointing with " + getUniqueFields()); + this.setUniqueCacheBufferSize(other.getUniqueCacheBufferSize()); this.setCacheModel(other.getCacheModel()); this.setTrackSizes(other.isTrackSizes()); this.setContentFieldNames(null == other.getContentFieldNames() ? null : Lists.newArrayList(other.getContentFieldNames())); @@ -1535,6 +1539,14 @@ public void setIvaratorFstHdfsBaseURIs(String ivaratorFstHdfsBaseURIs) { this.ivaratorFstHdfsBaseURIs = ivaratorFstHdfsBaseURIs; } + public int getUniqueCacheBufferSize() { + return uniqueCacheBufferSize; + } + + public void setUniqueCacheBufferSize(int uniqueCacheBufferSize) { + this.uniqueCacheBufferSize = uniqueCacheBufferSize; + } + public int getIvaratorCacheBufferSize() { return ivaratorCacheBufferSize; } @@ -1890,11 +1902,7 @@ public UniqueFields getUniqueFields() { } public void setUniqueFields(UniqueFields uniqueFields) { - this.uniqueFields = uniqueFields; - // If unique fields are present, make sure they are deconstructed by this point. 
- if (uniqueFields != null) { - uniqueFields.deconstructIdentifierFields(); - } + this.uniqueFields = uniqueFields.clone(); } public boolean isHitList() { @@ -2938,6 +2946,7 @@ public boolean equals(Object o) { getGroupFieldsBatchSize() == that.getGroupFieldsBatchSize() && getAccrueStats() == that.getAccrueStats() && Objects.equals(getUniqueFields(), that.getUniqueFields()) && + getUniqueCacheBufferSize() == that.getUniqueCacheBufferSize() && getCacheModel() == that.getCacheModel() && isTrackSizes() == that.isTrackSizes() && getEnforceUniqueConjunctionsWithinExpression() == that.getEnforceUniqueConjunctionsWithinExpression() && @@ -3208,6 +3217,7 @@ public int hashCode() { getAccrueStats(), getGroupFields(), getUniqueFields(), + getUniqueCacheBufferSize(), getCacheModel(), isTrackSizes(), getContentFieldNames(), diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java index c99cd2d5fcb..c51168de6e1 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/QueryIterator.java @@ -121,6 +121,7 @@ import datawave.query.util.Tuple3; import datawave.query.util.TupleToEntry; import datawave.query.util.TypeMetadata; +import datawave.query.util.sortedset.FileSortedSet; import datawave.util.StringUtils; /** @@ -452,8 +453,7 @@ else if (documentRange != null && (!this.isContainsIndexOnlyTerms() && this.getT // now apply the unique iterator if requested UniqueTransform uniquify = getUniqueTransform(); if (uniquify != null) { - // pipelineDocuments = uniquify; - pipelineDocuments = Iterators.filter(pipelineDocuments, uniquify.getUniquePredicate()); + pipelineDocuments = uniquify.getIterator(pipelineDocuments); } // apply the grouping iterator if requested and if the batch size is greater than zero @@ -1553,11 +1553,23 @@ public Comparator getValueComparator(Tuple3 hitsOnlySet = new HashSet<>(); @@ -518,6 +522,7 @@ public void deepCopy(QueryOptions other) { this.ivaratorCacheDirConfigs = (other.ivaratorCacheDirConfigs == null) ? 
null : new ArrayList<>(other.ivaratorCacheDirConfigs); this.hdfsSiteConfigURLs = other.hdfsSiteConfigURLs; this.ivaratorCacheBufferSize = other.ivaratorCacheBufferSize; + this.uniqueCacheBufferSize = other.uniqueCacheBufferSize; this.ivaratorCacheScanPersistThreshold = other.ivaratorCacheScanPersistThreshold; this.ivaratorCacheScanTimeout = other.ivaratorCacheScanTimeout; this.hdfsFileCompressionCodec = other.hdfsFileCompressionCodec; @@ -1057,6 +1062,14 @@ public void setIvaratorCacheBufferSize(int ivaratorCacheBufferSize) { this.ivaratorCacheBufferSize = ivaratorCacheBufferSize; } + public int getUniqueCacheBufferSize() { + return uniqueCacheBufferSize; + } + + public void setUniqueCacheBufferSize(int uniqueCacheBufferSize) { + this.uniqueCacheBufferSize = uniqueCacheBufferSize; + } + public long getIvaratorCacheScanPersistThreshold() { return ivaratorCacheScanPersistThreshold; } @@ -1202,7 +1215,7 @@ public UniqueFields getUniqueFields() { } public void setUniqueFields(UniqueFields uniqueFields) { - this.uniqueFields = uniqueFields; + this.uniqueFields = uniqueFields.clone(); } public Set getHitsOnlySet() { @@ -1698,6 +1711,12 @@ public boolean validateOptions(Map options) { if (options.containsKey(UNIQUE_FIELDS)) { this.setUniqueFields(UniqueFields.from(options.get(UNIQUE_FIELDS))); + if (options.containsKey(MOST_RECENT_UNIQUE)) { + this.getUniqueFields().setMostRecent(Boolean.valueOf(options.get(MOST_RECENT_UNIQUE))); + if (options.containsKey(UNIQUE_CACHE_BUFFER_SIZE)) { + this.setUniqueCacheBufferSize(Integer.parseInt(options.get(UNIQUE_CACHE_BUFFER_SIZE))); + } + } } if (options.containsKey(HIT_LIST)) { diff --git a/warehouse/query-core/src/main/java/datawave/query/iterator/profile/FinalDocumentTrackingIterator.java b/warehouse/query-core/src/main/java/datawave/query/iterator/profile/FinalDocumentTrackingIterator.java index e8af4ae0931..c00f60784f0 100644 --- a/warehouse/query-core/src/main/java/datawave/query/iterator/profile/FinalDocumentTrackingIterator.java +++ b/warehouse/query-core/src/main/java/datawave/query/iterator/profile/FinalDocumentTrackingIterator.java @@ -24,8 +24,8 @@ public class FinalDocumentTrackingIterator implements Iterator *
 *       <li>{@code f:unique_by_second()}: Expects a comma-delimited list of fields to be unique with a granularity level of SECOND, e.g.
 *       {@code unique_by_second('field1','field2')}</li>
+ *       <li>{@code f:most_recent_unique...}: Adding most_recent_ before any unique function will set the most.recent.unique flag to true, e.g.
+ *       {@code most_recent_unique_by_day('field1','field2')}</li>
 *       <li>{@code f:rename}: Expects a comma-delimited list of field/field mappings, e.g. {@code f:rename('field1=field2','field3=field4')}</li>
  • * */ @@ -59,9 +61,18 @@ public class QueryOptionsFromQueryVisitor extends RebuildingVisitor { QueryFunctions.UNIQUE_FUNCTION, UniqueFunction.UNIQUE_BY_DAY_FUNCTION, UniqueFunction.UNIQUE_BY_HOUR_FUNCTION, UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION, UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION, UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, - QueryFunctions.GROUPBY_FUNCTION, QueryFunctions.EXCERPT_FIELDS_FUNCTION, QueryFunctions.NO_EXPANSION, - QueryFunctions.LENIENT_FIELDS_FUNCTION, QueryFunctions.STRICT_FIELDS_FUNCTION, QueryFunctions.SUM, QueryFunctions.MIN, QueryFunctions.MAX, - QueryFunctions.AVERAGE, QueryFunctions.COUNT, QueryFunctions.RENAME_FUNCTION); + QueryFunctions.MOST_RECENT_PREFIX + QueryFunctions.UNIQUE_FUNCTION, + QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_DAY_FUNCTION, + QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_HOUR_FUNCTION, + QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION, + QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, + QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, + QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, + QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION, + QueryFunctions.MOST_RECENT_PREFIX + UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, QueryFunctions.GROUPBY_FUNCTION, + QueryFunctions.EXCERPT_FIELDS_FUNCTION, QueryFunctions.NO_EXPANSION, QueryFunctions.LENIENT_FIELDS_FUNCTION, + QueryFunctions.STRICT_FIELDS_FUNCTION, QueryFunctions.SUM, QueryFunctions.MIN, QueryFunctions.MAX, QueryFunctions.AVERAGE, + QueryFunctions.COUNT, QueryFunctions.RENAME_FUNCTION); @SuppressWarnings("unchecked") public static T collect(T node, Object data) { @@ -188,7 +199,16 @@ private Object visit(ASTFunctionNode node, Map optionsMap) { ASTNamespaceIdentifier nsIdentifier = (ASTNamespaceIdentifier) node.jjtGetChild(0); // if this is the f:options function, create a List for the userData to be passed to the child nodes if (nsIdentifier.getNamespace().equals(QueryFunctions.QUERY_FUNCTION_NAMESPACE)) { - switch (String.valueOf(nsIdentifier.getName())) { + String function = String.valueOf(nsIdentifier.getName()); + + // check for the most recent flag for the unique functions only + boolean mostRecent = function.startsWith(QueryFunctions.MOST_RECENT_PREFIX + QueryFunctions.UNIQUE_FUNCTION); + if (mostRecent) { + function = function.substring(QueryFunctions.MOST_RECENT_PREFIX.length()); + optionsMap.put(QueryParameters.MOST_RECENT_UNIQUE, "true"); + } + + switch (function) { case QueryFunctions.OPTIONS_FUNCTION: { List optionsList = new ArrayList<>(); this.visit(node, optionsList); @@ -234,7 +254,7 @@ private Object visit(ASTFunctionNode node, Map optionsMap) { case UniqueFunction.UNIQUE_BY_SECOND_FUNCTION: case UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION: { UniqueFields uniqueFields = new UniqueFields(); - updateUniqueFields(node, uniqueFields, optionsMap, UniqueFunction.findByName(nsIdentifier.getName())); + updateUniqueFields(node, uniqueFields, optionsMap, UniqueFunction.findByName(function)); return null; } case QueryFunctions.GROUPBY_FUNCTION: { diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUnique.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUnique.java new file 
mode 100644 index 00000000000..ed94abec1ae --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUnique.java @@ -0,0 +1,72 @@ +package datawave.query.language.functions.jexl; + +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.stream.Collectors; + +import datawave.query.attributes.UniqueFields; +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.language.functions.QueryFunction; +import datawave.webservice.query.exception.BadRequestQueryException; +import datawave.webservice.query.exception.DatawaveErrorCode; + +/** + * Function to determine most recent uniqueness among documents given a set of fields and the levels of granularity that should be used for each fields. This + * function accepts a list of fields with specified granularity levels in the format {@code field[ALL],dateField[DAY,HOUR,MINUTE]}. See {@link UniqueFields} for + * additional documentation on supported formatting. + */ +public class MostRecentUnique extends JexlQueryFunction { + + public MostRecentUnique() { + super(QueryFunctions.MOST_RECENT_PREFIX + QueryFunctions.UNIQUE_FUNCTION, new ArrayList<>()); + } + + /** + * query options contain a list of fields. Cannot be the empty list. + * + * @throws IllegalArgumentException + * for illegal arguments + */ + @Override + public void validate() throws IllegalArgumentException { + if (this.parameterList.isEmpty()) { + BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, + MessageFormat.format("{0} requires at least one argument", this.name)); + throw new IllegalArgumentException(qe); + } else { + String parameters = String.join(",", parameterList); + try { + UniqueFields.from(parameters); + } catch (Exception e) { + BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INVALID_FUNCTION_ARGUMENTS, + MessageFormat.format("Unable to parse unique fields from arguments for function {0}", this.name)); + throw new IllegalArgumentException(qe); + } + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + + sb.append(QueryFunctions.QUERY_FUNCTION_NAMESPACE).append(':').append(QueryFunctions.MOST_RECENT_PREFIX).append(QueryFunctions.UNIQUE_FUNCTION); + if (parameterList.isEmpty()) { + sb.append("()"); + } else { + char separator = '('; + for (String parm : parameterList) { + sb.append(separator).append(escapeString(parm)); + separator = ','; + } + sb.append(')'); + } + + return sb.toString(); + } + + @Override + public QueryFunction duplicate() { + return new MostRecentUnique(); + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByDay.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByDay.java new file mode 100644 index 00000000000..845bd863ae3 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByDay.java @@ -0,0 +1,22 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; +import datawave.query.language.functions.QueryFunction; + +/** + * Function to return a unique result for every day for a given list of fields. This function is equivalent to {@code #MOST_RECENT_UNIQUE(field[DAY])}. 
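For orientation on how these new functions reach the query configuration, a hedged sketch of what the QueryOptionsFromQueryVisitor changes above leave in the options map for #MOST_RECENT_UNIQUE_BY_DAY(FIELD1). The literal option keys are assumptions inferred from the javadoc's mention of the most.recent.unique flag and the existing unique.fields parameter:

    import java.util.HashMap;
    import java.util.Map;

    public class MostRecentUniqueOptionsSketch {
        public static void main(String[] args) {
            // The most_recent_ prefix is stripped, the flag is recorded, and the rest is
            // handled exactly like unique_by_day.
            Map<String, String> options = new HashMap<>();
            options.put("unique.fields", "FIELD1[DAY]"); // assumed key for QueryParameters.UNIQUE_FIELDS
            options.put("most.recent.unique", "true");   // assumed key for QueryParameters.MOST_RECENT_UNIQUE
            System.out.println(options);
        }
    }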
+ */ +public class MostRecentUniqueByDay extends UniqueByFunction { + + public MostRecentUniqueByDay() { + super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_DAY_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new MostRecentUniqueByDay(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByHour.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByHour.java new file mode 100644 index 00000000000..c831dac8aec --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByHour.java @@ -0,0 +1,23 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; +import datawave.query.language.functions.QueryFunction; + +/** + * Function to return a unique result for every hour of the day for a given list of fields. This function is equivalent to + * {@code #MOST_RECENT_UNIQUE(field[HOUR])}. + */ +public class MostRecentUniqueByHour extends UniqueByFunction { + + public MostRecentUniqueByHour() { + super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_HOUR_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new MostRecentUniqueByHour(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMinute.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMinute.java new file mode 100644 index 00000000000..f8b04bc4050 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMinute.java @@ -0,0 +1,23 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; +import datawave.query.language.functions.QueryFunction; + +/** + * Function to return a most recent unique result for every minute of the hour for a given list of fields. This function is equivalent to + * {@code #MOST_RECENT_UNIQUE(field[MINUTE])}. + */ +public class MostRecentUniqueByMinute extends UniqueByFunction { + + public MostRecentUniqueByMinute() { + super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new MostRecentUniqueByMinute(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMonth.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMonth.java new file mode 100644 index 00000000000..3c611479dd5 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByMonth.java @@ -0,0 +1,23 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; +import datawave.query.language.functions.QueryFunction; + +/** + * Function to return a most recent unique result for every month of the year for a given list of fields. 
This function is equivalent to + * {@code #MOST_RECENT_UNIQUE(field[MONTH])}. + */ +public class MostRecentUniqueByMonth extends UniqueByFunction { + + public MostRecentUniqueByMonth() { + super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MONTH_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new MostRecentUniqueByMonth(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueBySecond.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueBySecond.java new file mode 100644 index 00000000000..8ff9eedbb45 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueBySecond.java @@ -0,0 +1,23 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; +import datawave.query.language.functions.QueryFunction; + +/** + * Function to return a most recent unique result for every second for a given list of fields. This function is equivalent to + * {@code #MOST_RECENT_UNIQUE(field[SECOND])}. + */ +public class MostRecentUniqueBySecond extends UniqueByFunction { + + public MostRecentUniqueBySecond() { + super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_SECOND_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new MostRecentUniqueBySecond(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByTenthOfHour.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByTenthOfHour.java new file mode 100644 index 00000000000..81948a62cb3 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByTenthOfHour.java @@ -0,0 +1,23 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; +import datawave.query.language.functions.QueryFunction; + +/** + * Function to return a most recent_unique result for every tenth of an hour for a given list of fields. This function is equivalent to + * {@code #MOST_RECENT_UNIQUE(field[TENTH_OF_HOUR])}. 
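Each of these by-granularity wrappers is shorthand for a granularity argument on the general function. A hedged sketch of the documented equivalence, which also exercises the equals() change later in this patch (mostRecent now participates in equality); the TENTH_OF_HOUR bracket token is an assumption based on the function name:

    import datawave.query.attributes.UniqueFields;

    public class GranularityEquivalenceSketch {
        public static void main(String[] args) {
            // #MOST_RECENT_UNIQUE_BY_TENTH_OF_HOUR(FIELD) == #MOST_RECENT_UNIQUE(FIELD[TENTH_OF_HOUR])
            UniqueFields viaWrapper = UniqueFields.from("FIELD[TENTH_OF_HOUR]").setMostRecent(true);
            UniqueFields viaMarker = UniqueFields.from("_MOST_RECENT_,FIELD[TENTH_OF_HOUR]");
            System.out.println(viaWrapper.equals(viaMarker)); // true: same field map, same mostRecent flag
        }
    }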
+ */ +public class MostRecentUniqueByTenthOfHour extends UniqueByFunction { + + public MostRecentUniqueByTenthOfHour() { + super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new MostRecentUniqueByTenthOfHour(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByYear.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByYear.java new file mode 100644 index 00000000000..24d8c8c4471 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/MostRecentUniqueByYear.java @@ -0,0 +1,23 @@ +package datawave.query.language.functions.jexl; + +import java.util.ArrayList; + +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; +import datawave.query.language.functions.QueryFunction; + +/** + * Function to return a most recent unique result for the year for a given list of fields. This function is equivalent to + * {@code #MOST_RECENT_UNIQUE(field[YEAR])}. + */ +public class MostRecentUniqueByYear extends UniqueByFunction { + + public MostRecentUniqueByYear() { + super(QueryFunctions.MOST_RECENT_PREFIX + QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_YEAR_FUNCTION, new ArrayList<>()); + } + + @Override + public QueryFunction duplicate() { + return new MostRecentUniqueByYear(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByDay.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByDay.java index a354b79c5a0..c6fd2c0f040 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByDay.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByDay.java @@ -1,13 +1,9 @@ package datawave.query.language.functions.jexl; -import java.text.MessageFormat; import java.util.ArrayList; -import datawave.query.jexl.functions.QueryFunctions; import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; import datawave.query.language.functions.QueryFunction; -import datawave.webservice.query.exception.BadRequestQueryException; -import datawave.webservice.query.exception.DatawaveErrorCode; /** * Function to return a unique result for every day for a given list of fields. This function is equivalent to {@code #unique(field[DAY])}. diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByHour.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByHour.java index 78afb7e50b1..c6f298f0c0b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByHour.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByHour.java @@ -1,16 +1,12 @@ package datawave.query.language.functions.jexl; -import java.text.MessageFormat; import java.util.ArrayList; -import datawave.query.jexl.functions.QueryFunctions; import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; import datawave.query.language.functions.QueryFunction; -import datawave.webservice.query.exception.BadRequestQueryException; -import datawave.webservice.query.exception.DatawaveErrorCode; /** - * Function to return a unique result for every hour of the day for a given list of fields. 
This function is equivalent to {@code #unique(field[DAY])}. + * Function to return a unique result for every hour of the day for a given list of fields. This function is equivalent to {@code #unique(field[HOUR])}. */ public class UniqueByHour extends UniqueByFunction { diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByMinute.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByMinute.java index 90bc82720fc..b05d880f4fd 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByMinute.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/UniqueByMinute.java @@ -1,13 +1,9 @@ package datawave.query.language.functions.jexl; -import java.text.MessageFormat; import java.util.ArrayList; -import datawave.query.jexl.functions.QueryFunctions; import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; import datawave.query.language.functions.QueryFunction; -import datawave.webservice.query.exception.BadRequestQueryException; -import datawave.webservice.query.exception.DatawaveErrorCode; /** * Function to return a unique result for every minute of the hour for a given list of fields. This function is equivalent to {@code #unique(field[MINUTE])}. diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index 4bc1f729049..0324c7a69fd 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -627,6 +627,11 @@ private void configureIterator(ShardQueryConfiguration config, IteratorSetting c addOption(cfg, QueryOptions.GROUP_FIELDS, config.getGroupFields().toString(), true); addOption(cfg, QueryOptions.GROUP_FIELDS_BATCH_SIZE, config.getGroupFieldsBatchSizeAsString(), true); addOption(cfg, QueryOptions.UNIQUE_FIELDS, config.getUniqueFields().toString(), true); + if (config.getUniqueFields().isMostRecent()) { + // this may be redundant with the uniqueFields.toString(), but other code relies on this explicitly being set + addOption(cfg, QueryOptions.MOST_RECENT_UNIQUE, Boolean.toString(true), false); + addOption(cfg, QueryOptions.UNIQUE_CACHE_BUFFER_SIZE, Integer.toString(config.getUniqueCacheBufferSize()), false); + } addOption(cfg, QueryOptions.HIT_LIST, Boolean.toString(config.isHitList()), false); addOption(cfg, QueryOptions.TERM_FREQUENCY_FIELDS, Joiner.on(',').join(config.getQueryTermFrequencyFields()), false); addOption(cfg, QueryOptions.TERM_FREQUENCIES_REQUIRED, Boolean.toString(config.isTermFrequenciesRequired()), false); diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java index 249b33d2b26..9c6eaedf486 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/QueryOptionsSwitch.java @@ -62,8 +62,14 @@ public static void apply(Map optionsMap, ShardQueryConfiguration break; case QueryParameters.UNIQUE_FIELDS: UniqueFields uniqueFields = UniqueFields.from(value); + // preserve the most recent flag + uniqueFields.setMostRecent(config.getUniqueFields().isMostRecent()); config.setUniqueFields(uniqueFields); break; + case QueryParameters.MOST_RECENT_UNIQUE: + log.info("Setting 
unique fields to be most recent"); + config.getUniqueFields().setMostRecent(Boolean.parseBoolean(value)); + break; case QueryParameters.EXCERPT_FIELDS: ExcerptFields excerptFields = ExcerptFields.from(value); config.setExcerptFields(excerptFields); diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java index bacab5def54..1e764715d8c 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java @@ -101,6 +101,7 @@ import datawave.query.util.MetadataHelper; import datawave.query.util.MetadataHelperFactory; import datawave.query.util.QueryStopwatch; +import datawave.query.util.sortedset.FileSortedSet; import datawave.util.time.TraceStopwatch; import datawave.webservice.query.exception.QueryException; import datawave.webservice.query.result.event.ResponseObjectFactory; @@ -277,7 +278,9 @@ public static BatchScanner createBatchScanner(ShardQueryConfiguration config, Sc @Override public GenericQueryConfiguration initialize(AccumuloClient client, Query settings, Set auths) throws Exception { + // whenever we reinitialize, ensure we have a fresh transformer this.transformerInstance = null; + this.config = ShardQueryConfiguration.create(this, settings); if (log.isTraceEnabled()) log.trace("Initializing ShardQueryLogic: " + System.identityHashCode(this) + '(' @@ -650,7 +653,11 @@ protected String getStopwatchHeader(ShardQueryConfiguration config) { @Override public QueryLogicTransformer getTransformer(Query settings) { if (this.transformerInstance != null) { - addConfigBasedTransformers(); + try { + addConfigBasedTransformers(); + } catch (QueryException e) { + throw new DatawaveFatalQueryException("Unable to configure transformers", e); + } return this.transformerInstance; } @@ -673,7 +680,11 @@ public QueryLogicTransformer getTransformer(Query settings) { transformer.setPrimaryToSecondaryFieldMap(primaryToSecondaryFieldMap); transformer.setQm(queryModel); this.transformerInstance = transformer; - addConfigBasedTransformers(); + try { + addConfigBasedTransformers(); + } catch (QueryException e) { + throw new DatawaveFatalQueryException("Unable to configure transformers", e); + } return this.transformerInstance; } @@ -690,7 +701,7 @@ public boolean isLongRunningQuery() { /** * If the configuration didn't exist, OR IT CHANGED, we need to create or update the transformers that have been added. 
*/ - private void addConfigBasedTransformers() { + private void addConfigBasedTransformers() throws QueryException { if (getConfig() != null) { ((DocumentTransformer) this.transformerInstance).setProjectFields(getConfig().getProjectFields()); ((DocumentTransformer) this.transformerInstance).setDisallowlistedFields(getConfig().getDisallowlistedFields()); @@ -698,10 +709,26 @@ private void addConfigBasedTransformers() { if (getConfig().getUniqueFields() != null && !getConfig().getUniqueFields().isEmpty()) { DocumentTransform alreadyExists = ((DocumentTransformer) this.transformerInstance).containsTransform(UniqueTransform.class); if (alreadyExists != null) { - ((UniqueTransform) alreadyExists).updateConfig(getConfig().getUniqueFields(), getQueryModel()); + ((UniqueTransform) alreadyExists).updateConfig(getConfig().getUniqueFields()); } else { - ((DocumentTransformer) this.transformerInstance) - .addTransform(new UniqueTransform(this, getConfig().getUniqueFields(), this.getQueryExecutionForPageTimeout())); + try { + // @formatter:off + ((DocumentTransformer) this.transformerInstance).addTransform(new UniqueTransform.Builder() + .withUniqueFields(getConfig().getUniqueFields()) + .withQueryExecutionForPageTimeout(this.getQueryExecutionForPageTimeout()) + .withModel(getQueryModel()) + .withBufferPersistThreshold(getUniqueCacheBufferSize()) + .withIvaratorCacheDirConfigs(getIvaratorCacheDirConfigs()) + .withHdfsSiteConfigURLs(getHdfsSiteConfigURLs()) + .withSubDirectory(getConfig().getQuery().getId().toString()) + .withMaxOpenFiles(getIvaratorMaxOpenFiles()) + .withNumRetries(getIvaratorNumRetries()) + .withPersistOptions(new FileSortedSet.PersistOptions(true, false, 0)) + .build()); + // @formatter:on + } catch (IOException ioe) { + throw new QueryException("Unable to create a unique transform", ioe); + } } } @@ -971,11 +998,18 @@ protected void loadQueryParameters(ShardQueryConfiguration config, Query setting UniqueFields uniqueFields = UniqueFields.from(uniqueFieldsParam); // Only set the unique fields if we were actually given some if (!uniqueFields.isEmpty()) { - this.setUniqueFields(uniqueFields); + // preserve the most recent flag + uniqueFields.setMostRecent(config.getUniqueFields().isMostRecent()); config.setUniqueFields(uniqueFields); } } + // Get the most recent flag + String mostRecentUnique = settings.findParameter(QueryParameters.MOST_RECENT_UNIQUE).getParameterValue().trim(); + if (StringUtils.isNotBlank(mostRecentUnique)) { + config.getUniqueFields().setMostRecent(Boolean.valueOf(mostRecentUnique)); + } + // Get the EXCERPT_FIELDS parameter if given String excerptFieldsParam = settings.findParameter(QueryParameters.EXCERPT_FIELDS).getParameterValue().trim(); if (StringUtils.isNotBlank(excerptFieldsParam)) { @@ -1977,6 +2011,14 @@ public void setIvaratorFstHdfsBaseURIs(String ivaratorFstHdfsBaseURIs) { getConfig().setIvaratorFstHdfsBaseURIs(ivaratorFstHdfsBaseURIs); } + public int getUniqueCacheBufferSize() { + return getConfig().getUniqueCacheBufferSize(); + } + + public void setUniqueCacheBufferSize(int uniqueCacheBufferSize) { + getConfig().setUniqueCacheBufferSize(uniqueCacheBufferSize); + } + public int getIvaratorCacheBufferSize() { return getConfig().getIvaratorCacheBufferSize(); } diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/async/event/VisitorFunction.java b/warehouse/query-core/src/main/java/datawave/query/tables/async/event/VisitorFunction.java index 081959f8777..5fda16f5d12 100644 --- 
a/warehouse/query-core/src/main/java/datawave/query/tables/async/event/VisitorFunction.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/async/event/VisitorFunction.java @@ -414,7 +414,7 @@ protected void pruneEmptyOptions(IteratorSetting settings) { * an {@link IteratorSetting} */ protected void pruneIvaratorConfigs(ASTJexlScript script, IteratorSetting settings) { - if (script != null && !IvaratorRequiredVisitor.isIvaratorRequired(script)) { + if (script != null && !settings.getOptions().containsKey(QueryOptions.MOST_RECENT_UNIQUE) && !IvaratorRequiredVisitor.isIvaratorRequired(script)) { settings.removeOption(QueryOptions.IVARATOR_CACHE_BUFFER_SIZE); settings.removeOption(QueryOptions.IVARATOR_CACHE_DIR_CONFIG); settings.removeOption(QueryOptions.IVARATOR_NUM_RETRIES); diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/DocumentTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/DocumentTransform.java index f78177ce039..ea9d6fca930 100644 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/DocumentTransform.java +++ b/warehouse/query-core/src/main/java/datawave/query/transformer/DocumentTransform.java @@ -37,6 +37,7 @@ class DefaultDocumentTransform implements DocumentTransform { public void initialize(Query settings, MarkingFunctions markingFunctions) { this.settings = settings; this.markingFunctions = markingFunctions; + this.queryExecutionForPageStartTime = System.currentTimeMillis(); } @Override diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java index 8327188210f..c9cfca151db 100644 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java +++ b/warehouse/query-core/src/main/java/datawave/query/transformer/GroupingTransform.java @@ -23,7 +23,6 @@ import datawave.query.common.grouping.GroupingUtils; import datawave.query.common.grouping.Groups; import datawave.query.iterator.profile.FinalDocumentTrackingIterator; -import datawave.query.model.QueryModel; /** * GroupingTransform mimics GROUP BY with a COUNT in SQL. 
For the given fields, this transform will group into unique combinations of values and assign a count @@ -91,6 +90,10 @@ public Entry apply(@Nullable Entry keyDocumentEntry) return keyDocumentEntry; } + if (keyDocumentEntry.getValue().isIntermediateResult()) { + return keyDocumentEntry; + } + keys.add(keyDocumentEntry.getKey()); log.trace("{} get list key counts for: {}", "web-server", keyDocumentEntry); DocumentGrouper.group(keyDocumentEntry, groupFields, groups); diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/UniqueTransform.java b/warehouse/query-core/src/main/java/datawave/query/transformer/UniqueTransform.java index 07fb702b7de..f405f224837 100644 --- a/warehouse/query-core/src/main/java/datawave/query/transformer/UniqueTransform.java +++ b/warehouse/query-core/src/main/java/datawave/query/transformer/UniqueTransform.java @@ -4,21 +4,24 @@ import java.io.DataOutputStream; import java.io.IOException; import java.io.Serializable; +import java.net.URI; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.TreeSet; -import java.util.stream.Collectors; import javax.annotation.Nullable; import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.data.Value; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.log4j.Logger; -import com.google.common.base.Predicate; import com.google.common.collect.HashMultimap; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; @@ -26,26 +29,38 @@ import com.google.common.hash.Funnel; import com.google.common.hash.PrimitiveSink; -import datawave.core.query.logic.BaseQueryLogic; +import datawave.core.iterators.filesystem.FileSystemCache; import datawave.query.attributes.Attribute; import datawave.query.attributes.Attributes; import datawave.query.attributes.Document; +import datawave.query.attributes.DocumentKey; import datawave.query.attributes.UniqueFields; +import datawave.query.exceptions.DatawaveFatalQueryException; +import datawave.query.iterator.ivarator.IvaratorCacheDir; +import datawave.query.iterator.ivarator.IvaratorCacheDirConfig; import datawave.query.iterator.profile.FinalDocumentTrackingIterator; import datawave.query.model.QueryModel; -import datawave.query.tables.ShardQueryLogic; +import datawave.query.util.sortedmap.FileByteDocumentSortedMap; +import datawave.query.util.sortedmap.FileKeyDocumentSortedMap; +import datawave.query.util.sortedmap.FileSortedMap; +import datawave.query.util.sortedmap.HdfsBackedSortedMap; +import datawave.query.util.sortedset.ByteArrayComparator; +import datawave.query.util.sortedset.FileSortedSet; /** * This iterator will filter documents based on uniqueness across a set of configured fields. Only the first instance of an event with a unique set of those - * fields will be returned. This transform is thread safe. + * fields will be returned unless mostRecentUnique is specified in which case the most recent instance of an event will be returned. This transform is thread + * safe. 
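The transform now has two deduplication policies, selected by UniqueFields.isMostRecent(): the original bloom-filter path keeps the first document seen per field-value signature, while the new HDFS-backed map keeps the newest. A JDK-only toy contrasting the two policies (a String key stands in for the signature bytes from getBytes(), a Long for the document timestamp):

    import java.util.Comparator;
    import java.util.HashMap;
    import java.util.Map;

    public class DedupPolicySketch {
        // Bloom-filter mode: the first document seen for a signature wins.
        static <V> void firstWins(Map<String, V> seen, String signature, V doc) {
            seen.putIfAbsent(signature, doc);
        }

        // Most-recent mode: a later document replaces an earlier one only if it is newer.
        static <V> void mostRecentWins(Map<String, V> seen, String signature, V doc, Comparator<V> byTimestamp) {
            seen.merge(signature, doc, (current, update) -> byTimestamp.compare(update, current) > 0 ? update : current);
        }

        public static void main(String[] args) {
            Map<String, Long> first = new HashMap<>();
            Map<String, Long> recent = new HashMap<>();
            firstWins(first, "f-FIELD:value", 100L);
            firstWins(first, "f-FIELD:value", 200L);
            mostRecentWins(recent, "f-FIELD:value", 100L, Long::compare);
            mostRecentWins(recent, "f-FIELD:value", 200L, Long::compare);
            System.out.println(first + " vs " + recent); // {f-FIELD:value=100} vs {f-FIELD:value=200}
        }
    }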
*/ public class UniqueTransform extends DocumentTransform.DefaultDocumentTransform { private static final Logger log = Logger.getLogger(UniqueTransform.class); private BloomFilter bloom; - private UniqueFields uniqueFields; - private Multimap modelMapping; + private UniqueFields uniqueFields = new UniqueFields(); + private HdfsBackedSortedMap map; + private HdfsBackedSortedMap returnSet; + private Iterator> setIterator; /** * Length of time in milliseconds that a client will wait while results are collected. If a full page is not collected before the timeout, a blank page will @@ -66,66 +81,43 @@ public class UniqueTransform extends DocumentTransform.DefaultDocumentTransform public UniqueTransform(UniqueFields uniqueFields, long queryExecutionForPageTimeout) { this.queryExecutionForPageTimeout = queryExecutionForPageTimeout; this.uniqueFields = uniqueFields; - this.uniqueFields.deconstructIdentifierFields(); - this.bloom = BloomFilter.create(new ByteFunnel(), 500000, 1e-15); if (log.isTraceEnabled()) { log.trace("unique fields: " + this.uniqueFields.getFields()); } } /** - * Create a new {@link UniqueTransform} that will use a bloom filter to return on those results that are unique per the uniqueFields. Special uniqueness can - * be requested for date/time fields (@see UniqueFields). The logic will be used to get a query model to include the reverse mappings in the unique field - * set + * Update the configuration of this transform. If the configuration is actually changing, then the bloom filter will be reset as well. * - * @param logic - * The query logic from whih to pull the query model * @param uniqueFields - * The unique fields - * @param queryExecutionForPageTimeout - * If this timeout is passed before since the last result was returned, then an "intermediate" result is returned denoting we are still looking - * for the next unique result. + * The new set of unique fields. 
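A JDK-only sketch of the guard that updateConfig applies below: reconfigure, and reset accumulated dedup state, only when the incoming configuration actually differs, so repeated calls with an unchanged query keep their state:

    import java.util.concurrent.atomic.AtomicReference;

    public class UpdateIfChangedSketch {
        static <C> boolean updateIfChanged(AtomicReference<C> current, C incoming, Runnable resetState) {
            if (!current.get().equals(incoming)) {
                current.set(incoming); // the real code stores a defensive uniqueFields.clone()
                resetState.run();      // e.g. clear the hdfs-backed maps or rebuild the bloom filter
                return true;
            }
            return false;
        }

        public static void main(String[] args) {
            AtomicReference<String> config = new AtomicReference<>("FIELD1[ALL]");
            System.out.println(updateIfChanged(config, "FIELD1[ALL]", () -> {})); // false: nothing reset
            System.out.println(updateIfChanged(config, "FIELD2[DAY]", () -> {})); // true: state reset
        }
    }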
*/ - public UniqueTransform(BaseQueryLogic> logic, UniqueFields uniqueFields, long queryExecutionForPageTimeout) { - this(uniqueFields, queryExecutionForPageTimeout); - QueryModel model = ((ShardQueryLogic) logic).getQueryModel(); - if (model != null) { - modelMapping = HashMultimap.create(); - // reverse the reverse query mapping which will give us a mapping from the final field name to the original field name(s) - for (Map.Entry entry : model.getReverseQueryMapping().entrySet()) { - modelMapping.put(entry.getValue(), entry.getKey()); - } - } - } - - public void updateConfig(UniqueFields uniqueFields, QueryModel model) { - if (this.uniqueFields != uniqueFields) { - uniqueFields.deconstructIdentifierFields(); - if (!this.uniqueFields.equals(uniqueFields)) { - this.uniqueFields = uniqueFields; - log.info("Resetting unique fields on the unique transform"); - this.bloom = BloomFilter.create(new ByteFunnel(), 500000, 1e-15); - if (log.isTraceEnabled()) { - log.trace("unique fields: " + this.uniqueFields.getFields()); - } + public void updateConfig(UniqueFields uniqueFields) { + // only reset the bloom filter if changing the field set + if (!this.uniqueFields.equals(uniqueFields)) { + this.uniqueFields = uniqueFields.clone(); + log.info("Resetting unique fields on the unique transform"); + if (map != null) { + map.clear(); + returnSet.clear(); + } else { + bloom = BloomFilter.create(new ByteFunnel(), 500000, 1e-15); } - } - if (model != null) { - modelMapping = HashMultimap.create(); - // reverse the reverse query mapping which will give us a mapping from the final field name to the original field name(s) - for (Map.Entry entry : model.getReverseQueryMapping().entrySet()) { - modelMapping.put(entry.getValue(), entry.getKey()); + if (log.isTraceEnabled()) { + log.trace("unique fields: " + this.uniqueFields.getFields()); } } } /** - * Get a predicate that will apply this transform. + * Add phrase excerpts to the documents from the given iterator. * - * @return A unique transform predicate + * @param in + * the iterator source + * @return an iterator that will supply the enriched documents */ - public Predicate> getUniquePredicate() { - return input -> UniqueTransform.this.apply(input) != null; + public Iterator> getIterator(final Iterator> in) { + return new UniqueTransformIterator(in); } /** @@ -143,27 +135,77 @@ public Entry apply(@Nullable Entry keyDocumentEntry) return keyDocumentEntry; } + if (keyDocumentEntry.getValue().isIntermediateResult()) { + return keyDocumentEntry; + } + try { - if (isDuplicate(keyDocumentEntry.getValue())) { - keyDocumentEntry = null; - } else { + if (map != null) { + byte[] signature = getBytes(keyDocumentEntry.getValue()); + synchronized (map) { + this.map.put(signature, keyDocumentEntry.getValue()); + } + return null; + } else if (!isDuplicate(keyDocumentEntry.getValue())) { return keyDocumentEntry; } } catch (IOException ioe) { log.error("Failed to convert document to bytes. 
Returning document as unique.", ioe); } - } - long elapsedExecutionTimeForCurrentPage = System.currentTimeMillis() - this.queryExecutionForPageStartTime; - if (elapsedExecutionTimeForCurrentPage > this.queryExecutionForPageTimeout) { - Document intermediateResult = new Document(); - intermediateResult.setIntermediateResult(true); - return Maps.immutableEntry(new Key(), intermediateResult); + long elapsedExecutionTimeForCurrentPage = System.currentTimeMillis() - this.queryExecutionForPageStartTime; + if (elapsedExecutionTimeForCurrentPage > this.queryExecutionForPageTimeout) { + Document intermediateResult = new Document(); + intermediateResult.setIntermediateResult(true); + return Maps.immutableEntry(keyDocumentEntry.getKey(), intermediateResult); + } } return null; } + /** + * This will start pulling data from the hdfs backed set if one exists (only if mostRecent is true). + * + * @return The next unique document from the set. + */ + @Override + public Map.Entry flush() { + if (map != null) { + synchronized (map) { + // persist the map so that we do not loose these results and we compact the files for the final iteration. + try { + map.persist(); + } catch (IOException ioe) { + throw new DatawaveFatalQueryException("Unable to persist the most recent unique maps", ioe); + } + if (setIterator == null) { + setupIterator(); + } + if (setIterator.hasNext()) { + return setIterator.next(); + } + } + } + return null; + } + + /** + * This will run through the set and create a new set ordered by Key, Document + */ + private void setupIterator() { + for (Map.Entry entry : map.entrySet()) { + returnSet.put(getDocKey(entry.getValue()), entry.getValue()); + } + // now persist the return set so that we don't lose the results and compact the sets + try { + returnSet.persist(); + } catch (IOException ioe) { + throw new DatawaveFatalQueryException("Could not persist unique document return set", ioe); + } + setIterator = returnSet.entrySet().iterator(); + } + /** * Determine if a document is unique per the fields specified. If we have seen this set of fields and values before, then it is not unique. 
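A JDK-only sketch of what setupIterator (above) does structurally: entries that were deduplicated in signature order are re-keyed by their document Key so results stream back in Key order. TreeMap stands in for the HDFS-backed sorted maps, and the real code persists the re-keyed map before iterating:

    import java.util.Iterator;
    import java.util.Map;
    import java.util.TreeMap;
    import java.util.function.Function;

    public class ReorderSketch {
        static <S, K extends Comparable<K>, D> Iterator<Map.Entry<K, D>> reorder(TreeMap<S, D> bySignature, Function<D, K> docKey) {
            TreeMap<K, D> byDocKey = new TreeMap<>();
            for (D doc : bySignature.values()) {
                byDocKey.put(docKey.apply(doc), doc); // mirrors returnSet.put(getDocKey(doc), doc)
            }
            return byDocKey.entrySet().iterator();
        }

        public static void main(String[] args) {
            TreeMap<String, String> bySig = new TreeMap<>(Map.of("sig-b", "doc@row2", "sig-a", "doc@row9"));
            reorder(bySig, d -> d.substring(d.indexOf('@') + 1)).forEachRemaining(System.out::println);
            // prints row2=doc@row2 then row9=doc@row9: document-Key order, not signature order
        }
    }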
* @@ -212,71 +254,90 @@ byte[] getBytes(Document document) throws IOException { * if we failed to generate the byte array */ private void outputSortedFieldValues(Document document, DataOutputStream output) throws IOException { - int count = 0; - String lastField = ""; - List values = new ArrayList<>(); + Multimap values = HashMultimap.create(); for (String documentField : new TreeSet<>(document.getDictionary().keySet())) { String field = getUniqueField(documentField); if (field != null) { - if (!field.equals(lastField)) { - count = dumpValues(count, lastField, values, output); - lastField = field; - } addValues(field, document.get(documentField), values); } } - dumpValues(count, lastField, values, output); + // Always dump the fields in the same order (uniqueFields.getFields is a sorted collection) + for (String field : uniqueFields.getFields()) { + dumpValues(field, values.get(field), output); + } output.flush(); } /** * Dump a list of values, sorted, to the data output stream * - * @param count - * value count * @param field * a field * @param values * the list of values * @param output * the output stream - * @return The next field count * @throws IOException * for issues with read/write */ - private int dumpValues(int count, String field, List values, DataOutputStream output) throws IOException { + private void dumpValues(String field, Collection values, DataOutputStream output) throws IOException { + String separator = "f-" + field + ":"; if (!values.isEmpty()) { - Collections.sort(values); - String separator = "f-" + field + '/' + (count++) + ":"; - for (String value : values) { + List valueList = new ArrayList<>(values); + // always output values in sorted order. + Collections.sort(valueList); + for (String value : valueList) { output.writeUTF(separator); output.writeUTF(value); separator = ","; } - values.clear(); + } else { + // dump at least a header for empty value sets to ensure we have some bytes to check against + // in the bloom filter. + output.writeUTF(separator); } - return count; } - // Return the set of values for the provided attribute. - private void addValues(final String field, Attribute attribute, List values) { + /** + * Add the attribute values to the list of values. + * + * @param field + * The attribute field + * @param attribute + * The attribute + * @param values + * The map of values to be updated + */ + private void addValues(final String field, Attribute attribute, Multimap values) { if (attribute instanceof Attributes) { // @formatter:off ((Attributes) attribute).getAttributes().stream() .forEach(a -> addValues(field, a, values)); // @formatter:on } else { - values.add(uniqueFields.transformValue(field, String.valueOf(attribute.getData()))); + values.put(field, uniqueFields.transformValue(field, String.valueOf(attribute.getData()))); } } - // Return the query-specified field that the provided document matches, if one exists, or otherwise return null. + /** + * Return the query-specified field that the provided document matches, if one exists, or otherwise return null. + * + * @param documentField + * The document field + * @return The query specified field + */ private String getUniqueField(String documentField) { String baseDocumentField = getFieldWithoutGrouping(documentField); return uniqueFields.getFields().stream().filter((field) -> isMatchingField(baseDocumentField, field)).findFirst().orElse(null); } - // Return the provided field with any grouping context removed. + /** + * Return the provided field with any grouping context removed. 
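A JDK-only sketch of the signature layout produced by outputSortedFieldValues and dumpValues above (the real code writes UTF tokens to a DataOutputStream; a StringBuilder is used here just to show the layout): fields are emitted in the sorted order of uniqueFields.getFields(), each as an "f-FIELD:" header followed by its sorted, comma-separated values, and an empty field still contributes its header so the signature always has bytes to check against the bloom filter:

    import java.util.Map;
    import java.util.SortedSet;
    import java.util.TreeMap;
    import java.util.TreeSet;

    public class SignatureSketch {
        static String signature(TreeMap<String, SortedSet<String>> valuesByField) {
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<String, SortedSet<String>> e : valuesByField.entrySet()) {
                sb.append("f-").append(e.getKey()).append(':')    // header, even when the value set is empty
                  .append(String.join(",", e.getValue()));        // values already sorted by the TreeSet
            }
            return sb.toString();
        }

        public static void main(String[] args) {
            TreeMap<String, SortedSet<String>> values = new TreeMap<>();
            values.put("FIELD1", new TreeSet<>(java.util.List.of("b", "a")));
            values.put("FIELD2", new TreeSet<>());
            System.out.println(signature(values)); // f-FIELD1:a,bf-FIELD2:
        }
    }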
+ * + * @param field + * The field + * @return The field with grouping stripped + */ private String getFieldWithoutGrouping(String field) { int index = field.indexOf('.'); if (index < 0) { @@ -286,14 +347,22 @@ private String getFieldWithoutGrouping(String field) { } } - // Return whether or not the provided document field is considered a case-insensitive match for the provided field, applying reverse model mappings if - // configured. + /** + * Return whether or not the provided document field is considered a case-insensitive match for the provided field + * + * @param baseField + * The base field + * @param field + * The field to match with + * @return true if matching + */ private boolean isMatchingField(String baseField, String field) { - baseField = baseField.toUpperCase(); - field = field.toUpperCase(); - return field.equals(baseField) || (modelMapping != null && modelMapping.get(field).contains(baseField)); + return baseField.equalsIgnoreCase(field); } + /** + * A funnel to use for the bloom filter + */ public static class ByteFunnel implements Funnel, Serializable { private static final long serialVersionUID = -2126172579955897986L; @@ -303,4 +372,209 @@ public void funnel(byte[] from, PrimitiveSink into) { into.putBytes(from); } } + + /** + * An iterator of documents for this unique transform given an underlying iterator of documents. + */ + public class UniqueTransformIterator implements Iterator> { + private final Iterator> iterator; + private Map.Entry next = null; + + public UniqueTransformIterator(Iterator> iterator) { + this.iterator = iterator; + } + + @Override + public boolean hasNext() { + if (next == null) { + next = getNext(); + } + return (next != null); + } + + @Override + public Map.Entry next() { + Map.Entry o = null; + if (next == null) { + o = getNext(); + } else { + o = next; + next = null; + } + return o; + } + + private Map.Entry getNext() { + Map.Entry o = null; + while (o == null && iterator.hasNext()) { + o = apply(iterator.next()); + } + // see if there are any results cached by the transform + if (o == null) { + o = flush(); + } + return o; + } + + } + + /** + * A builder of unique transforms + */ + public static class Builder { + private UniqueFields uniqueFields; + private Comparator keyComparator; + private FileSortedMap.RewriteStrategy keyValueComparator; + private QueryModel model; + private int bufferPersistThreshold; + private List ivaratorCacheDirConfigs; + private String hdfsSiteConfigURLs; + private String subDirectory; + private int maxOpenFiles; + private int numRetries; + private long queryExecutionForPageTimeout; + private FileSortedSet.PersistOptions persistOptions; + + public Builder() { + keyComparator = new ByteArrayComparator(); + + keyValueComparator = (key, original, update) -> { + long ts1 = getTimestamp(original); + long ts2 = getTimestamp(update); + return (ts2 > ts1); + }; + } + + /** + * Build a list of potential hdfs directories based on each ivarator cache dir configs. 
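A JDK-only sketch of the UniqueTransformIterator pattern defined above: apply the transform while the source lasts (a null result means a suppressed duplicate), then keep calling flush() to drain whatever the transform buffered; flush() hands back one buffered entry per call, so it is polled until it returns null:

    import java.util.Iterator;
    import java.util.NoSuchElementException;
    import java.util.function.Supplier;
    import java.util.function.UnaryOperator;

    public class DrainingIteratorSketch<T> implements Iterator<T> {
        private final Iterator<T> source;
        private final UnaryOperator<T> apply; // returns null to suppress a duplicate
        private final Supplier<T> flush;      // returns null once nothing is buffered
        private T next;

        DrainingIteratorSketch(Iterator<T> source, UnaryOperator<T> apply, Supplier<T> flush) {
            this.source = source;
            this.apply = apply;
            this.flush = flush;
        }

        @Override
        public boolean hasNext() {
            while (next == null && source.hasNext()) {
                next = apply.apply(source.next());
            }
            if (next == null) {
                next = flush.get();
            }
            return next != null;
        }

        @Override
        public T next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            T result = next;
            next = null;
            return result;
        }
    }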
+ * + * @param ivaratorCacheDirConfigs + * @param hdfsSiteConfigURLs + * @param subdirectory + * @return A path + * @throws IOException + * for issues with read/write + */ + private static List getIvaratorCacheDirs(List ivaratorCacheDirConfigs, String hdfsSiteConfigURLs, + String subdirectory) throws IOException { + // build a list of ivarator cache dirs from the configs + List pathAndFs = new ArrayList<>(); + if (ivaratorCacheDirConfigs != null && !ivaratorCacheDirConfigs.isEmpty()) { + for (IvaratorCacheDirConfig config : ivaratorCacheDirConfigs) { + + // first, make sure the cache configuration is valid + if (config.isValid()) { + Path path = new Path(config.getBasePathURI(), subdirectory); + URI uri = path.toUri(); + FileSystem fs = new FileSystemCache(hdfsSiteConfigURLs).getFileSystem(uri); + pathAndFs.add(new IvaratorCacheDir(config, fs, uri.toString())); + } + } + } + + if (pathAndFs.isEmpty()) + throw new IOException("Unable to find a usable hdfs cache dir out of " + ivaratorCacheDirConfigs); + + return pathAndFs; + } + + public Builder withUniqueFields(UniqueFields fields) { + this.uniqueFields = fields; + return this; + } + + public Builder withModel(QueryModel model) { + this.model = model; + return this; + } + + public Builder withBufferPersistThreshold(int bufferPersistThreshold) { + this.bufferPersistThreshold = bufferPersistThreshold; + return this; + } + + public Builder withIvaratorCacheDirConfigs(List ivaratorCacheDirConfigs) { + this.ivaratorCacheDirConfigs = ivaratorCacheDirConfigs; + return this; + } + + public Builder withHdfsSiteConfigURLs(String hdfsSiteConfigURLs) { + this.hdfsSiteConfigURLs = hdfsSiteConfigURLs; + return this; + } + + public Builder withSubDirectory(String subDirectory) { + this.subDirectory = subDirectory; + return this; + } + + public Builder withMaxOpenFiles(int maxOpenFiles) { + this.maxOpenFiles = maxOpenFiles; + return this; + } + + public Builder withNumRetries(int numRetries) { + this.numRetries = numRetries; + return this; + } + + public Builder withPersistOptions(FileSortedSet.PersistOptions persistOptions) { + this.persistOptions = persistOptions; + return this; + } + + public Builder withQueryExecutionForPageTimeout(long timeout) { + this.queryExecutionForPageTimeout = timeout; + return this; + } + + public UniqueTransform build() throws IOException { + UniqueTransform transform = new UniqueTransform(uniqueFields, queryExecutionForPageTimeout); + + if (transform.uniqueFields.isMostRecent()) { + // @formatter:off + // noinspection unchecked + transform.map = (HdfsBackedSortedMap) HdfsBackedSortedMap.builder() + .withComparator(keyComparator) + .withRewriteStrategy(keyValueComparator) + .withBufferPersistThreshold(bufferPersistThreshold) + .withIvaratorCacheDirs(getIvaratorCacheDirs(ivaratorCacheDirConfigs, hdfsSiteConfigURLs, subDirectory)) + .withUniqueSubPath("byUniqueKey") + .withMaxOpenFiles(maxOpenFiles) + .withNumRetries(numRetries) + .withPersistOptions(persistOptions) + .withMapFactory(new FileByteDocumentSortedMap.Factory()) + .build(); + + // noinspection unchecked + transform.returnSet = (HdfsBackedSortedMap) HdfsBackedSortedMap.builder() + .withBufferPersistThreshold(bufferPersistThreshold) + .withIvaratorCacheDirs(getIvaratorCacheDirs(ivaratorCacheDirConfigs, hdfsSiteConfigURLs, subDirectory)) + .withUniqueSubPath("byDocKey") + .withMaxOpenFiles(maxOpenFiles) + .withNumRetries(numRetries) + .withPersistOptions(persistOptions) + .withMapFactory(new FileKeyDocumentSortedMap.Factory()) + .build(); + // @formatter:on + } else { 
+                transform.bloom = BloomFilter.create(new ByteFunnel(), 500000, 1e-15);
+            }
+
+            return transform;
+        }
+    }
+
+    private static long getTimestamp(Document doc) {
+        return getDocKeyAttr(doc).getTimestamp();
+    }
+
+    private static DocumentKey getDocKeyAttr(Document doc) {
+        return (DocumentKey) (doc.get(Document.DOCKEY_FIELD_NAME));
+    }
+
+    private static Key getDocKey(Document doc) {
+        return getDocKeyAttr(doc).getDocKey();
+    }
+}
diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/BufferedFileBackedSortedMap.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/BufferedFileBackedSortedMap.java
new file mode 100644
index 00000000000..b1eb4bed782
--- /dev/null
+++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/BufferedFileBackedSortedMap.java
@@ -0,0 +1,545 @@
+package datawave.query.util.sortedmap;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.stream.Collectors;
+
+import org.apache.log4j.Logger;
+
+import datawave.query.util.sortedmap.FileSortedMap.SortedMapFileHandler;
+
+/**
+ * This is a sorted map that will hold up to a specified number of entries before flushing the data to disk. Files will be created as needed. An additional
+ * "persist" call is supplied to force flushing to disk. The iterator.remove and the submap operations will work up until any buffer has been flushed to disk.
+ * After that, those operations will not work as specified by the underlying FileSortedMap.
+ *
+ * @param <K>
+ *            key of the map
+ * @param <V>
+ *            value of the map
+ */
+public class BufferedFileBackedSortedMap<K,V> implements SortedMap<K,V>, RewritableSortedMap<K,V> {
+    private static final Logger log = Logger.getLogger(BufferedFileBackedSortedMap.class);
+    protected static final int DEFAULT_BUFFER_PERSIST_THRESHOLD = 1000;
+    protected static final int DEFAULT_MAX_OPEN_FILES = 100;
+    protected static final int DEFAULT_NUM_RETRIES = 2;
+
+    protected MultiMapBackedSortedMap<K,V> map = new MultiMapBackedSortedMap<>();
+    protected int maxOpenFiles = DEFAULT_MAX_OPEN_FILES;
+    protected FileSortedMap<K,V> buffer = null;
+    protected FileSortedMap.FileSortedMapFactory<K,V> mapFactory = null;
+    protected final Comparator<K> comparator;
+    protected FileSortedMap.RewriteStrategy<K,V> rewriteStrategy;
+    protected boolean sizeModified = false;
+    protected int size = 0;
+    protected int numRetries = DEFAULT_NUM_RETRIES;
+
+    protected List<SortedMapFileHandlerFactory> handlerFactories;
+    protected int bufferPersistThreshold = DEFAULT_BUFFER_PERSIST_THRESHOLD;
+
+    /**
+     * A factory for SortedMapFileHandlers
+     */
+    public interface SortedMapFileHandlerFactory {
+        SortedMapFileHandler createHandler() throws IOException;
+
+        boolean isValid();
+    }
+
+    public static class Builder<B extends Builder<B,K,V>,K,V> {
+        private int maxOpenFiles = DEFAULT_MAX_OPEN_FILES;
+        private FileSortedMap.FileSortedMapFactory<K,V> mapFactory = new FileSerializableSortedMap.Factory();
+        private Comparator<K> comparator;
+        private FileSortedMap.RewriteStrategy<K,V> rewriteStrategy;
+        private int numRetries = DEFAULT_NUM_RETRIES;
+        private List<SortedMapFileHandlerFactory> handlerFactories = new ArrayList<>();
+        private int bufferPersistThreshold = DEFAULT_BUFFER_PERSIST_THRESHOLD;
+
+        public Builder() {}
+
+        @SuppressWarnings("unchecked")
+        protected B self() {
+            return (B) this;
+        }
+
+        public B withMaxOpenFiles(int maxOpenFiles) {
+            this.maxOpenFiles = maxOpenFiles;
+            return self();
+        }
+
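+        // An illustrative sketch of how this builder is meant to be used (myHandlerFactory is a
+        // hypothetical SortedMapFileHandlerFactory whose isValid() returns true):
+        //
+        //   BufferedFileBackedSortedMap<String,Long> map = new BufferedFileBackedSortedMap.Builder<>()
+        //                   .withBufferPersistThreshold(1000)
+        //                   .withMaxOpenFiles(100)
+        //                   .withHandlerFactories(List.of(myHandlerFactory))
+        //                   .build();
+        //   map.put("a", 1L); // buffered in memory until the threshold is hit
+        //   map.persist();    // force the current buffer out to a file
+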
@SuppressWarnings("unchecked") + public B withMapFactory(FileSortedMap.FileSortedMapFactory mapFactory) { + this.mapFactory = (FileSortedMap.FileSortedMapFactory) mapFactory; + return self(); + } + + @SuppressWarnings("unchecked") + public B withComparator(Comparator comparator) { + this.comparator = (Comparator) comparator; + return self(); + } + + @SuppressWarnings("unchecked") + public B withRewriteStrategy(FileSortedMap.RewriteStrategy rewriteStrategy) { + this.rewriteStrategy = (FileSortedMap.RewriteStrategy) rewriteStrategy; + return self(); + } + + public B withNumRetries(int numRetries) { + this.numRetries = numRetries; + return self(); + } + + public B withHandlerFactories(List handlerFactories) { + this.handlerFactories = handlerFactories; + return self(); + } + + public B withBufferPersistThreshold(int bufferPersistThreshold) { + this.bufferPersistThreshold = bufferPersistThreshold; + return self(); + } + + public BufferedFileBackedSortedMap build() throws Exception { + return new BufferedFileBackedSortedMap<>(this); + } + } + + public static Builder builder() { + return new Builder<>(); + } + + protected BufferedFileBackedSortedMap(BufferedFileBackedSortedMap other) { + this.comparator = other.comparator; + this.rewriteStrategy = other.rewriteStrategy; + this.handlerFactories = new ArrayList<>(other.handlerFactories); + this.mapFactory = other.mapFactory; + this.bufferPersistThreshold = other.bufferPersistThreshold; + this.numRetries = other.numRetries; + this.maxOpenFiles = other.maxOpenFiles; + for (SortedMap submap : other.map.getMaps()) { + FileSortedMap clone = ((FileSortedMap) submap).clone(); + this.map.addMap(clone); + if (!clone.isPersisted()) { + this.buffer = clone; + } + } + this.sizeModified = other.sizeModified; + this.size = other.size; + } + + protected BufferedFileBackedSortedMap(Builder builder) { + this.comparator = builder.comparator; + this.rewriteStrategy = builder.rewriteStrategy; + this.handlerFactories = new ArrayList<>(builder.handlerFactories); + this.mapFactory = builder.mapFactory; + this.bufferPersistThreshold = builder.bufferPersistThreshold; + this.numRetries = builder.numRetries; + this.maxOpenFiles = builder.maxOpenFiles; + } + + private SortedMapFileHandler createFileHandler(SortedMapFileHandlerFactory handlerFactory) throws IOException { + if (handlerFactory.isValid()) { + try { + return handlerFactory.createHandler(); + } catch (IOException e) { + log.warn("Unable to create file handler using handler factory: " + handlerFactory, e); + } + } + + return null; + } + + public void persist() throws IOException { + if (buffer != null) { + // go through the handler factories and try to persist the sorted map + for (int i = 0; i < handlerFactories.size() && !buffer.isPersisted(); i++) { + SortedMapFileHandlerFactory handlerFactory = handlerFactories.get(i); + SortedMapFileHandler handler = createFileHandler(handlerFactory); + + // if we have a valid handler, try to persist + if (handler != null) { + Exception cause = null; + for (int attempts = 0; attempts <= numRetries && !buffer.isPersisted(); attempts++) { + try { + buffer.persist(handler); + } catch (IOException e) { + if (attempts == numRetries) + cause = e; + } + } + + if (!buffer.isPersisted()) { + log.warn("Unable to persist the sorted map using the file handler: " + handler, cause); + + // if this was an hdfs file handler, decrement the count + if (handlerFactory instanceof HdfsBackedSortedMap.SortedMapHdfsFileHandlerFactory) { + HdfsBackedSortedMap.SortedMapHdfsFileHandlerFactory 
hdfsHandlerFactory = ((HdfsBackedSortedMap.SortedMapHdfsFileHandlerFactory) handlerFactory); + hdfsHandlerFactory.mapFileCount(hdfsHandlerFactory.getFileCount() - 1); + } + } + } else { + log.warn("Unable to create a file handler using the handler factory: " + handlerFactory); + } + } + + // if the buffer was not persisted, throw an exception + if (!buffer.isPersisted()) + throw new IOException("Unable to persist the sorted map using the configured handler factories."); + + buffer = null; + compact(maxOpenFiles); + } + } + + protected List> getMaps() { + List> maps = new ArrayList<>(); + for (SortedMap submap : map.getMaps()) { + maps.add((FileSortedMap) submap); + } + return maps; + } + + protected void addMap(FileSortedMap submap) { + map.addMap(submap); + size += submap.size(); + } + + public boolean hasPersistedData() { + for (SortedMap submap : map.getMaps()) { + if (((FileSortedMap) submap).isPersisted()) { + return true; + } + } + return false; + } + + public boolean isPersisted() { + // we are (completely) persisted iff the buffer is persisted + return (buffer == null || buffer.isPersisted()); + } + + @Override + public int size() { + if (sizeModified) { + this.size = map.size(); + sizeModified = false; + } + return this.size; + } + + public int getBufferPersistThreshold() { + return this.bufferPersistThreshold; + } + + public int getBufferSize() { + return (this.buffer == null ? 0 : this.buffer.size()); + } + + @Override + public boolean isEmpty() { + return size() == 0; + } + + @Override + public boolean containsKey(Object o) { + // try the cheap operation first + if (buffer != null && buffer.containsKey(o)) { + return true; + } else { + return map.containsKey(o); + } + } + + @Override + public boolean containsValue(Object value) { + return false; + } + + private String printHandlerFactories() { + return String.join(", ", handlerFactories.stream().map(SortedMapFileHandlerFactory::toString).collect(Collectors.toList())); + } + + /** + * If the number of maps is over maxFiles, then start compacting those files down. The goal is to get the number of files down around 50% of maxFiles. 
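+     * For example, with maxFiles = 100 and 120 persisted maps, the excess is 120 - (100 / 2) = 70, so the 71 smallest maps (one extra to account for the
+     * compacted result being added back in) are merged into a single file, leaving 50 maps.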
+ * + * @param maxFiles + * the max number of files + * @throws IOException + * for IO Exceptions + */ + public void compact(int maxFiles) throws IOException { + // if we have more maps than we are allowed, then we need to compact this down + if (maxFiles > 0 && map.getMaps().size() > maxFiles) { + if (log.isDebugEnabled()) { + log.debug("Compacting [" + printHandlerFactories() + "]"); + } + // create a copy of the map list (sorting below) + List> maps = new ArrayList<>(map.getMaps()); + + // calculate the number of maps to compact + int nummaps = maps.size(); + int excessmaps = nummaps - (maxFiles / 2); // those over 50% of maxFiles + int mapsPerCompaction = Math.min(excessmaps + 1, nummaps); // Add in 1 to account for the compacted map being added back in + + // sort the maps by size (compact up smaller maps first) + maps.sort(Comparator.comparing(SortedMap::size).reversed()); + + // newmap will be the final multimap + MultiMapBackedSortedMap newmap = new MultiMapBackedSortedMap<>(); + + // create a map for those maps to be compacted into one file + MultiMapBackedSortedMap mapToCompact = new MultiMapBackedSortedMap<>(); + for (int i = 0; i < mapsPerCompaction; i++) { + mapToCompact.addMap(maps.remove(maps.size() - 1)); + } + + // compact it + if (log.isDebugEnabled()) { + log.debug("Starting compaction for " + mapToCompact); + } + long start = System.currentTimeMillis(); + FileSortedMap compaction = compact(mapToCompact); + if (log.isDebugEnabled()) { + long delta = System.currentTimeMillis() - start; + log.debug("Compacted " + mapToCompact + " -> " + compaction + " in " + delta + "ms"); + } + + // add the compacted map to our final multimap + newmap.addMap(compaction); + + // clear the compactions map to remove the files that were compacted + mapToCompact.clear(); + + // now add in the maps we did not compact + for (int i = 0; i < maps.size(); i++) { + newmap.addMap(maps.get(i)); + } + + // and replace our map + this.map = newmap; + } + } + + private FileSortedMap compact(MultiMapBackedSortedMap mapToCompact) throws IOException { + FileSortedMap compactedmap = null; + + // go through the handler factories and try to persist the sorted map + for (int i = 0; i < handlerFactories.size() && compactedmap == null; i++) { + SortedMapFileHandlerFactory handlerFactory = handlerFactories.get(i); + SortedMapFileHandler handler = createFileHandler(handlerFactory); + + // if we have a valid handler, try to persist + if (handler != null) { + Exception cause = null; + for (int attempts = 0; attempts <= numRetries && compactedmap == null; attempts++) { + try { + compactedmap = mapFactory.newInstance(mapToCompact, handlerFactory.createHandler(), true); + } catch (IOException e) { + if (attempts == numRetries) + cause = e; + } + } + + if (compactedmap == null) { + log.warn("Unable to compact the sorted map using the file handler: " + handler, cause); + + // if this was an hdfs file handler, decrement the count + if (handlerFactory instanceof HdfsBackedSortedMap.SortedMapHdfsFileHandlerFactory) { + HdfsBackedSortedMap.SortedMapHdfsFileHandlerFactory hdfsHandlerFactory = ((HdfsBackedSortedMap.SortedMapHdfsFileHandlerFactory) handlerFactory); + hdfsHandlerFactory.mapFileCount(hdfsHandlerFactory.getFileCount() - 1); + } + } + } else { + log.warn("Unable to create a file handler using the handler factory: " + handlerFactory); + } + } + + // if the sorted maps were not compacted, throw an exception + if (compactedmap == null) + throw new IOException("Unable to persist the sorted map using the configured 
handler factories."); + + return compactedmap; + } + + @Override + public V put(K key, V value) { + if (buffer == null) { + try { + buffer = mapFactory.newInstance(comparator, rewriteStrategy, null, false); + } catch (Exception ex) { + throw new IllegalStateException("Unable to create an underlying FileSortedMap", ex); + } + + map.addMap(buffer); + } + V previous = buffer.put(key, value); + sizeModified = true; + if (previous != null) { + if (buffer.size() >= bufferPersistThreshold) { + try { + persist(); + } catch (Exception ex) { + throw new IllegalStateException("Unable to persist or compact FileSortedMap", ex); + } + } + return previous; + } + return null; + } + + @Override + public void putAll(Map c) { + if (buffer == null) { + try { + buffer = mapFactory.newInstance(comparator, rewriteStrategy, null, false); + } catch (Exception ex) { + throw new IllegalStateException("Unable to create an underlying FileSortedMap", ex); + } + map.addMap(buffer); + } + buffer.putAll(c); + sizeModified = true; + if (buffer.size() >= bufferPersistThreshold) { + try { + persist(); + } catch (Exception ex) { + throw new IllegalStateException("Unable to persist or compact FileSortedMap", ex); + } + } + } + + @Override + public V remove(Object o) { + V value = null; + for (SortedMap map : map.getMaps()) { + FileSortedMap filemap = (FileSortedMap) map; + boolean persist = false; + if (filemap.isPersisted()) { + try { + filemap.load(); + persist = true; + } catch (Exception e) { + throw new IllegalStateException("Unable to remove item from underlying files", e); + } + } + + V testValue = map.remove(o); + if (testValue != null) { + if (value != null) { + if (rewriteStrategy == null || rewriteStrategy.rewrite((K) o, value, testValue)) { + value = testValue; + } + } else { + value = testValue; + } + } + + if (persist) { + try { + filemap.persist(); + } catch (Exception e) { + throw new IllegalStateException("Unable to remove item from underlying files", e); + } + } + } + if (value != null) { + this.sizeModified = true; + } + return value; + } + + @Override + public void clear() { + // This will cause the MultimapBackedSortedMap to call clear on each map in its map of maps, including the buffer + // It will also call clear on its map of maps, emptying the contents + map.clear(); + // Null the buffer so that it will start new on the next add + buffer = null; + this.size = 0; + this.sizeModified = false; + } + + @Override + public Comparator comparator() { + return comparator; + } + + @Override + public SortedMap subMap(K fromKey, K toKey) { + return map.subMap(fromKey, toKey); + } + + @Override + public SortedMap headMap(K toKey) { + return map.headMap(toKey); + } + + @Override + public SortedMap tailMap(K fromKey) { + return map.tailMap(fromKey); + } + + @Override + public K firstKey() { + return map.firstKey(); + } + + @Override + public K lastKey() { + return map.lastKey(); + } + + @Override + public Set keySet() { + return map.keySet(); + } + + @Override + public Collection values() { + return map.values(); + } + + @Override + public Set> entrySet() { + return map.entrySet(); + } + + @Override + public void setRewriteStrategy(FileSortedMap.RewriteStrategy rewriteStrategy) { + this.rewriteStrategy = rewriteStrategy; + } + + @Override + public FileSortedMap.RewriteStrategy getRewriteStrategy() { + return rewriteStrategy; + } + + @Override + public V get(Object o) { + V value = null; + for (SortedMap map : map.getMaps()) { + V testValue = map.get(o); + if (testValue != null) { + if (value != null) { + if 
(rewriteStrategy == null || rewriteStrategy.rewrite((K) o, value, testValue)) { + value = testValue; + } + } else { + value = testValue; + } + } + } + return value; + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileByteDocumentSortedMap.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileByteDocumentSortedMap.java new file mode 100644 index 00000000000..76c34e0d63b --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileByteDocumentSortedMap.java @@ -0,0 +1,207 @@ +package datawave.query.util.sortedmap; + +import java.io.IOException; +import java.util.Comparator; +import java.util.SortedMap; + +import org.apache.log4j.Logger; + +import datawave.query.attributes.Document; +import datawave.query.util.sortedmap.rfile.RFileByteDocumentInputStream; +import datawave.query.util.sortedmap.rfile.RFileByteDocumentOutputStream; +import datawave.query.util.sortedset.ByteArrayComparator; +import datawave.query.util.sortedset.FileSortedSet; + +/** + * A sorted map that can be persisted into a file and still be read in its persisted state. The map can always be re-loaded and then all operations will work as + * expected. This will support null contained in the underlying maps iff a comparator is supplied that can handle null values. + * + * The persisted file will contain the serialized entries, followed by the actual size. + * + */ +public class FileByteDocumentSortedMap extends FileSortedMap { + private static Logger log = Logger.getLogger(FileByteDocumentSortedMap.class); + + public final static class DefaultByteComparator implements Comparator { + + @Override + public int compare(byte[] o1, byte[] o2) { + return new ByteArrayComparator().compare(o1, o2); + } + } + + /** + * Create a file sorted map from another one + * + * @param other + * the other sorted map + */ + public FileByteDocumentSortedMap(FileByteDocumentSortedMap other) { + super(other); + } + + /** + * Create a file sorted submap from another one + * + * @param other + * the other sorted map + * @param from + * the from key + * @param to + * the to key + */ + public FileByteDocumentSortedMap(FileByteDocumentSortedMap other, byte[] from, byte[] to) { + super(other, from, to); + } + + /** + * Create a persisted sorted map + * + * @param handler + * the sorted map file handler + * @param persisted + * a persisted boolean flag + */ + public FileByteDocumentSortedMap(SortedMapFileHandler handler, boolean persisted) { + this(new DefaultByteComparator(), handler, persisted); + } + + /** + * Create a persisted sorted map + * + * @param comparator + * the key comparator + * @param handler + * the sorted map file handler + * @param persisted + * a persisted boolean flag + */ + public FileByteDocumentSortedMap(Comparator comparator, SortedMapFileHandler handler, boolean persisted) { + super((comparator == null ? new DefaultByteComparator() : comparator), new ByteDocumentFileHandler(handler), new Factory(), persisted); + } + + /** + * Create an unpersisted sorted map (still in memory) + * + * @param map + * the sorted map + * @param handler + * the sorted map file handler + */ + public FileByteDocumentSortedMap(SortedMap map, SortedMapFileHandler handler) { + super(map, new ByteDocumentFileHandler(handler), new Factory()); + } + + /** + * Create a sorted map out of another sorted map. If persist is true, then the map will be directly persisted using the map's iterator which avoids pulling + * all of its entries into memory at once. 
+ * + * @param map + * the sorted map + * @param handler + * the sorted map file handler + * @param persist + * boolean flag for persist + * @throws IOException + * for issues with read/write + */ + public FileByteDocumentSortedMap(SortedMap map, SortedMapFileHandler handler, boolean persist) throws IOException { + super(map, new ByteDocumentFileHandler(handler), new Factory(), persist); + } + + /** + * This will dump the map to the file, making the map "persisted" + * + * @param handler + * the sorted map file handler + * @throws IOException + * for issues with read/write + */ + public void persist(SortedMapFileHandler handler) throws IOException { + // ensure this handler is wrapped with our handler + super.persist(new ByteDocumentFileHandler(handler)); + } + + /** + * Clone this map + */ + @Override + public FileByteDocumentSortedMap clone() { + return (FileByteDocumentSortedMap) super.clone(); + } + + /** + * A SortedMapfilehandler that can bound the input stream + */ + public static class ByteDocumentFileHandler implements BoundedTypedSortedMapFileHandler { + SortedMapFileHandler delegate; + + public ByteDocumentFileHandler(SortedMapFileHandler handler) { + this.delegate = handler; + } + + @Override + public SortedMapInputStream getInputStream() throws IOException { + return new RFileByteDocumentInputStream(delegate.getInputStream(), delegate.getSize()); + } + + @Override + public SortedMapInputStream getInputStream(byte[] start, byte[] end) throws IOException { + return new RFileByteDocumentInputStream(delegate.getInputStream(), delegate.getSize(), start, end); + } + + @Override + public SortedMapOutputStream getOutputStream() throws IOException { + return new RFileByteDocumentOutputStream(delegate.getOutputStream()); + } + + @Override + public FileSortedSet.PersistOptions getPersistOptions() { + return delegate.getPersistOptions(); + } + + @Override + public long getSize() { + return delegate.getSize(); + } + + @Override + public void deleteFile() { + delegate.deleteFile(); + } + } + + /** + * A factory for these file sorted maps + */ + public static class Factory implements FileSortedMapFactory { + + @Override + public FileByteDocumentSortedMap newInstance(FileSortedMap other) { + return new FileByteDocumentSortedMap((FileByteDocumentSortedMap) other); + } + + @Override + public FileByteDocumentSortedMap newInstance(FileSortedMap other, byte[] from, byte[] to) { + return new FileByteDocumentSortedMap((FileByteDocumentSortedMap) other, from, to); + } + + @Override + public FileSortedMap newInstance(Comparator comparator, RewriteStrategy rewriteStrategy, + SortedMapFileHandler handler, boolean persisted) { + FileByteDocumentSortedMap map = new FileByteDocumentSortedMap(comparator, handler, persisted); + map.setRewriteStrategy(rewriteStrategy); + return map; + } + + @Override + public FileByteDocumentSortedMap newInstance(SortedMap map, SortedMapFileHandler handler) { + return new FileByteDocumentSortedMap(map, handler); + } + + @Override + public FileByteDocumentSortedMap newInstance(SortedMap map, SortedMapFileHandler handler, boolean persist) throws IOException { + return new FileByteDocumentSortedMap(map, handler, persist); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileKeyDocumentSortedMap.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileKeyDocumentSortedMap.java new file mode 100644 index 00000000000..6fdb085f8e5 --- /dev/null +++ 
b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileKeyDocumentSortedMap.java @@ -0,0 +1,206 @@ +package datawave.query.util.sortedmap; + +import java.io.IOException; +import java.util.Comparator; +import java.util.SortedMap; + +import org.apache.accumulo.core.data.Key; +import org.apache.log4j.Logger; + +import datawave.query.attributes.Document; +import datawave.query.util.sortedmap.rfile.RFileKeyDocumentInputStream; +import datawave.query.util.sortedmap.rfile.RFileKeyDocumentOutputStream; +import datawave.query.util.sortedset.FileSortedSet; + +/** + * A sorted map that can be persisted into a file and still be read in its persisted state. The map can always be re-loaded and then all operations will work as + * expected. This will support null contained in the underlying maps iff a comparator is supplied that can handle null values. + * + * The persisted file will contain the serialized entries, followed by the actual size. + * + */ +public class FileKeyDocumentSortedMap extends FileSortedMap { + private static Logger log = Logger.getLogger(FileKeyDocumentSortedMap.class); + + public static class DefaultKeyComparator implements Comparator { + @Override + public int compare(Key o1, Key o2) { + return o1.compareTo(o2); + } + } + + /** + * Create a file sorted map from another one + * + * @param other + * the other sorted map + */ + public FileKeyDocumentSortedMap(FileKeyDocumentSortedMap other) { + super(other); + } + + /** + * Create a file sorted submap from another one + * + * @param other + * the other sorted map + * @param from + * the from key + * @param to + * the to key + */ + public FileKeyDocumentSortedMap(FileKeyDocumentSortedMap other, Key from, Key to) { + super(other, from, to); + } + + /** + * Create a persisted sorted map + * + * @param handler + * the sorted map file handler + * @param persisted + * a persisted boolean flag + */ + public FileKeyDocumentSortedMap(SortedMapFileHandler handler, boolean persisted) { + this(new DefaultKeyComparator(), handler, persisted); + } + + /** + * Create a persisted sorted map + * + * @param comparator + * the key comparator + * @param handler + * the sorted map file handler + * @param persisted + * a persisted boolean flag + */ + public FileKeyDocumentSortedMap(Comparator comparator, SortedMapFileHandler handler, boolean persisted) { + super((comparator == null ? new DefaultKeyComparator() : comparator), new KeyDocumentFileHandler(handler), new Factory(), persisted); + } + + /** + * Create an unpersisted sorted map (still in memory) + * + * @param map + * the sorted map + * @param handler + * the sorted map file handler + */ + public FileKeyDocumentSortedMap(SortedMap map, SortedMapFileHandler handler) { + super(map, new KeyDocumentFileHandler(handler), new Factory()); + } + + /** + * Create a sorted map out of another sorted map. If persist is true, then the map will be directly persisted using the map's iterator which avoids pulling + * all of its entries into memory at once. 
+ * + * @param map + * the sorted map + * @param handler + * the sorted map file handler + * @param persist + * boolean flag for persist + * @throws IOException + * for issues with read/write + */ + public FileKeyDocumentSortedMap(SortedMap map, SortedMapFileHandler handler, boolean persist) throws IOException { + super(map, new KeyDocumentFileHandler(handler), new Factory(), persist); + } + + /** + * This will dump the map to the file, making the map "persisted" + * + * @param handler + * the sorted map file handler + * @throws IOException + * for issues with read/write + */ + public void persist(SortedMapFileHandler handler) throws IOException { + // ensure this handler is wrapped with our handler + super.persist(new KeyDocumentFileHandler(handler)); + } + + /** + * Clone this map + */ + @Override + public FileKeyDocumentSortedMap clone() { + return (FileKeyDocumentSortedMap) super.clone(); + } + + /** + * A SortedMapfilehandler that can bound the input stream + */ + public static class KeyDocumentFileHandler implements BoundedTypedSortedMapFileHandler { + SortedMapFileHandler delegate; + + public KeyDocumentFileHandler(SortedMapFileHandler handler) { + this.delegate = handler; + } + + @Override + public SortedMapInputStream getInputStream() throws IOException { + return new RFileKeyDocumentInputStream(delegate.getInputStream(), delegate.getSize()); + } + + @Override + public SortedMapInputStream getInputStream(Key start, Key end) throws IOException { + return new RFileKeyDocumentInputStream(delegate.getInputStream(), delegate.getSize(), start, end); + } + + @Override + public SortedMapOutputStream getOutputStream() throws IOException { + return new RFileKeyDocumentOutputStream(delegate.getOutputStream()); + } + + @Override + public FileSortedSet.PersistOptions getPersistOptions() { + return delegate.getPersistOptions(); + } + + @Override + public long getSize() { + return delegate.getSize(); + } + + @Override + public void deleteFile() { + delegate.deleteFile(); + } + } + + /** + * A factory for these file sorted maps + */ + public static class Factory implements FileSortedMapFactory { + + @Override + public FileKeyDocumentSortedMap newInstance(FileSortedMap other) { + return new FileKeyDocumentSortedMap((FileKeyDocumentSortedMap) other); + } + + @Override + public FileKeyDocumentSortedMap newInstance(FileSortedMap other, Key from, Key to) { + return new FileKeyDocumentSortedMap((FileKeyDocumentSortedMap) other, from, to); + } + + @Override + public FileSortedMap newInstance(Comparator comparator, RewriteStrategy rewriteStrategy, SortedMapFileHandler handler, + boolean persisted) { + FileKeyDocumentSortedMap map = new FileKeyDocumentSortedMap(comparator, handler, persisted); + map.setRewriteStrategy(rewriteStrategy); + return map; + } + + @Override + public FileKeyDocumentSortedMap newInstance(SortedMap map, SortedMapFileHandler handler) { + return new FileKeyDocumentSortedMap(map, handler); + } + + @Override + public FileKeyDocumentSortedMap newInstance(SortedMap map, SortedMapFileHandler handler, boolean persist) throws IOException { + return new FileKeyDocumentSortedMap(map, handler, persist); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileKeyValueSortedMap.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileKeyValueSortedMap.java new file mode 100644 index 00000000000..976b5d75d47 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileKeyValueSortedMap.java @@ -0,0 +1,206 @@ 
+package datawave.query.util.sortedmap; + +import java.io.IOException; +import java.util.Comparator; +import java.util.SortedMap; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.log4j.Logger; + +import datawave.query.util.sortedmap.rfile.RFileKeyValueInputStream; +import datawave.query.util.sortedmap.rfile.RFileKeyValueOutputStream; +import datawave.query.util.sortedset.FileSortedSet; + +/** + * A sorted map that can be persisted into a file and still be read in its persisted state. The map can always be re-loaded and then all operations will work as + * expected. This will support null contained in the underlying maps iff a comparator is supplied that can handle null values. + * + * The persisted file will contain the serialized entries, followed by the actual size. + * + */ +public class FileKeyValueSortedMap extends FileSortedMap { + private static Logger log = Logger.getLogger(FileKeyValueSortedMap.class); + + public static class DefaultKeyComparator implements Comparator { + @Override + public int compare(Key o1, Key o2) { + return o1.compareTo(o2); + } + } + + /** + * Create a file sorted map from another one + * + * @param other + * the other sorted map + */ + public FileKeyValueSortedMap(FileKeyValueSortedMap other) { + super(other); + } + + /** + * Create a file sorted submap from another one + * + * @param other + * the other sorted map + * @param from + * the from key + * @param to + * the to key + */ + public FileKeyValueSortedMap(FileKeyValueSortedMap other, Key from, Key to) { + super(other, from, to); + } + + /** + * Create a persisted sorted map + * + * @param handler + * the sorted map file handler + * @param persisted + * a persisted boolean flag + */ + public FileKeyValueSortedMap(SortedMapFileHandler handler, boolean persisted) { + this(new DefaultKeyComparator(), handler, persisted); + } + + /** + * Create a persisted sorted map + * + * @param comparator + * the key comparator + * @param handler + * the sorted map file handler + * @param persisted + * a persisted boolean flag + */ + public FileKeyValueSortedMap(Comparator comparator, SortedMapFileHandler handler, boolean persisted) { + super(((comparator == null) ? new DefaultKeyComparator() : comparator), new KeyValueFileHandler(handler), new Factory(), persisted); + } + + /** + * Create an unpersisted sorted map (still in memory) + * + * @param map + * the sorted map + * @param handler + * the sorted map file handler + */ + public FileKeyValueSortedMap(SortedMap map, SortedMapFileHandler handler) { + super(map, new KeyValueFileHandler(handler), new Factory()); + } + + /** + * Create a sorted map out of another sorted map. If persist is true, then the map will be directly persisted using the map's iterator which avoid pulling + * all of its entries into memory at once. 
+ * + * @param map + * the sorted map + * @param handler + * the sorted map file handler + * @param persist + * boolean flag for persist + * @throws IOException + * for issues with read/write + */ + public FileKeyValueSortedMap(SortedMap map, SortedMapFileHandler handler, boolean persist) throws IOException { + super(map, new KeyValueFileHandler(handler), new Factory(), persist); + } + + /** + * This will dump the map to the file, making the map "persisted" + * + * @param handler + * the sorted map file handler + * @throws IOException + * for issues with read/write + */ + public void persist(SortedMapFileHandler handler) throws IOException { + // ensure this handler is wrapped with our handler + super.persist(new KeyValueFileHandler(handler)); + } + + /** + * Clone this map + */ + @Override + public FileKeyValueSortedMap clone() { + return (FileKeyValueSortedMap) super.clone(); + } + + /** + * A SortedMapfilehandler that can bound the input stream + */ + public static class KeyValueFileHandler implements BoundedTypedSortedMapFileHandler { + SortedMapFileHandler delegate; + + public KeyValueFileHandler(SortedMapFileHandler handler) { + this.delegate = handler; + } + + @Override + public SortedMapInputStream getInputStream() throws IOException { + return new RFileKeyValueInputStream(delegate.getInputStream(), delegate.getSize()); + } + + @Override + public SortedMapInputStream getInputStream(Key start, Key end) throws IOException { + return new RFileKeyValueInputStream(delegate.getInputStream(), delegate.getSize(), start, end); + } + + @Override + public SortedMapOutputStream getOutputStream() throws IOException { + return new RFileKeyValueOutputStream(delegate.getOutputStream()); + } + + @Override + public FileSortedSet.PersistOptions getPersistOptions() { + return delegate.getPersistOptions(); + } + + @Override + public long getSize() { + return delegate.getSize(); + } + + @Override + public void deleteFile() { + delegate.deleteFile(); + } + } + + /** + * A factory for these file sorted maps + */ + public static class Factory implements FileSortedMapFactory { + + @Override + public FileKeyValueSortedMap newInstance(FileSortedMap other) { + return new FileKeyValueSortedMap((FileKeyValueSortedMap) other); + } + + @Override + public FileKeyValueSortedMap newInstance(FileSortedMap other, Key from, Key to) { + return new FileKeyValueSortedMap((FileKeyValueSortedMap) other, from, to); + } + + @Override + public FileKeyValueSortedMap newInstance(Comparator comparator, RewriteStrategy rewriteStategy, SortedMapFileHandler handler, + boolean persisted) { + FileKeyValueSortedMap map = new FileKeyValueSortedMap(comparator, handler, persisted); + map.setRewriteStrategy(rewriteStategy); + return map; + } + + @Override + public FileKeyValueSortedMap newInstance(SortedMap map, SortedMapFileHandler handler) { + return new FileKeyValueSortedMap(map, handler); + } + + @Override + public FileKeyValueSortedMap newInstance(SortedMap map, SortedMapFileHandler handler, boolean persist) throws IOException { + return new FileKeyValueSortedMap(map, handler, persist); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileSerializableSortedMap.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileSerializableSortedMap.java new file mode 100644 index 00000000000..c61edd025da --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileSerializableSortedMap.java @@ -0,0 +1,289 @@ +package datawave.query.util.sortedmap; + +import 
java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.OutputStream;
+import java.io.Serializable;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.SortedMap;
+
+import org.apache.commons.collections4.keyvalue.UnmodifiableMapEntry;
+import org.apache.log4j.Logger;
+
+import datawave.query.util.sortedset.FileSortedSet;
+
+/**
+ * A sorted map that can be persisted into a file and still be read in its persisted state. The map can always be re-loaded and then all operations will work
+ * as expected. This will support null contained in the underlying maps iff a comparator is supplied that can handle null values.
+ *
+ * The persisted file will contain the serialized entries, followed by the actual size.
+ *
+ * @param <K>
+ *            key of the map
+ * @param <V>
+ *            value of the map
+ */
+public class FileSerializableSortedMap<K extends Serializable,V extends Serializable> extends FileSortedMap<K,V> {
+    private static Logger log = Logger.getLogger(FileSerializableSortedMap.class);
+
+    /**
+     * Create a file sorted map from another one
+     *
+     * @param other
+     *            the other sorted map
+     */
+    public FileSerializableSortedMap(FileSerializableSortedMap<K,V> other) {
+        super(other);
+    }
+
+    /**
+     * Create a file sorted submap from another one
+     *
+     * @param other
+     *            the other sorted map
+     * @param from
+     *            the from key
+     * @param to
+     *            the to key
+     */
+    public FileSerializableSortedMap(FileSerializableSortedMap<K,V> other, K from, K to) {
+        super(other, from, to);
+    }
+
+    /**
+     * Create a persisted sorted map
+     *
+     * @param handler
+     *            a file handler
+     * @param persisted
+     *            persisted boolean flag
+     */
+    public FileSerializableSortedMap(TypedSortedMapFileHandler<K,V> handler, boolean persisted) {
+        super(handler, new Factory(), persisted);
+    }
+
+    /**
+     * Create a persisted sorted map
+     *
+     * @param comparator
+     *            a comparator
+     * @param handler
+     *            a file handler
+     * @param persisted
+     *            persisted boolean flag
+     */
+    public FileSerializableSortedMap(Comparator<K> comparator, TypedSortedMapFileHandler<K,V> handler, boolean persisted) {
+        super(comparator, handler, new Factory(), persisted);
+    }
+
+    /**
+     * Create an unpersisted sorted map (still in memory)
+     *
+     * @param map
+     *            a sorted map
+     * @param handler
+     *            a file handler
+     */
+    public FileSerializableSortedMap(SortedMap<K,V> map, TypedSortedMapFileHandler<K,V> handler) {
+        super(map, handler, new Factory());
+    }
+
+    /**
+     * Create a sorted map out of another sorted map. If persist is true, then the map will be directly persisted using the map's iterator, which avoids
+     * pulling all of its entries into memory at once.
+ * + * @param map + * a sorted map + * @param handler + * a file handler + * @param persist + * a persist flag + * @throws IOException + * for issues with read/write + */ + public FileSerializableSortedMap(SortedMap map, TypedSortedMapFileHandler handler, boolean persist) throws IOException { + super(map, handler, new Factory(), persist); + } + + /** + * Persist a map using the specified handler + * + * @param handler + * a file handler + * @throws IOException + * for issues with read/write + */ + @Override + public void persist(SortedMapFileHandler handler) throws IOException { + super.persist(new SerializableFileHandler(handler)); + } + + @Override + public FileSerializableSortedMap clone() { + return (FileSerializableSortedMap) super.clone(); + } + + /** + * A SortedMapfilehandler that can handler serializable objects + */ + public static class SerializableFileHandler implements TypedSortedMapFileHandler { + SortedMapFileHandler delegate; + + public SerializableFileHandler(SortedMapFileHandler handler) { + this.delegate = handler; + } + + @Override + public SortedMapInputStream getInputStream() throws IOException { + return new SerializableInputStream(delegate.getInputStream(), delegate.getSize()); + } + + @Override + public SortedMapOutputStream getOutputStream() throws IOException { + return new SerializableOutputStream(delegate.getOutputStream()); + } + + @Override + public FileSortedSet.PersistOptions getPersistOptions() { + return delegate.getPersistOptions(); + } + + @Override + public long getSize() { + return delegate.getSize(); + } + + @Override + public void deleteFile() { + delegate.deleteFile(); + } + } + + public static class SerializableInputStream implements SortedMapInputStream { + private final InputStream stream; + private ObjectInputStream delegate; + private final long length; + + public SerializableInputStream(InputStream stream, long length) throws IOException { + this.stream = stream; + this.length = length; + } + + private ObjectInputStream getDelegate() throws IOException { + if (delegate == null) { + this.delegate = new ObjectInputStream(stream); + } + return delegate; + } + + @Override + public Map.Entry readObject() throws IOException { + try { + K key = (K) getDelegate().readObject(); + V value = (V) getDelegate().readObject(); + return new UnmodifiableMapEntry<>(key, value); + } catch (IOException ioe) { + return null; + } catch (ClassNotFoundException nnfe) { + return null; + } + } + + @Override + public int readSize() throws IOException { + long bytesToSkip = length - 4; + long total = 0; + long cur = 0; + + while ((total < bytesToSkip) && ((cur = stream.skip(bytesToSkip - total)) > 0)) { + total += cur; + } + + byte[] buffer = new byte[4]; + stream.read(buffer); + + // read the 4 bytes of an integer in a deterministic order0 + return ((buffer[3] & 0xFF)) + ((buffer[2] & 0xFF) << 8) + ((buffer[1] & 0xFF) << 16) + ((buffer[0]) << 24); + } + + @Override + public void close() { + try { + if (delegate != null) { + delegate.close(); + } else { + stream.close(); + } + } catch (Exception e) { + log.error("Failed to close input stream", e); + } + } + } + + public static class SerializableOutputStream implements FileSortedMap.SortedMapOutputStream { + private ObjectOutputStream delegate; + + public SerializableOutputStream(OutputStream stream) throws IOException { + delegate = new ObjectOutputStream(stream); + } + + @Override + public void writeObject(K key, V value) throws IOException { + delegate.writeObject(key); + delegate.writeObject(value); + } + + 
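+        // The size trailer is written big-endian (most significant byte first), matching the decode order in
+        // SerializableInputStream.readSize() above. For example, size = 258 (0x00000102) is written as the
+        // four bytes 0x00, 0x00, 0x01, 0x02.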
@Override + public void writeSize(int size) throws IOException { + // write the 4 bytes of a integer in a deterministic order0 + delegate.write((size >>> 24) & 0xFF); + delegate.write((size >>> 16) & 0xFF); + delegate.write((size >>> 8) & 0xFF); + delegate.write((size >>> 0) & 0xFF); + } + + @Override + public void close() throws IOException { + delegate.close(); + } + } + + /** + * A factory for this map + */ + public static class Factory implements FileSortedMapFactory { + + @Override + public FileSerializableSortedMap newInstance(FileSortedMap other) { + return new FileSerializableSortedMap((FileSerializableSortedMap) other); + } + + @Override + public FileSerializableSortedMap newInstance(FileSortedMap other, K from, K to) { + return new FileSerializableSortedMap((FileSerializableSortedMap) other, from, to); + } + + @Override + public FileSerializableSortedMap newInstance(Comparator comparator, RewriteStrategy rewriteStrategy, SortedMapFileHandler handler, + boolean persisted) { + FileSerializableSortedMap map = new FileSerializableSortedMap(comparator, new SerializableFileHandler(handler), persisted); + map.setRewriteStrategy(rewriteStrategy); + return map; + } + + @Override + public FileSortedMap newInstance(SortedMap map, SortedMapFileHandler handler) { + return new FileSerializableSortedMap(map, new SerializableFileHandler(handler)); + } + + @Override + public FileSortedMap newInstance(SortedMap map, SortedMapFileHandler handler, boolean persist) throws IOException { + return new FileSerializableSortedMap(map, new SerializableFileHandler(handler), persist); + } + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileSortedMap.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileSortedMap.java new file mode 100644 index 00000000000..d29c2e37150 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/FileSortedMap.java @@ -0,0 +1,1073 @@ +package datawave.query.util.sortedmap; + +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.OutputStream; +import java.io.Serializable; +import java.util.AbstractCollection; +import java.util.AbstractSet; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.commons.collections.IteratorUtils; +import org.apache.commons.collections.keyvalue.UnmodifiableMapEntry; +import org.apache.log4j.Logger; + +import datawave.query.util.sortedset.FileSortedSet; +import datawave.webservice.query.exception.DatawaveErrorCode; +import datawave.webservice.query.exception.QueryException; + +/** + * A sorted map that can be persisted into a file and still be read in its persisted state. The map can always be re-loaded and then all operations will work as + * expected. This class will not support null values. + * + * The persisted file will contain the serialized entries, followed by the actual size. + * + * A RewriteStrategy can be supplied that will determine whether a value gets replaced when putting a key,value pair. 
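+ *
+ * For example (an illustrative sketch, assuming RewriteStrategy is a functional interface with the single method
+ * {@code boolean rewrite(K key, V original, V update)} as used below), a strategy that always keeps the newest value could be supplied as:
+ *
+ * <pre>{@code
+ * map.setRewriteStrategy((key, original, update) -> true);
+ * }</pre>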
+ * + * @param + * key of the map + * @param + * value of the map + */ +public abstract class FileSortedMap implements SortedMap, Cloneable, RewritableSortedMap { + private static final Logger log = Logger.getLogger(FileSortedMap.class); + protected boolean persisted; + protected K[] range; + protected SortedMap map; + protected RewriteStrategy rewriteStrategy = null; + + // The file handler that handles the underlying io + protected TypedSortedMapFileHandler handler; + // The sort map factory + protected FileSortedMapFactory factory; + + /** + * A class that represents a null object within the map + */ + public static class NullObject implements Serializable { + private static final long serialVersionUID = -5528112099317370355L; + } + + /** + * Create a file sorted map from another one + * + * @param other + * the other sorted map + */ + public FileSortedMap(FileSortedMap other) { + this.handler = other.handler; + this.factory = other.factory; + this.map = new TreeMap<>(other.map); + this.persisted = other.persisted; + this.range = other.range; + this.rewriteStrategy = other.rewriteStrategy; + } + + /** + * Create a file sorted submap from another one + * + * @param other + * the other sorted map + * @param from + * the from key + * @param to + * the to key + */ + public FileSortedMap(FileSortedMap other, K from, K to) { + this(other); + if (from != null || to != null) { + if (persisted) { + this.range = (K[]) new Object[] {getStart(from), getEnd(to)}; + } else if (to == null) { + this.map = this.map.tailMap(from); + } else if (from == null) { + this.map = this.map.headMap(to); + } else { + this.map = this.map.subMap(from, to); + } + } + } + + /** + * Create a persisted sorted map + * + * @param handler + * the sorted map file handler + * @param persisted + * a persisted boolean flag + * @param factory + * the sorted map factory + */ + public FileSortedMap(TypedSortedMapFileHandler handler, FileSortedMapFactory factory, boolean persisted) { + this.handler = handler; + this.factory = factory; + this.map = new TreeMap<>(); + this.persisted = persisted; + } + + /** + * Create a persisted sorted map + * + * @param comparator + * the key comparator + * @param handler + * the sorted map file handler + * @param persisted + * a persisted boolean flag + * @param factory + * the sorted map factory + */ + public FileSortedMap(Comparator comparator, TypedSortedMapFileHandler handler, FileSortedMapFactory factory, boolean persisted) { + this.handler = handler; + this.factory = factory; + this.map = new TreeMap<>(comparator); + this.persisted = persisted; + } + + /** + * Create an unpersisted sorted map (still in memory) + * + * @param map + * a sorted map + * @param handler + * the sorted map file handler + * @param factory + * the sorted map factory + */ + public FileSortedMap(SortedMap map, TypedSortedMapFileHandler handler, FileSortedMapFactory factory) { + this.handler = handler; + this.factory = factory; + this.map = new TreeMap<>(map); + this.persisted = false; + if (map instanceof RewritableSortedMap) { + setRewriteStrategy(((RewritableSortedMap) map).getRewriteStrategy()); + } + } + + /** + * Create a sorted map out of another sorted map. If persist is true, then the map will be directly persisted using the map's iterator which avoid pulling + * all of its entries into memory at once. 
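+     *
+     * Note that when persist is true, the in-memory map is left empty after construction; the entries exist only in the underlying file until
+     * {@link #load()} is called.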
+ * + * @param map + * a sorted map + * @param handler + * the sorted map file handler + * @param factory + * the sorted map factory + * @param persist + * the persist boolean flag + * @throws IOException + * for issues with read/write + */ + public FileSortedMap(SortedMap map, TypedSortedMapFileHandler handler, FileSortedMapFactory factory, boolean persist) throws IOException { + this.handler = handler; + this.factory = factory; + if (!persist) { + this.map = new TreeMap<>(map); + this.persisted = false; + } else { + this.map = new TreeMap<>(map.comparator()); + persist(map, handler); + persisted = true; + } + if (map instanceof RewritableSortedMap) { + setRewriteStrategy(((RewritableSortedMap) map).getRewriteStrategy()); + } + } + + @Override + public RewriteStrategy getRewriteStrategy() { + return rewriteStrategy; + } + + @Override + public void setRewriteStrategy(RewriteStrategy rewriteStrategy) { + this.rewriteStrategy = rewriteStrategy; + } + + /** + * This will revert this map to whatever contents are in the underlying file, making the map "persisted". This is intended to be used following a load + * command when no changes were actually made the the map If the persist options included verification, then the files will be verified prior to unloading. + * + * @throws IOException + * for issues with read/write + */ + public void unload() throws IOException { + if (!persisted) { + verifyPersistance(handler, this.map.size(), Collections.emptyList()); + this.map.clear(); + persisted = true; + } + } + + /** + * This will dump the map to the file, making the map "persisted" + * + * @throws IOException + * for issues with read/write + */ + public void persist() throws IOException { + persist(this.handler); + } + + /** + * This will dump the map to the file, making the map "persisted" + * + * @param handler + * the handler + * @throws IOException + * for issues with read/write + */ + public void persist(TypedSortedMapFileHandler handler) throws IOException { + if (!persisted) { + persist(this.map, handler); + this.map.clear(); + persisted = true; + } + } + + /** + * This will dump the map to a file, making the map "persisted" The implementation is expected to wrap the handler with a TypedSortedMapFileHandler and the + * call persist(TypedSortedMapFileHandler handler) + * + * @param handler + * the sorted map file handler + * @throws IOException + * for issues with read/write + */ + public abstract void persist(SortedMapFileHandler handler) throws IOException; + + /** + * Persist the supplied map to a file as defined by this classes sorted map file handler. 
+ * + * @param map + * the map + * @param handler + * the handler + * @throws IOException + * for issues with read/write + * + */ + private void persist(SortedMap map, TypedSortedMapFileHandler handler) throws IOException { + if (log.isDebugEnabled()) { + log.debug("Persisting " + handler); + } + + long start = System.currentTimeMillis(); + try { + // assign the passed in file handler + // if we can't persist, we will remap to null + this.handler = handler; + + int actualSize = 0; + FileSortedSet.PersistOptions persistOptions = handler.getPersistOptions(); + List> mapToVerify = new ArrayList<>(); + try (SortedMapOutputStream stream = handler.getOutputStream()) { + for (Entry t : map.entrySet()) { + stream.writeObject(t.getKey(), t.getValue()); + if (persistOptions.isVerifyElements() && mapToVerify.size() < persistOptions.getNumElementsToVerify()) { + mapToVerify.add(t); + } + actualSize++; + } + stream.writeSize(actualSize); + } + // now verify the written file + verifyPersistance(handler, actualSize, mapToVerify); + + } catch (IOException e) { + handler.deleteFile(); + this.handler = null; + throw e; + } + + if (log.isDebugEnabled()) { + long delta = System.currentTimeMillis() - start; + log.debug("Persisting " + handler + " took " + delta + "ms"); + } + } + + private void verifyPersistance(TypedSortedMapFileHandler handler, int size, List> mapToVerify) throws IOException { + // verify we wrote at least the size.... + if (handler.getSize() == 0) { + throw new IOException("Failed to verify file existence"); + } + FileSortedSet.PersistOptions persistOptions = handler.getPersistOptions(); + // now verify the first n objects were written correctly + if (persistOptions.isVerifyElements() && !mapToVerify.isEmpty()) { + try (SortedMapInputStream inStream = handler.getInputStream()) { + int count = 0; + for (Map.Entry t : mapToVerify) { + count++; + Map.Entry input = inStream.readObject(); + if (!equals(t, input)) { + throw new IOException("Failed to verify element " + count + " was written"); + } + } + } + } + + // now verify the size was written at the end + if (persistOptions.isVerifySize()) { + if (readSize() != size) { + throw new IOException("Failed to verify file size was written"); + } + } + } + + /** + * Read the size from the file which is in the last 4 bytes. 
+ * + * @return the size (in terms of objects) + * @throws IOException + * for issues with read/write + */ + private int readSize() throws IOException { + try (SortedMapInputStream inStream = handler.getInputStream()) { + return inStream.readSize(); + } + } + + /** + * This will read the file into an in-memory map, making this file "unpersisted" + * + * @throws IOException + * for issues with read/write + * @throws ClassNotFoundException + * if the class is not found + */ + public void load() throws IOException, ClassNotFoundException { + if (persisted) { + try (SortedMapInputStream stream = getBoundedFileHandler().getInputStream(getStart(), getEnd())) { + Map.Entry obj = stream.readObject(); + while (obj != null) { + map.put(obj.getKey(), obj.getValue()); + obj = stream.readObject(); + } + } + persisted = false; + } + } + + protected Map.Entry readObject(ObjectInputStream stream) { + try { + K key = (K) stream.readObject(); + V value = (V) stream.readObject(); + return new UnmodifiableMapEntry(key, value); + } catch (Exception E) { + return null; + } + } + + protected void writeObject(ObjectOutputStream stream, K key, V value) throws IOException { + stream.writeObject(key); + stream.writeObject(value); + } + + /* + * Is this map persisted? + */ + public boolean isPersisted() { + return persisted; + } + + /** + * Get the size of the map. Note if the map has been persisted, then this may be an upper bound on the size. + * + * @return the size upper bound + */ + @Override + public int size() { + if (persisted) { + if (isSubmap()) { + throw new IllegalStateException("Unable to determine size of a submap of a persisted map. Please call load() first."); + } + try { + return readSize(); + } catch (Exception e) { + throw new IllegalStateException("Unable to get size from file", e); + } + } else { + return map.size(); + } + } + + @Override + public boolean isEmpty() { + // must attempt to read the first element to be sure if persisted + try { + firstKey(); + return false; + } catch (NoSuchElementException e) { + return true; + } + } + + @SuppressWarnings("unchecked") + @Override + public boolean containsKey(Object o) { + if (persisted) { + K t = (K) o; + K start = getStart(); + K end = getEnd(); + if ((start != null) && (compare(t, start) < 0)) { + return false; + } + if ((end != null) && (compare(t, end) >= 0)) { + return false; + } + try (SortedMapInputStream stream = getBoundedFileHandler().getInputStream(t, end)) { + Map.Entry next = stream.readObject(); + return (next != null && equals(next.getKey(), t)); + } catch (Exception e) { + return false; + } + } else { + return map.containsKey(o); + } + } + + @Override + public boolean containsValue(Object o) { + if (persisted) { + V t = (V) o; + try (SortedMapInputStream stream = getBoundedFileHandler().getInputStream(getStart(), getEnd())) { + Map.Entry next = stream.readObject(); + while (next != null) { + if (next.getValue().equals(t)) { + return true; + } + next = stream.readObject(); + } + } catch (Exception e) { + return false; + } + return false; + } else { + return map.containsValue(o); + } + } + + @Override + public V get(Object key) { + if (persisted) { + K t = (K) key; + try (SortedMapInputStream stream = getBoundedFileHandler().getInputStream(getStart(), getEnd())) { + Map.Entry next = stream.readObject(); + while (next != null) { + if (equals(next.getKey(), t)) { + return next.getValue(); + } + next = stream.readObject(); + } + } catch (Exception e) { + return null; + } + return null; + } else { + return map.get(key); + } + } + + 
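+    // put() honors the rewrite strategy: when the key is already mapped, the new value replaces the old one
+    // only if no strategy is set or rewriteStrategy.rewrite(key, previous, value) returns true. Either way,
+    // the previous value is returned, mirroring java.util.Map semantics.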
@Override + public V put(K key, V value) { + if (persisted) { + throw new IllegalStateException("Cannot add an element to a persisted FileSortedMap. Please call load() first."); + } else { + V previous = map.get(key); + if ((previous == null) || (rewriteStrategy == null) || (rewriteStrategy.rewrite(key, previous, value))) { + map.put(key, value); + } + return previous; + } + } + + @Override + public V remove(Object o) { + if (persisted) { + throw new IllegalStateException("Cannot remove an element from a persisted FileSortedMap. Please call load() first."); + } else { + return map.remove(o); + } + } + + @Override + public void putAll(Map<? extends K,? extends V> m) { + for (Entry<? extends K,? extends V> entry : m.entrySet()) { + put(entry.getKey(), entry.getValue()); + } + } + + @Override + public void clear() { + if (persisted) { + handler.deleteFile(); + persisted = false; + } else { + map.clear(); + } + } + + @Override + public Comparator<? super K> comparator() { + return map.comparator(); + } + + @Override + public SortedMap<K,V> subMap(K fromElement, K toElement) { + return factory.newInstance(this, getStart(fromElement), getEnd(toElement)); + } + + @Override + public SortedMap<K,V> headMap(K toElement) { + return factory.newInstance(this, getStart(null), getEnd(toElement)); + } + + @Override + public SortedMap<K,V> tailMap(K fromElement) { + return factory.newInstance(this, getStart(fromElement), getEnd(null)); + } + + @Override + public K firstKey() { + if (persisted) { + try (SortedMapInputStream<K,V> stream = getBoundedFileHandler().getInputStream(getStart(), getEnd())) { + Map.Entry<K,V> first = stream.readObject(); + return first.getKey(); + } catch (Exception e) { + throw new IllegalStateException(new QueryException(DatawaveErrorCode.FETCH_FIRST_ELEMENT_ERROR, e)); + } + } else if (!map.isEmpty()) { + return map.firstKey(); + } + throw (NoSuchElementException) new NoSuchElementException().initCause(new QueryException(DatawaveErrorCode.FETCH_FIRST_ELEMENT_ERROR)); + } + + @Override + public K lastKey() { + if (persisted) { + try (SortedMapInputStream<K,V> stream = getBoundedFileHandler().getInputStream(getStart(), getEnd())) { + Map.Entry<K,V> last = stream.readObject(); + Map.Entry<K,V> next = stream.readObject(); + while (next != null) { + last = next; + next = stream.readObject(); + } + return last.getKey(); + } catch (Exception e) { + throw new IllegalStateException(new QueryException(DatawaveErrorCode.FETCH_LAST_ELEMENT_ERROR, e)); + } + } else if (!map.isEmpty()) { + return map.lastKey(); + } + throw (NoSuchElementException) new NoSuchElementException().initCause(new QueryException(DatawaveErrorCode.FETCH_LAST_ELEMENT_ERROR)); + } + + private Iterator<Map.Entry<K,V>> iterator() { + if (persisted) { + return new FileIterator(); + } else { + return map.entrySet().iterator(); + } + } + + @Override + public Set<K> keySet() { + return new AbstractSet<K>() { + + @Override + public Iterator<K> iterator() { + return IteratorUtils.transformedIterator(FileSortedMap.this.iterator(), o -> ((Map.Entry<K,V>) o).getKey()); + } + + @Override + public int size() { + return FileSortedMap.this.size(); + } + }; + } + + @Override + public Collection<V> values() { + return new AbstractCollection<V>() { + + @Override + public Iterator<V> iterator() { + return IteratorUtils.transformedIterator(FileSortedMap.this.iterator(), o -> ((Map.Entry<K,V>) o).getValue()); + } + + @Override + public int size() { + return FileSortedMap.this.size(); + } + }; + } + + @Override + public Set<Map.Entry<K,V>> entrySet() { + return new AbstractSet<Map.Entry<K,V>>() { + + @Override + public Iterator<Map.Entry<K,V>> iterator() { + return FileSortedMap.this.iterator(); + } + + @Override + public int
size() { + return FileSortedMap.this.size(); + } + }; + } + + @Override + public String toString() { + return persisted ? handler.toString() : map.toString(); + } + + /** + * Extending classes must implement cloneable + * + * @return A clone + */ + public FileSortedMap clone() { + return factory.newInstance(this); + } + + /* Some utilities */ + private boolean equals(Map.Entry o1, Map.Entry o2) { + if (o1 == null) { + return o2 == null; + } else if (o2 == null) { + return false; + } else { + return equals(o1.getKey(), o2.getKey()) && o1.getValue().equals(o2.getValue()); + } + } + + private boolean equals(K o1, K o2) { + if (o1 == null) { + return o2 == null; + } else if (o2 == null) { + return false; + } else { + if (map.comparator() == null) { + return o1.equals(o2); + } else { + return map.comparator().compare(o1, o2) == 0; + } + } + } + + private K getStart() { + return (isSubmap() ? range[0] : null); + } + + private K getStart(K from) { + K start = getStart(); + if (start == null) { + return from; + } else if (from == null) { + return start; + } else if (compare(start, from) > 0) { + return start; + } else { + return from; + } + } + + private K getEnd() { + return (isSubmap() ? range[1] : null); + } + + private K getEnd(K to) { + K end = getEnd(); + if (end == null) { + return to; + } else if (to == null) { + return end; + } else if (compare(end, to) < 0) { + return end; + } else { + return to; + } + } + + private boolean isSubmap() { + return (range != null); + } + + private int compare(K a, K b) { + return (this.map.comparator() != null) ? this.map.comparator().compare(a, b) : ((Comparable) a).compareTo(b); + } + + public BoundedTypedSortedMapFileHandler getBoundedFileHandler() { + return new DefaultBoundedTypedSortedMapFileHandler(); + } + + /** + * This is the iterator for a persisted FileSortedMap + */ + protected class FileIterator implements Iterator> { + private SortedMapInputStream stream; + private Map.Entry next; + + public FileIterator() { + try { + this.stream = getBoundedFileHandler().getInputStream(getStart(), getEnd()); + next = stream.readObject(); + if (next == null) { + cleanup(); + } + } catch (Exception e) { + cleanup(); + throw new IllegalStateException("Unable to read file", e); + } + } + + public void cleanup() { + if (stream != null) { + try { + stream.close(); + } catch (Exception e) { + // we tried... 
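+ // a failed close on a read-only stream is non-fatal; the reference is dropped below so the iterator can still terminate cleanly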
+ } + stream = null; + } + } + + @Override + public boolean hasNext() { + return (next != null); + } + + @Override + public Map.Entry<K,V> next() { + if (!hasNext()) { + QueryException qe = new QueryException(DatawaveErrorCode.FETCH_NEXT_ELEMENT_ERROR); + throw (NoSuchElementException) (new NoSuchElementException().initCause(qe)); + } + try { + Map.Entry<K,V> rtrn = next; + next = stream.readObject(); + if (next == null) { + cleanup(); + } + return rtrn; + } catch (Exception e) { + cleanup(); + throw new IllegalStateException("Unable to get next element from file", e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Iterator.remove() not supported on a persisted map."); + } + + @Override + protected void finalize() throws Throwable { + cleanup(); + super.finalize(); + } + } + + /** + * An interface for a sorted map factory + * + * @param <K> + * key of the map + * @param <V> + * value of the map + */ + public interface FileSortedMapFactory<K,V> { + /** + * factory method + * + * @param other + * the map to copy + * @return a new instance + */ + FileSortedMap<K,V> newInstance(FileSortedMap<K,V> other); + + /** + * factory method + * + * @param other + * the map to copy + * @param from + * the start (inclusive) of the key range + * @param to + * the end (exclusive) of the key range + * @return a new instance + */ + FileSortedMap<K,V> newInstance(FileSortedMap<K,V> other, K from, K to); + + /** + * Factory method + * + * @param comparator + * the key comparator + * @param rewriteStrategy + * the rewrite strategy + * @param handler + * the sorted map file handler + * @param persisted + * a persisted boolean flag + * @return a new instance + */ + FileSortedMap<K,V> newInstance(Comparator<K> comparator, RewriteStrategy<K,V> rewriteStrategy, SortedMapFileHandler handler, boolean persisted); + + /** + * Create an unpersisted sorted map (still in memory) + * + * @param map + * the sorted map + * @param handler + * the sorted map file handler + * @return a new instance + */ + FileSortedMap<K,V> newInstance(SortedMap<K,V> map, SortedMapFileHandler handler); + + /** + * factory method + * + * @param map + * the sorted map + * @param handler + * the sorted map file handler + * @param persist + * a persisted boolean flag + * @return a new instance + * @throws IOException + * for problems with read/write + */ + FileSortedMap<K,V> newInstance(SortedMap<K,V> map, SortedMapFileHandler handler, boolean persist) throws IOException; + } + + /** + * A sorted map input stream + * + * @param <K> + * key of the map + * @param <V> + * value of the map + */ + public interface SortedMapInputStream<K,V> extends AutoCloseable { + Map.Entry<K,V> readObject() throws IOException; + + int readSize() throws IOException; + + void close(); + } + + /** + * A sorted map output stream + * + * @param <K> + * key of the map + * @param <V> + * value of the map + */ + public interface SortedMapOutputStream<K,V> extends AutoCloseable { + void writeObject(K key, V value) throws IOException; + + void writeSize(int size) throws IOException; + + void close() throws IOException; + } + + /** + * A factory that will provide the input stream and output stream to the same underlying file.
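+ * SortedMapTempFileHandler (local temporary files) and HdfsBackedSortedMap.SortedMapHdfsFileHandler (HDFS files) are the two implementations added in this change.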
+ * + */ + public interface SortedMapFileHandler { + /** + * Return the input stream + * + * @return the input stream + * @throws IOException + * for problems with read/write + */ + InputStream getInputStream() throws IOException; + + /** + * Return the output stream + * + * @return the sorted map output stream + * @throws IOException + * for problems with read/write + */ + OutputStream getOutputStream() throws IOException; + + /** + * Get the persistent verification options + * + * @return the persistent verification options + */ + FileSortedSet.PersistOptions getPersistOptions(); + + long getSize(); + + void deleteFile(); + } + + /** + * A factory that will provide the input stream and output stream to the same underlying file. + * + */ + public interface TypedSortedMapFileHandler { + /** + * Return the input stream + * + * @return the input stream + * @throws IOException + * for problems with read/write + */ + SortedMapInputStream getInputStream() throws IOException; + + /** + * Return the output stream + * + * @return the sorted map output stream + * @throws IOException + * for problems with read/write + */ + SortedMapOutputStream getOutputStream() throws IOException; + + /** + * Get the persistent verification options + * + * @return persistent verification options + */ + FileSortedSet.PersistOptions getPersistOptions(); + + long getSize(); + + void deleteFile(); + } + + /** + * A factory that will provide the input stream and output stream to the same underlying file. An additional input stream method allows for creating a + * stream submap. + * + */ + public interface BoundedTypedSortedMapFileHandler extends TypedSortedMapFileHandler { + /** + * Return the input stream + * + * @return the input stream + * @param start + * start point + * @param end + * end point + * @throws IOException + * for problems with read/write + */ + SortedMapInputStream getInputStream(K start, K end) throws IOException; + } + + /** + * A default implementation for a bounded typed sorted map + */ + public class DefaultBoundedTypedSortedMapFileHandler implements BoundedTypedSortedMapFileHandler { + @Override + public SortedMapInputStream getInputStream(K start, K end) throws IOException { + if (handler instanceof BoundedTypedSortedMapFileHandler) { + return ((BoundedTypedSortedMapFileHandler) handler).getInputStream(start, end); + } else { + return new BoundedInputStream(handler.getInputStream(), start, end); + } + } + + @Override + public SortedMapInputStream getInputStream() throws IOException { + return handler.getInputStream(); + } + + @Override + public SortedMapOutputStream getOutputStream() throws IOException { + return handler.getOutputStream(); + } + + @Override + public FileSortedSet.PersistOptions getPersistOptions() { + return handler.getPersistOptions(); + } + + @Override + public long getSize() { + return handler.getSize(); + } + + @Override + public void deleteFile() { + handler.deleteFile(); + } + } + + /** + * An input stream that supports bounding the objects. Used when the underlying stream does not already support bounding. 
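+ * The resulting bound is inclusive of the start key and exclusive of the end key, mirroring SortedMap.subMap semantics.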
+ */ + public class BoundedInputStream implements SortedMapInputStream<K,V> { + private final SortedMapInputStream<K,V> delegate; + private final K from; + private final K to; + + public BoundedInputStream(SortedMapInputStream<K,V> stream, K from, K to) { + this.delegate = stream; + this.from = from; + this.to = to; + } + + @Override + public Map.Entry<K,V> readObject() throws IOException { + Map.Entry<K,V> o = delegate.readObject(); + while ((o != null) && (from != null) && (compare(o.getKey(), from) < 0)) { + o = delegate.readObject(); + } + if (o == null || (to != null && compare(o.getKey(), to) >= 0)) { + return null; + } else { + return o; + } + } + + @Override + public int readSize() throws IOException { + return delegate.readSize(); + } + + @Override + public void close() { + delegate.close(); + } + } + + public interface RewriteStrategy<K,V> { + /** + * Determine if the object should be rewritten + * + * @param key + * The key + * @param original + * The original value + * @param update + * The updated value + * @return true if the original should be replaced with the update + */ + boolean rewrite(K key, V original, V update); + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/HdfsBackedSortedMap.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/HdfsBackedSortedMap.java new file mode 100644 index 00000000000..215aeb490b0 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/HdfsBackedSortedMap.java @@ -0,0 +1,301 @@ +package datawave.query.util.sortedmap; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsStatus; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.Logger; + +import datawave.query.iterator.ivarator.IvaratorCacheDir; +import datawave.query.iterator.ivarator.IvaratorCacheDirConfig; +import datawave.query.util.sortedset.FileSortedSet; + +/** + * Sorted map backed by HDFS + * + * @param <K> + * key of the map + * @param <V> + * value of the map + */ +public class HdfsBackedSortedMap<K,V> extends BufferedFileBackedSortedMap<K,V> { + private static final Logger log = Logger.getLogger(HdfsBackedSortedMap.class); + private static final String FILENAME_PREFIX = "SortedMapFile."; + + public static class Builder<B extends Builder<B,K,V>,K,V> extends BufferedFileBackedSortedMap.Builder<B,K,V> { + private List<IvaratorCacheDir> ivaratorCacheDirs; + private String uniqueSubPath; + private FileSortedSet.PersistOptions persistOptions; + + public Builder() { + // change the default buffer persist threshold + withBufferPersistThreshold(10_000); + } + + @Override + @SuppressWarnings("unchecked") + protected B self() { + return (B) this; + } + + public B withIvaratorCacheDirs(List<IvaratorCacheDir> ivaratorCacheDirs) { + this.ivaratorCacheDirs = ivaratorCacheDirs; + return self(); + } + + public B withUniqueSubPath(String uniqueSubPath) { + this.uniqueSubPath = uniqueSubPath; + return self(); + } + + public B withPersistOptions(FileSortedSet.PersistOptions persistOptions) { + this.persistOptions = persistOptions; + return self(); + } + + public HdfsBackedSortedMap<K,V> build() throws IOException { + return new HdfsBackedSortedMap<>(this); + } + } + + public static <K,V> HdfsBackedSortedMap.Builder<?,K,V> builder() { + return new HdfsBackedSortedMap.Builder<>(); + } + + protected HdfsBackedSortedMap(HdfsBackedSortedMap<K,V> other) { + super(other); + } + + protected
HdfsBackedSortedMap(Builder builder) throws IOException { + super(builder); + List factories = createFileHandlerFactories(builder.ivaratorCacheDirs, builder.uniqueSubPath, builder.persistOptions); + // update the parent handler factories (list of SortedMapFileHandlerFactory) + this.handlerFactories = (List) factories; + // for each of the handler factories, check to see if there are any existing files we should load + for (SortedMapHdfsFileHandlerFactory handlerFactory : factories) { + SortedMapHdfsFileHandlerFactory hdfsHandlerFactory = (SortedMapHdfsFileHandlerFactory) handlerFactory; + FileSystem fs = hdfsHandlerFactory.getFs(); + int count = 0; + + // if the directory already exists, load up this sorted map with any existing files + if (fs.exists(hdfsHandlerFactory.getUniqueDir())) { + FileStatus[] files = fs.listStatus(hdfsHandlerFactory.getUniqueDir()); + if (files != null) { + for (FileStatus file : files) { + if (!file.isDir() && file.getPath().getName().startsWith(FILENAME_PREFIX)) { + count++; + addMap(mapFactory.newInstance(comparator, getRewriteStrategy(), + new SortedMapHdfsFileHandler(fs, file.getPath(), builder.persistOptions), true)); + } + } + } + + hdfsHandlerFactory.mapFileCount(count); + } + } + } + + private static List createFileHandlerFactories(List ivaratorCacheDirs, String uniqueSubPath, + FileSortedSet.PersistOptions persistOptions) { + List fileHandlerFactories = new ArrayList<>(); + for (IvaratorCacheDir ivaratorCacheDir : ivaratorCacheDirs) { + fileHandlerFactories.add(new SortedMapHdfsFileHandlerFactory(ivaratorCacheDir, uniqueSubPath, persistOptions)); + } + return fileHandlerFactories; + } + + @Override + public void clear() { + // This will be a new ArrayList<>() containing the same FileSortedMaps + List> SortedMaps = super.getMaps(); + // Clear will call clear on each of the FileSortedMaps, clear the container, and null the buffer + super.clear(); + // We should still be able to access the FileSortedMap objects to get their handler because we + // have a copy of the object in 'SortedMaps' + for (FileSortedMap fss : SortedMaps) { + if (fss.isPersisted() && fss.handler instanceof SortedMapHdfsFileHandler) { + ((SortedMapHdfsFileHandler) fss.handler).deleteFile(); + } + } + } + + public static class SortedMapHdfsFileHandlerFactory implements SortedMapFileHandlerFactory { + final private IvaratorCacheDir ivaratorCacheDir; + private String uniqueSubPath; + private int fileCount = 0; + private FileSortedSet.PersistOptions persistOptions; + + public SortedMapHdfsFileHandlerFactory(IvaratorCacheDir ivaratorCacheDir, String uniqueSubPath, FileSortedSet.PersistOptions persistOptions) { + this.ivaratorCacheDir = ivaratorCacheDir; + this.uniqueSubPath = uniqueSubPath; + this.persistOptions = persistOptions; + } + + public IvaratorCacheDir getIvaratorCacheDir() { + return ivaratorCacheDir; + } + + public FileSystem getFs() { + return ivaratorCacheDir.getFs(); + } + + public Path getUniqueDir() { + return new Path(ivaratorCacheDir.getPathURI(), uniqueSubPath); + } + + public int getFileCount() { + return fileCount; + } + + void mapFileCount(int count) { + this.fileCount = count; + } + + public boolean isValid() { + FsStatus fsStatus = null; + try { + fsStatus = ivaratorCacheDir.getFs().getStatus(); + } catch (IOException e) { + log.warn("Unable to determine status of the filesystem: " + ivaratorCacheDir.getFs()); + } + + // determine whether this fs is a good candidate + if (fsStatus != null) { + long availableStorageMiB = fsStatus.getRemaining() / 0x100000L; + double 
availableStoragePercent = (double) fsStatus.getRemaining() / fsStatus.getCapacity(); + + // if we are using less than our storage limit, the cache dir is valid + return availableStorageMiB >= ivaratorCacheDir.getConfig().getMinAvailableStorageMiB() + && availableStoragePercent >= ivaratorCacheDir.getConfig().getMinAvailableStoragePercent(); + } + + return false; + } + + @Override + public FileSortedMap.SortedMapFileHandler createHandler() throws IOException { + FileSystem fs = getFs(); + Path uniqueDir = getUniqueDir(); + + // Lazily create the required ivarator cache dirs. + ensureDirsCreated(); + + // generate a unique file name + fileCount++; + Path file = new Path(uniqueDir, FILENAME_PREFIX + fileCount + '.' + System.currentTimeMillis()); + return new SortedMapHdfsFileHandler(fs, file, persistOptions); + } + + private void ensureDirsCreated() throws IOException { + IvaratorCacheDirConfig config = ivaratorCacheDir.getConfig(); + if (config.isValid()) { + ensureCreation(new Path(ivaratorCacheDir.getPathURI())); + ensureCreation(getUniqueDir()); + } else { + throw new IOException("Unable to create Ivarator Cache Dir for invalid config: " + config); + } + } + + private void ensureCreation(Path path) throws IOException { + try { + FileSystem fs = getFs(); + if (!fs.exists(path)) { + // Attempt to create the required directory if it does not exist. + if (!fs.mkdirs(path)) { + throw new IOException("Unable to mkdirs: fs.mkdir(" + path + ")->false"); + } + } + } catch (MalformedURLException e) { + throw new IOException("Unable to load hadoop configuration", e); + } catch (Exception e) { + log.warn("Unable to create directory [" + path + "] in file system [" + getFs() + "]", e); + throw new IOException("Unable to create directory [" + path + "] in file system [" + getFs() + "]", e); + } + } + + @Override + public String toString() { + return getUniqueDir() + " (fileCount=" + fileCount + ')'; + } + + } + + public static class SortedMapHdfsFileHandler implements FileSortedMap.SortedMapFileHandler { + private FileSystem fs; + private Path file; + private FileSortedSet.PersistOptions persistOptions; + + public SortedMapHdfsFileHandler(FileSystem fs, Path file, FileSortedSet.PersistOptions persistOptions) { + this.fs = fs; + this.file = file; + this.persistOptions = persistOptions; + } + + private String getScheme() { + String scheme = file.toUri().getScheme(); + if (scheme == null) { + scheme = fs.getScheme(); + } + return scheme; + } + + @Override + public InputStream getInputStream() throws IOException { + if (log.isDebugEnabled()) { + log.debug("Reading " + file); + } + return fs.open(file); + } + + @Override + public OutputStream getOutputStream() throws IOException { + if (log.isDebugEnabled()) { + log.debug("Creating " + file); + } + return fs.create(file); + } + + @Override + public FileSortedSet.PersistOptions getPersistOptions() { + return persistOptions; + } + + @Override + public long getSize() { + try { + FileStatus status = fs.getFileStatus(file); + return status.getLen(); + } catch (Exception e) { + log.warn("Failed to verify file " + file, e); + return -1; + } + } + + @Override + public void deleteFile() { + try { + if (log.isDebugEnabled()) { + log.debug("Deleting " + file); + } + if (!fs.delete(file, true)) { + log.error("Failed to delete file " + file + ": delete returned false"); + } + } catch (IOException e) { + log.error("Failed to delete file " + file, e); + } + } + + @Override + public String toString() { + return file.toString(); + } + + } +} diff --git 
a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/MultiMapBackedSortedMap.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/MultiMapBackedSortedMap.java new file mode 100644 index 00000000000..e174e5def30 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/MultiMapBackedSortedMap.java @@ -0,0 +1,407 @@ +package datawave.query.util.sortedmap; + +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.apache.commons.collections4.keyvalue.UnmodifiableMapEntry; +import org.apache.commons.lang3.builder.EqualsBuilder; + +import com.google.common.collect.Iterators; + +import datawave.webservice.query.exception.DatawaveErrorCode; +import datawave.webservice.query.exception.QueryException; + +/* + * This is a sorted map that is backed by multiple underlying sorted maps. It is assumed that the underlying + * sorted maps contain the same type of underlying value, and they use the same comparator. The rewrite + * strategy will be used if the underlying sorted maps are RewritableSortedMap implementations. + * @param + * key of the map + * @param + * value of the map + */ +public class MultiMapBackedSortedMap extends AbstractMap implements RewritableSortedMap { + protected List> maps = new ArrayList<>(); + protected Comparator comparator = null; + protected FileSortedMap.RewriteStrategy rewriteStrategy = null; + + public MultiMapBackedSortedMap() {} + + public MultiMapBackedSortedMap(List> maps) { + for (SortedMap map : maps) { + addMap(map); + } + } + + public void addMap(SortedMap map) { + if (maps.isEmpty()) { + updateConfiguration(map); + } else { + verifyConfiguration(map); + } + maps.add(map); + } + + private void updateConfiguration(SortedMap map) { + comparator = getComparator(map); + rewriteStrategy = getRewriteStrategy(map); + } + + private void verifyConfiguration(SortedMap map) { + if (!(new EqualsBuilder().append(getClass(comparator), getClass(getComparator(map))) + .append(getClass(rewriteStrategy), getClass(getRewriteStrategy(map))).isEquals())) { + throw new IllegalArgumentException("map being added does not match the comparator and rewriteStrategy of the existing maps"); + } + } + + private Class getClass(Object obj) { + return (obj == null ? null : obj.getClass()); + } + + private FileSortedMap.RewriteStrategy getRewriteStrategy(SortedMap map) { + if (map instanceof RewritableSortedMap) { + return ((RewritableSortedMap) map).getRewriteStrategy(); + } + return null; + } + + private Comparator getComparator(SortedMap map) { + return (Comparator) (map.comparator()); + } + + /** + * Get the underlying maps + * + * @return the maps + */ + public List> getMaps() { + return maps; + } + + /** + * Return the size of this map. 
NOTE that this is somewhat expensive as we require iterating over the maps to determine the true value (see + * MergeSortIterator); + */ + @Override + public int size() { + return Iterators.size(iterator()); + } + + @Override + public boolean isEmpty() { + if (maps == null) { + return true; + } + for (SortedMap map : maps) { + if (map != null && !map.isEmpty()) { + return false; + } + } + return true; + } + + @Override + public boolean containsKey(Object o) { + for (SortedMap map : maps) { + if (map.containsKey(o)) { + return true; + } + } + return false; + } + + protected Iterator> iterator() { + return new MergeSortIterator(); + } + + @Override + public V remove(Object o) { + V value = null; + for (SortedMap map : maps) { + V testValue = map.remove(o); + if (testValue != null) { + if (value != null) { + if (rewriteStrategy == null || rewriteStrategy.rewrite((K) o, value, testValue)) { + value = testValue; + } + } else { + value = testValue; + } + } + } + return value; + } + + @Override + public void clear() { + for (SortedMap map : this.maps) { + try { + map.clear(); + } catch (Exception e) { + // error clearing sorted map + // possibility of FileNotFoundException, etc being + // caught and re-thrown as an exception + } + } + this.maps.clear(); + } + + @Override + public Set> entrySet() { + return new AbstractSet<>() { + + @Override + public Iterator> iterator() { + return MultiMapBackedSortedMap.this.iterator(); + } + + @Override + public int size() { + return MultiMapBackedSortedMap.this.size(); + } + }; + } + + @Override + public Comparator comparator() { + return comparator; + } + + @Override + public RewritableSortedMap subMap(K fromElement, K toElement) { + MultiMapBackedSortedMap submap = new MultiMapBackedSortedMap<>(); + submap.setRewriteStrategy(rewriteStrategy); + for (SortedMap map : maps) { + submap.addMap(map.subMap(fromElement, toElement)); + } + return submap; + } + + @Override + public RewritableSortedMap headMap(K toElement) { + MultiMapBackedSortedMap submap = new MultiMapBackedSortedMap<>(); + submap.setRewriteStrategy(rewriteStrategy); + for (SortedMap map : maps) { + submap.addMap(map.headMap(toElement)); + } + return submap; + } + + @Override + public RewritableSortedMap tailMap(K fromElement) { + MultiMapBackedSortedMap submap = new MultiMapBackedSortedMap<>(); + submap.setRewriteStrategy(rewriteStrategy); + for (SortedMap map : maps) { + submap.addMap(map.tailMap(fromElement)); + } + return submap; + } + + @Override + public K firstKey() throws NoSuchElementException { + if (maps == null || maps.isEmpty()) { + throw new NoSuchElementException("No elements in input maps"); + } + SortedSet firstSet = new TreeSet<>(comparator()); + for (SortedMap map : maps) { + if (map != null && !map.isEmpty()) { + K s = map.firstKey(); + firstSet.add(s); + } + } + if (firstSet.isEmpty()) { + throw new NoSuchElementException("No elements in input maps"); + } + return firstSet.first(); + } + + @Override + public K lastKey() throws NoSuchElementException { + if (maps == null || maps.isEmpty()) { + throw new NoSuchElementException("No elements in input maps"); + } + SortedSet lastSet = new TreeSet<>(comparator()); + for (SortedMap map : maps) { + if (map != null && !map.isEmpty()) { + K s = map.lastKey(); + lastSet.add(s); + } + } + if (lastSet.isEmpty()) { + throw new NoSuchElementException("No elements in input maps"); + } + return lastSet.last(); + } + + @Override + public FileSortedMap.RewriteStrategy getRewriteStrategy() { + return rewriteStrategy; + } + + @Override + public 
void setRewriteStrategy(FileSortedMap.RewriteStrategy rewriteStrategy) { + this.rewriteStrategy = rewriteStrategy; + } + + @Override + public V get(Object o) { + V value = null; + for (SortedMap map : maps) { + V testValue = map.get(o); + if (testValue != null) { + if (value != null) { + if (rewriteStrategy == null || rewriteStrategy.rewrite((K) o, value, testValue)) { + value = testValue; + } + } else { + value = testValue; + } + } + } + return value; + } + + /** + * This is an iterator that will return a sorted map of items (no dups) from an underlying map of sorted maps. + */ + public class MergeSortIterator implements Iterator> { + + // this is the entire set of iterators + private List>> iterators = new ArrayList<>(); + // this is the list of the last key from each of the iterators available to use + private List lastList = new ArrayList<>(); + // booleans denoting if an iterator has been completely used up + private boolean[] finished = null; + // This map holds the key/values to be returned next. + private SortedMap map = null; + private boolean populated = false; + private K nextKey = null; + private V nextValue = null; + // This is the set of iterators that contributed to the last value returned + private List>> nextIterators = new ArrayList<>(); + + public MergeSortIterator() { + for (SortedMap map : maps) { + Iterator> it = map.entrySet().iterator(); + iterators.add(it); + nextIterators.add(it); + lastList.add(null); + } + this.map = new TreeMap(comparator); + this.finished = new boolean[iterators.size()]; + } + + @Override + public boolean hasNext() { + if (!map.isEmpty()) { + return true; + } + for (Iterator> it : nextIterators) { + if (it != null && it.hasNext()) { + return true; + } + } + return false; + } + + @Override + public Entry next() { + populate(); + if (!populated) { + QueryException qe = new QueryException(DatawaveErrorCode.FETCH_NEXT_ELEMENT_ERROR); + throw (NoSuchElementException) (new NoSuchElementException().initCause(qe)); + } + return new UnmodifiableMapEntry<>(nextKey, nextValue); + } + + @Override + public void remove() { + if (!populated) { + throw new IllegalStateException(); + } + Exception e = null; + for (Iterator> it : nextIterators) { + if (it != null) { + try { + it.remove(); + } catch (UnsupportedOperationException uoe) { + e = uoe; + } + } + } + populated = false; + if (e != null) { + throw new UnsupportedOperationException("One or more of the underlying sets does not support this operation", e); + } + } + + /* Some utility methods */ + private boolean equals(K o1, K o2) { + if (o1 == null) { + return o2 == null; + } else if (o2 == null) { + return false; + } else { + if (map.comparator() == null) { + return o1.equals(o2); + } else { + return map.comparator().compare(o1, o2) == 0; + } + } + } + + private void populate() { + populated = false; + + // update the last value for those iterators contributing to + // the last returned value + for (int i = 0; i < nextIterators.size(); i++) { + if (nextIterators.get(i) != null) { + Iterator> it = nextIterators.get(i); + if (it.hasNext()) { + Entry val = it.next(); + // remember the last key returned + lastList.set(i, val.getKey()); + if ((rewriteStrategy == null) || (!map.containsKey(val.getKey())) + || (rewriteStrategy.rewrite(val.getKey(), map.get(val.getKey()), val.getValue()))) { + // update the map if the rewrite policy allows (or a new key) + map.put(val.getKey(), val.getValue()); + } + } else { + // remember that we are done with this iterator + lastList.set(i, null); + finished[i] = true; + } + } 
+ } + + if (!map.isEmpty()) { + // now get the next key/value from the map + nextKey = map.firstKey(); + nextValue = map.remove(nextKey); + // and update the list of iterators that contributed to this next key + for (int i = 0; i < iterators.size(); i++) { + if (!finished[i] && equals(nextKey, lastList.get(i))) { + nextIterators.set(i, iterators.get(i)); + } else { + // if the iterator is finished, or did not contribute to the value being returned + // then null it out since the value returned is already in the map to compare + // on the next round + nextIterators.set(i, null); + } + } + populated = true; + } + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/RewritableSortedMap.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/RewritableSortedMap.java new file mode 100644 index 00000000000..68063a1855e --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/RewritableSortedMap.java @@ -0,0 +1,19 @@ +package datawave.query.util.sortedmap; + +import java.util.SortedMap; + +/** + * A rewritable sorted map which will replace the value for a key dependent on a RewriteStrategy + * + * @param <K> + * key of the map + * @param <V> + * value of the map + */ +public interface RewritableSortedMap<K,V> extends SortedMap<K,V> { + + FileSortedMap.RewriteStrategy<K,V> getRewriteStrategy(); + + void setRewriteStrategy(FileSortedMap.RewriteStrategy<K,V> rewriteStrategy); + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/SortedMapTempFileHandler.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/SortedMapTempFileHandler.java new file mode 100644 index 00000000000..ef3815c3c6c --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/SortedMapTempFileHandler.java @@ -0,0 +1,63 @@ +package datawave.query.util.sortedmap; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import datawave.query.util.sortedset.FileSortedSet; + +/** + * A sorted map file handler that uses temporary local files. + */ +public class SortedMapTempFileHandler implements FileSortedMap.SortedMapFileHandler { + private final FileSystem fs; + private final File file; + private final Path path; + + public SortedMapTempFileHandler() throws IOException { + this.file = File.createTempFile("SortedSet", ".bin"); + this.file.deleteOnExit(); + this.path = new Path(file.toURI()); + Configuration conf = new Configuration(); + this.fs = path.getFileSystem(conf); + } + + public File getFile() { + return file; + } + + @Override + public InputStream getInputStream() throws IOException { + return fs.open(path); + } + + @Override + public OutputStream getOutputStream() throws IOException { + return fs.create(path, true); + } + + @Override + public FileSortedSet.PersistOptions getPersistOptions() { + return new FileSortedSet.PersistOptions(true, false); + } + + @Override + public long getSize() { + return (file.exists() ?
file.length() : -1); + } + + @Override + public void deleteFile() { + this.file.delete(); + } + + @Override + public String toString() { + return file.toString(); + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/KeyValueByteDocumentTransforms.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/KeyValueByteDocumentTransforms.java new file mode 100644 index 00000000000..e5eb3524aef --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/KeyValueByteDocumentTransforms.java @@ -0,0 +1,83 @@ +package datawave.query.util.sortedmap.rfile; + +import java.io.ByteArrayInputStream; +import java.util.Map; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.commons.collections.keyvalue.UnmodifiableMapEntry; + +import datawave.query.attributes.Attribute; +import datawave.query.attributes.Document; +import datawave.query.function.deserializer.DocumentDeserializer; +import datawave.query.function.deserializer.KryoDocumentDeserializer; +import datawave.query.function.serializer.DocumentSerializer; +import datawave.query.function.serializer.KryoDocumentSerializer; + +public class KeyValueByteDocumentTransforms { + + private static KryoDocumentSerializer serializer = new KryoDocumentSerializer(false, true); + private static KryoDocumentDeserializer deserializer = new KryoDocumentDeserializer(); + + public static byte[] keyToByte(Key key) { + if (key == null) { + return null; + } + return key.getRow().getBytes(); + } + + public static Key byteToKey(byte[] bytes) { + if (bytes == null) { + return null; + } + return new Key(bytes); + } + + public static Value documentToValue(Document doc) { + if (doc == null) { + return null; + } + byte[] document; + synchronized (serializer) { + document = serializer.serialize(doc); + } + return new Value(document); + } + + public static Document valueToDocument(Value value) { + if (value == null) { + return null; + } + synchronized (deserializer) { + return deserializer.deserialize(new ByteArrayInputStream(value.get())); + } + } + + public static Map.Entry keyValueToByteDocument(Map.Entry keyValue) { + if (keyValue == null) { + return null; + } + return new UnmodifiableMapEntry(keyToByte(keyValue.getKey()), valueToDocument(keyValue.getValue())); + } + + public static Map.Entry byteDocumentToKeyValue(Map.Entry byteKey) { + if (byteKey == null) { + return null; + } + return new UnmodifiableMapEntry(byteToKey(byteKey.getKey()), documentToValue(byteKey.getValue())); + } + + public static Map.Entry keyValueToKeyDocument(Map.Entry keyValue) { + if (keyValue == null) { + return null; + } + return new UnmodifiableMapEntry(keyValue.getKey(), valueToDocument(keyValue.getValue())); + } + + public static Map.Entry KeyDocumentToKeyValue(Map.Entry byteKey) { + if (byteKey == null) { + return null; + } + return new UnmodifiableMapEntry(byteKey.getKey(), documentToValue(byteKey.getValue())); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileByteDocumentInputStream.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileByteDocumentInputStream.java new file mode 100644 index 00000000000..0df7adb2b48 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileByteDocumentInputStream.java @@ -0,0 +1,30 @@ +package datawave.query.util.sortedmap.rfile; + +import static 
datawave.query.util.sortedmap.rfile.KeyValueByteDocumentTransforms.byteToKey; +import static datawave.query.util.sortedmap.rfile.KeyValueByteDocumentTransforms.keyValueToByteDocument; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Map; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; + +import datawave.query.attributes.Document; +import datawave.query.util.sortedmap.FileSortedMap; + +public class RFileByteDocumentInputStream extends RFileKeyValueInputStreamBase implements FileSortedMap.SortedMapInputStream { + + public RFileByteDocumentInputStream(InputStream inputStream, long length) throws IOException { + super(inputStream, length); + } + + public RFileByteDocumentInputStream(InputStream inputStream, long length, byte[] start, byte[] end) throws IOException { + super(inputStream, length, byteToKey(start), byteToKey(end)); + } + + @Override + public Map.Entry readObject() throws IOException { + return keyValueToByteDocument(readKeyValue()); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileByteDocumentOutputStream.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileByteDocumentOutputStream.java new file mode 100644 index 00000000000..774593be151 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileByteDocumentOutputStream.java @@ -0,0 +1,26 @@ +package datawave.query.util.sortedmap.rfile; + +import static datawave.query.util.sortedmap.rfile.KeyValueByteDocumentTransforms.byteDocumentToKeyValue; +import static datawave.query.util.sortedmap.rfile.KeyValueByteDocumentTransforms.byteToKey; +import static datawave.query.util.sortedmap.rfile.KeyValueByteDocumentTransforms.documentToValue; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Map; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; + +import datawave.query.attributes.Document; +import datawave.query.util.sortedmap.FileSortedMap; + +public class RFileByteDocumentOutputStream extends RFileKeyValueOutputStreamBase implements FileSortedMap.SortedMapOutputStream { + public RFileByteDocumentOutputStream(OutputStream stream) throws IOException { + super(stream); + } + + @Override + public void writeObject(byte[] k, Document v) throws IOException { + writeKeyValue(byteToKey(k), documentToValue(v)); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyDocumentInputStream.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyDocumentInputStream.java new file mode 100644 index 00000000000..1475c0bbc74 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyDocumentInputStream.java @@ -0,0 +1,28 @@ +package datawave.query.util.sortedmap.rfile; + +import static datawave.query.util.sortedmap.rfile.KeyValueByteDocumentTransforms.keyValueToKeyDocument; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Map; + +import org.apache.accumulo.core.data.Key; + +import datawave.query.attributes.Document; +import datawave.query.util.sortedmap.FileSortedMap; + +public class RFileKeyDocumentInputStream extends RFileKeyValueInputStreamBase implements FileSortedMap.SortedMapInputStream { + + public RFileKeyDocumentInputStream(InputStream inputStream, long length) throws IOException { + super(inputStream, length); + } + + public 
RFileKeyDocumentInputStream(InputStream inputStream, long length, Key start, Key end) throws IOException { + super(inputStream, length, start, end); + } + + @Override + public Map.Entry readObject() throws IOException { + return keyValueToKeyDocument(super.readKeyValue()); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyDocumentOutputStream.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyDocumentOutputStream.java new file mode 100644 index 00000000000..8a5b8f6043b --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyDocumentOutputStream.java @@ -0,0 +1,22 @@ +package datawave.query.util.sortedmap.rfile; + +import static datawave.query.util.sortedmap.rfile.KeyValueByteDocumentTransforms.documentToValue; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.accumulo.core.data.Key; + +import datawave.query.attributes.Document; +import datawave.query.util.sortedmap.FileSortedMap; + +public class RFileKeyDocumentOutputStream extends RFileKeyValueOutputStreamBase implements FileSortedMap.SortedMapOutputStream { + public RFileKeyDocumentOutputStream(OutputStream stream) throws IOException { + super(stream); + } + + @Override + public void writeObject(Key k, Document v) throws IOException { + writeKeyValue(k, documentToValue(v)); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueInputStream.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueInputStream.java new file mode 100644 index 00000000000..e241d652fcd --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueInputStream.java @@ -0,0 +1,26 @@ +package datawave.query.util.sortedmap.rfile; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Map; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; + +import datawave.query.util.sortedmap.FileSortedMap; + +public class RFileKeyValueInputStream extends RFileKeyValueInputStreamBase implements FileSortedMap.SortedMapInputStream { + + public RFileKeyValueInputStream(InputStream inputStream, long length) throws IOException { + super(inputStream, length); + } + + public RFileKeyValueInputStream(InputStream inputStream, long length, Key start, Key end) throws IOException { + super(inputStream, length, start, end); + } + + @Override + public Map.Entry readObject() throws IOException { + return super.readKeyValue(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueInputStreamBase.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueInputStreamBase.java new file mode 100644 index 00000000000..653cb7de40f --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueInputStreamBase.java @@ -0,0 +1,79 @@ +package datawave.query.util.sortedmap.rfile; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.Map; + +import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.client.rfile.RFile; +import org.apache.accumulo.core.client.rfile.RFileSource; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; + +public abstract class RFileKeyValueInputStreamBase { + private 
final InputStream inputStream; + private final long length; + private Key start; + private Key end; + private Scanner reader; + private Iterator> iterator; + private int size = -1; + private static final Range ALL = new Range(); + + public RFileKeyValueInputStreamBase(InputStream inputStream, long length) throws IOException { + this.inputStream = inputStream; + this.length = length; + } + + public RFileKeyValueInputStreamBase(InputStream inputStream, long length, Key start, Key end) throws IOException { + this(inputStream, length); + this.start = start; + this.end = end; + } + + private Iterator> keyValueIterator() { + if (iterator == null) { + Range r = ALL; + if (start != null || end != null) { + r = new Range(start, true, end, false); + } + reader = RFile.newScanner().from(new RFileSource(inputStream, length)).withBounds(r).withoutSystemIterators().build(); + iterator = reader.iterator(); + } + return iterator; + } + + public Map.Entry readKeyValue() throws IOException { + if (keyValueIterator().hasNext()) { + Map.Entry next = keyValueIterator().next(); + if (RFileKeyValueOutputStreamBase.SizeKeyUtil.isSizeKey(next.getKey())) { + size = RFileKeyValueOutputStreamBase.SizeKeyUtil.getSize(next.getKey()); + next = null; + } + return next; + } + return null; + } + + public int readSize() throws IOException { + if (size < 0) { + if (iterator != null) { + throw new IllegalStateException("Cannot read size from undetermined location in stream"); + } + reader = RFile.newScanner().from(new RFileSource(inputStream, length)).withBounds(new Range(RFileKeyValueOutputStreamBase.SizeKeyUtil.SIZE_ROW)) + .build(); + iterator = reader.iterator(); + size = RFileKeyValueOutputStreamBase.SizeKeyUtil.getSize(iterator.next().getKey()); + } + return size; + } + + public void close() { + if (reader != null) { + reader.close(); + reader = null; + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueOutputStream.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueOutputStream.java new file mode 100644 index 00000000000..c42d045fab5 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueOutputStream.java @@ -0,0 +1,20 @@ +package datawave.query.util.sortedmap.rfile; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; + +import datawave.query.util.sortedmap.FileSortedMap; + +public class RFileKeyValueOutputStream extends RFileKeyValueOutputStreamBase implements FileSortedMap.SortedMapOutputStream { + public RFileKeyValueOutputStream(OutputStream stream) throws IOException { + super(stream); + } + + @Override + public void writeObject(Key k, Value v) throws IOException { + writeKeyValue(k, v); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueOutputStreamBase.java b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueOutputStreamBase.java new file mode 100644 index 00000000000..ca4b8845beb --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/sortedmap/rfile/RFileKeyValueOutputStreamBase.java @@ -0,0 +1,54 @@ +package datawave.query.util.sortedmap.rfile; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.accumulo.core.client.rfile.RFile; +import org.apache.accumulo.core.client.rfile.RFileWriter; +import org.apache.accumulo.core.data.Key; +import 
org.apache.accumulo.core.data.Value; +import org.apache.hadoop.io.Text; + +public abstract class RFileKeyValueOutputStreamBase { + private final OutputStream outputStream; + private RFileWriter writer; + private static final Value EMPTY_VALUE = new Value(new byte[0]); + + public RFileKeyValueOutputStreamBase(OutputStream outputStream) throws IOException { + this.outputStream = outputStream; + this.writer = RFile.newWriter().to(outputStream).withVisibilityCacheSize(10).build(); + } + + public void writeKeyValue(Key k, Value v) throws IOException { + writer.append(k, v); + } + + public void writeSize(int size) throws IOException { + writer.append(SizeKeyUtil.getKey(size), EMPTY_VALUE); + } + + public void close() throws IOException { + if (writer != null) { + writer.close(); + writer = null; + } + } + + public static class SizeKeyUtil { + private static final char MAX_UNICODE = (char) Character.MAX_CODE_POINT; + public static final Text SIZE_ROW = new Text(MAX_UNICODE + "_SIZE_" + MAX_UNICODE); + + public static Key getKey(int size) { + return new Key(SIZE_ROW, new Text(Integer.toString(size))); + } + + public static boolean isSizeKey(Key key) { + return key.getRow().equals(SIZE_ROW); + } + + public static int getSize(Key key) { + return Integer.parseInt(key.getColumnFamily().toString()); + } + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/query/UniqueTest.java b/warehouse/query-core/src/test/java/datawave/query/UniqueTest.java index 83a9087f083..42e442f4276 100644 --- a/warehouse/query-core/src/test/java/datawave/query/UniqueTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/UniqueTest.java @@ -1,5 +1,6 @@ package datawave.query; +import java.net.URL; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -152,6 +153,10 @@ public void setup() { TimeZone.setDefault(TimeZone.getTimeZone("GMT")); logic.setFullTableScanEnabled(true); + // setup the hadoop configuration + URL hadoopConfig = this.getClass().getResource("/testhadoop.config"); + logic.setHdfsSiteConfigURLs(hadoopConfig.toExternalForm()); + logic.setQueryExecutionForPageTimeout(300000000000000L); deserializer = new KryoDocumentDeserializer(); } @@ -182,7 +187,10 @@ protected void runTestQueryWithUniqueness(Set> expected, String quer TransformIterator iter = new DatawaveTransformIterator(logic.iterator(), transformer); List eventList = new ArrayList<>(); while (iter.hasNext()) { - eventList.add(iter.next()); + Object o = iter.next(); + if (o != null) { + eventList.add(o); + } } BaseQueryResponse response = transformer.createResponse(eventList); @@ -195,6 +203,9 @@ protected void runTestQueryWithUniqueness(Set> expected, String quer Assert.assertTrue(response instanceof DefaultEventQueryResponse); DefaultEventQueryResponse eventQueryResponse = (DefaultEventQueryResponse) response; + // copy expected set to avoid modifying parameter passed in + expected = new HashSet<>(expected); + for (EventBase event : eventQueryResponse.getEvents()) { boolean found = false; for (Iterator> it = expected.iterator(); it.hasNext();) { @@ -206,9 +217,9 @@ protected void runTestQueryWithUniqueness(Set> expected, String quer break; } } - Assert.assertTrue(found); + Assert.assertTrue("Failed to find " + event.getMetadata().getInternalId() + " in expected results", found); } - Assert.assertTrue(expected.isEmpty()); + Assert.assertTrue("Failed to find all expected results. 
Missing " + expected, expected.isEmpty()); } @Test @@ -226,12 +237,14 @@ public void testUniqueness() throws Exception { extraParameters.put("unique.fields", "DEATH_DATE,$MAGIC"); runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters); + expected.clear(); expected.add(Sets.newHashSet(WiseGuysIngest.sopranoUID)); expected.add(Sets.newHashSet(WiseGuysIngest.corleoneUID)); expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID)); extraParameters.put("unique.fields", "$DEATH_DATE,BIRTH_DATE"); runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters); + expected.clear(); expected.add(Sets.newHashSet(WiseGuysIngest.sopranoUID)); expected.add(Sets.newHashSet(WiseGuysIngest.corleoneUID)); expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID)); @@ -271,12 +284,14 @@ public void testUniquenessUsingFunction() throws Exception { runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters); queryString = "UUID =~ '^[CS].*' && f:unique('DEATH_DATE','$BIRTH_DATE')"; + expected.clear(); expected.add(Sets.newHashSet(WiseGuysIngest.sopranoUID)); expected.add(Sets.newHashSet(WiseGuysIngest.corleoneUID)); expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID)); runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters); queryString = "UUID =~ '^[CS].*' && f:unique('death_date','$birth_date')"; + expected.clear(); expected.add(Sets.newHashSet(WiseGuysIngest.sopranoUID)); expected.add(Sets.newHashSet(WiseGuysIngest.corleoneUID)); expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID)); @@ -298,12 +313,14 @@ public void testUniquenessUsingLuceneFunction() throws Exception { runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters); queryString = "UUID:/^[CS].*/ AND #UNIQUE(DEATH_DATE,$BIRTH_DATE)"; + expected.clear(); expected.add(Sets.newHashSet(WiseGuysIngest.sopranoUID)); expected.add(Sets.newHashSet(WiseGuysIngest.corleoneUID)); expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID)); runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters); queryString = "UUID:/^[CS].*/ AND #UNIQUE(death_date,birth_date)"; + expected.clear(); expected.add(Sets.newHashSet(WiseGuysIngest.sopranoUID)); expected.add(Sets.newHashSet(WiseGuysIngest.corleoneUID)); expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID)); @@ -355,4 +372,77 @@ public void testUniquenessWithModelAliases() throws Exception { String queryString = "UUID:/^[CS].*/ AND #UNIQUE(BOTH_NULL)"; runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters); } + + @Test + public void testRecentUniquenessWithModelAliases() throws Exception { + Map extraParameters = new HashMap<>(); + extraParameters.put("include.grouping.context", "true"); + extraParameters.put("query.syntax", "LUCENE"); + + Set> expected = new HashSet<>(); + expected.add(Sets.newHashSet(WiseGuysIngest.sopranoUID, WiseGuysIngest.corleoneUID, WiseGuysIngest.caponeUID)); + Date startDate = format.parse("20091231"); + Date endDate = format.parse("20150101"); + + String queryString = "UUID:/^[CS].*/ AND #MOST_RECENT_UNIQUE(BOTH_NULL)"; + runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters); + } + + @Test + public void testMostRecentUniqueness() throws Exception { + Map extraParameters = new HashMap<>(); + extraParameters.put("include.grouping.context", "true"); + extraParameters.put(QueryParameters.MOST_RECENT_UNIQUE, "true"); + + Date startDate = 
format.parse("20091231");
+        Date endDate = format.parse("20150101");
+
+        String queryString = "UUID =~ '^[CS].*'";
+
+        Set<Set<String>> expected = new HashSet<>();
+        expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID));
+        extraParameters.put("unique.fields", "DEATH_DATE,$MAGIC");
+        runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters);
+
+        expected.clear();
+        expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID));
+        extraParameters.put("unique.fields", "death_date,$magic");
+        runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters);
+
+        expected.clear();
+        expected.add(Sets.newHashSet(WiseGuysIngest.sopranoUID));
+        expected.add(Sets.newHashSet(WiseGuysIngest.corleoneUID));
+        expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID));
+        extraParameters.put("unique.fields", "$DEATH_DATE,BIRTH_DATE");
+        runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters);
+
+        expected.clear();
+        expected.add(Sets.newHashSet(WiseGuysIngest.sopranoUID));
+        expected.add(Sets.newHashSet(WiseGuysIngest.corleoneUID));
+        expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID));
+        extraParameters.put("unique.fields", "death_date,birth_date");
+        runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters);
+    }
+
+    @Test
+    public void testHannahHypothesis() throws Exception {
+        // Verify that the most.recent.unique parameter also applies when the unique fields come from an f:unique function.
+        Map<String,String> extraParameters = new HashMap<>();
+        Date startDate = format.parse("20091231");
+        Date endDate = format.parse("20150101");
+
+        Set<Set<String>> expected = new HashSet<>();
+        expected.add(Sets.newHashSet(WiseGuysIngest.caponeUID));
+        extraParameters.put(QueryParameters.MOST_RECENT_UNIQUE, "true");
+        extraParameters.put("unique.fields", "DEATH_DATE,$MAGIC");
+        String queryString = "UUID =~ '^[CS].*'";
+        runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters);
+
+        extraParameters.clear();
+        extraParameters.put(QueryParameters.MOST_RECENT_UNIQUE, "true");
+        queryString = "UUID =~ '^[CS].*' && f:unique(death_date,magic)";
+        runTestQueryWithUniqueness(expected, queryString, startDate, endDate, extraParameters);
+    }
+}
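The most-recent-unique tests above depend on the per-event timestamp deltas added to WiseGuysIngest later in this patch (corleone +0, soprano +10, capone +20): assuming the three events carry identical DEATH_DATE/MAGIC values, as the single expected caponeUID suggests, only the event with the greatest timestamp survives deduplication. A minimal sketch of that keep-the-latest semantics follows; the class and method names are illustrative only, not DataWave's UniqueTransform (which, as the transformer tests below show, buffers entries via FileSortedSet instead):

import java.util.HashMap;
import java.util.Map;

// Illustrative only: keeps, per unique field/value tuple, the entry with the greatest timestamp.
final class MostRecentDeduper<V> {
    static final class Timestamped<V> {
        final V value;
        final long timestamp;
        Timestamped(V value, long timestamp) {
            this.value = value;
            this.timestamp = timestamp;
        }
    }

    private final Map<String, Timestamped<V>> latest = new HashMap<>();

    // key is the ordered unique-field tuple, e.g. "DEATH_DATE=2000-12-28,MAGIC=18" (hypothetical encoding)
    void offer(String key, V value, long timestamp) {
        latest.merge(key, new Timestamped<>(value, timestamp),
                        (current, candidate) -> candidate.timestamp >= current.timestamp ? candidate : current);
    }

    Map<String, Timestamped<V>> results() {
        return latest;
    }
}

Offering the same tuple for corleone (t+0), soprano (t+10), and capone (t+20) leaves only the capone entry, which is the shape of the result testMostRecentUniqueness asserts. Note that, unlike plain uniqueness where the first document seen wins, most-recent semantics force the transform to retain and possibly replace buffered entries until the results are exhausted.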
diff --git a/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueFieldsTest.java b/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueFieldsTest.java
index a4bf1421a30..00850fba9a6 100644
--- a/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueFieldsTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/attributes/UniqueFieldsTest.java
@@ -71,7 +71,7 @@ public void testNonEmptyUniqueFieldsToString() {
         uniqueFields.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR);
         uniqueFields.put("fieldD", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE);
 
-        assertEquals("fieldA[ALL],fieldB[ALL,DAY],fieldC[HOUR],fieldD[HOUR,MINUTE]", uniqueFields.toString());
+        assertEquals("FIELDA[ALL],FIELDB[ALL,DAY],FIELDC[HOUR],FIELDD[HOUR,MINUTE]", uniqueFields.toString());
     }
 
     /**
@@ -345,7 +345,7 @@ public void testSerialization() throws JsonProcessingException {
         UniqueFields uniqueFields = new UniqueFields(sortedFields);
 
         String json = objectMapper.writeValueAsString(uniqueFields);
-        assertEquals("\"fieldA[ALL],fieldB[ALL,DAY],fieldC[HOUR],fieldD[HOUR,MINUTE]\"", json);
+        assertEquals("\"FIELDA[ALL],FIELDB[ALL,DAY],FIELDC[HOUR],FIELDD[HOUR,MINUTE]\"", json);
     }
 
     /**
@@ -387,7 +387,7 @@ public void testValueTransformation() {
         uniqueFields.put("fieldA", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE);
         Set<String> values = Sets.newHashSet("2020-01-12 15:30:45", "nonDateValue");
 
-        SortedSet<String> actual = Sets.newTreeSet(uniqueFields.transformValues("fieldA", values));
+        SortedSet<String> actual = Sets.newTreeSet(uniqueFields.transformValues("FIELDA", values));
 
         assertEquals(expected, actual);
     }
 
@@ -407,8 +407,6 @@ public void testDeconstructIdentifierFields() {
         uniqueFields.put("$FIELDB", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR);
         uniqueFields.put("FIELDC", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR);
 
-        uniqueFields.deconstructIdentifierFields();
-
         SortedSet<String> actual = Sets.newTreeSet(uniqueFields.getFields());
         assertEquals(expected, actual);
 
diff --git a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java
index ae9f64a1a84..ff6b304069b 100644
--- a/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/config/ShardQueryConfigurationTest.java
@@ -443,6 +443,8 @@ public void setUp() throws Exception {
         updatedValues.put("compositeFilterFunctionsEnabled", true);
         defaultValues.put("uniqueFields", new UniqueFields());
         updatedValues.put("uniqueFields", UniqueFields.from("FIELD_U,FIELD_V"));
+        defaultValues.put("uniqueCacheBufferSize", 100);
+        updatedValues.put("uniqueCacheBufferSize", 1000);
         defaultValues.put("cacheModel", false);
         updatedValues.put("cacheModel", true);
         defaultValues.put("trackSizes", true);
diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java
index 694984bbc54..6a0e4bf1950 100644
--- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java
+++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitorTest.java
@@ -143,34 +143,85 @@ public void testUniqueFunction() throws ParseException {
         // Verify an empty function results in an empty parameter value.
         assertResult("f:unique_by_day()", "");
         assertOption(QueryParameters.UNIQUE_FIELDS, "");
+        assertOption(QueryParameters.MOST_RECENT_UNIQUE, null);
 
         // Verify that fields of no specified granularity are added with the default ALL granularity.
         assertResult("f:unique('field1','field2','field3')", "");
-        assertOption(QueryParameters.UNIQUE_FIELDS, "field1[ALL],field2[ALL],field3[ALL]");
+        assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL],FIELD2[ALL],FIELD3[ALL]");
+        assertOption(QueryParameters.MOST_RECENT_UNIQUE, null);
 
         // Verify that fields with DAY granularity are added as such.
         assertResult("f:unique('field1[DAY]','field2[DAY]','field3[DAY]')", "");
-        assertOption(QueryParameters.UNIQUE_FIELDS, "field1[DAY],field2[DAY],field3[DAY]");
+        assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[DAY],FIELD2[DAY],FIELD3[DAY]");
+        assertOption(QueryParameters.MOST_RECENT_UNIQUE, null);
 
         // Verify that fields with HOUR granularity are added as such.
         assertResult("f:unique('field1[HOUR]','field2[HOUR]','field3[HOUR]')", "");
-        assertOption(QueryParameters.UNIQUE_FIELDS, "field1[HOUR],field2[HOUR],field3[HOUR]");
+        assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[HOUR],FIELD2[HOUR],FIELD3[HOUR]");
+        assertOption(QueryParameters.MOST_RECENT_UNIQUE, null);
 
         // Verify that fields with MINUTE granularity are added as such.
assertResult("f:unique('field1[MINUTE]','field2[MINUTE]','field3[MINUTE]')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[MINUTE],field2[MINUTE],field3[MINUTE]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[MINUTE],FIELD2[MINUTE],FIELD3[MINUTE]"); + assertOption(QueryParameters.MOST_RECENT_UNIQUE, null); // Verify that fields from multiple unique functions are merged together. assertResult("f:unique('field1','field2') AND f:unique('field2[DAY]','field3[DAY]') AND f:unique('field4')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[ALL],field2[ALL,DAY],field3[DAY],field4[ALL]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL],FIELD2[ALL,DAY],FIELD3[DAY],FIELD4[ALL]"); + assertOption(QueryParameters.MOST_RECENT_UNIQUE, null); // Verify more complex fields with multiple granularity levels are merged together. assertResult("f:unique('field1[DAY]','field2[DAY,HOUR]','field3[HOUR,MINUTE]','field4[ALL,MINUTE]','field5')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[DAY],field2[DAY,HOUR],field3[HOUR,MINUTE],field4[ALL,MINUTE],field5[ALL]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[DAY],FIELD2[DAY,HOUR],FIELD3[HOUR,MINUTE],FIELD4[ALL,MINUTE],FIELD5[ALL]"); + assertOption(QueryParameters.MOST_RECENT_UNIQUE, null); // Lucene will parse comma-delimited granularity levels into separate strings. Ensure it still parses correctly. assertResult("f:unique('field1[DAY]','field2[DAY','HOUR]','field3[HOUR','MINUTE]','field4[ALL','MINUTE]','field5')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[DAY],field2[DAY,HOUR],field3[HOUR,MINUTE],field4[ALL,MINUTE],field5[ALL]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[DAY],FIELD2[DAY,HOUR],FIELD3[HOUR,MINUTE],FIELD4[ALL,MINUTE],FIELD5[ALL]"); + assertOption(QueryParameters.MOST_RECENT_UNIQUE, null); + } + + @Test + public void testMostRecentUniqueFunction() throws ParseException { + // Verify an empty function results in an empty parameter value. + assertResult("f:most_recent_unique_by_day()", ""); + assertOption(QueryParameters.UNIQUE_FIELDS, ""); + assertOption(QueryParameters.MOST_RECENT_UNIQUE, "true"); + + // Verify that fields of no specified granularity are added with the default ALL granularity. + assertResult("f:most_recent_unique('field1','field2','field3')", ""); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL],FIELD2[ALL],FIELD3[ALL]"); + assertOption(QueryParameters.MOST_RECENT_UNIQUE, "true"); + + // Verify that fields with DAY granularity are added as such. + assertResult("f:most_recent_unique('field1[DAY]','field2[DAY]','field3[DAY]')", ""); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[DAY],FIELD2[DAY],FIELD3[DAY]"); + assertOption(QueryParameters.MOST_RECENT_UNIQUE, "true"); + + // Verify that fields with HOUR granularity are added as such. + assertResult("f:most_recent_unique('field1[HOUR]','field2[HOUR]','field3[HOUR]')", ""); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[HOUR],FIELD2[HOUR],FIELD3[HOUR]"); + assertOption(QueryParameters.MOST_RECENT_UNIQUE, "true"); + + // Verify that fields with MINUTE granularity are added as such. + assertResult("f:most_recent_unique('field1[MINUTE]','field2[MINUTE]','field3[MINUTE]')", ""); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[MINUTE],FIELD2[MINUTE],FIELD3[MINUTE]"); + assertOption(QueryParameters.MOST_RECENT_UNIQUE, "true"); + + // Verify that fields from multiple unique functions are merged together. 
+        assertResult("f:most_recent_unique('field1','field2') AND f:unique('field2[DAY]','field3[DAY]') AND f:unique('field4')", "");
+        assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL],FIELD2[ALL,DAY],FIELD3[DAY],FIELD4[ALL]");
+        assertOption(QueryParameters.MOST_RECENT_UNIQUE, "true");
+
+        // Verify more complex fields with multiple granularity levels are merged together.
+        assertResult("f:most_recent_unique('field1[DAY]','field2[DAY,HOUR]','field3[HOUR,MINUTE]','field4[ALL,MINUTE]','field5')", "");
+        assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[DAY],FIELD2[DAY,HOUR],FIELD3[HOUR,MINUTE],FIELD4[ALL,MINUTE],FIELD5[ALL]");
+        assertOption(QueryParameters.MOST_RECENT_UNIQUE, "true");
+
+        // Lucene will parse comma-delimited granularity levels into separate strings. Ensure it still parses correctly.
+        assertResult("f:most_recent_unique('field1[DAY]','field2[DAY','HOUR]','field3[HOUR','MINUTE]','field4[ALL','MINUTE]','field5')", "");
+        assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[DAY],FIELD2[DAY,HOUR],FIELD3[HOUR,MINUTE],FIELD4[ALL,MINUTE],FIELD5[ALL]");
+        assertOption(QueryParameters.MOST_RECENT_UNIQUE, "true");
+    }
 
     @Test
@@ -181,11 +232,11 @@ public void testUniqueByDay() throws ParseException {
         // Verify fields are added with the DAY granularity.
         assertResult("f:unique_by_day('field1','field2','field3')", "");
-        assertOption(QueryParameters.UNIQUE_FIELDS, "field1[DAY],field2[DAY],field3[DAY]");
+        assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[DAY],FIELD2[DAY],FIELD3[DAY]");
 
         // Verify fields from multiple functions are merged.
         assertResult("f:unique('field1','field2[HOUR]') AND f:unique_by_day('field1','field2','field3') AND f:unique_by_day('field4')", "");
-        assertOption(QueryParameters.UNIQUE_FIELDS, "field1[ALL,DAY],field2[DAY,HOUR],field3[DAY],field4[DAY]");
+        assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL,DAY],FIELD2[DAY,HOUR],FIELD3[DAY],FIELD4[DAY]");
     }
 
     @Test
@@ -196,11 +247,11 @@ public void testUniqueByHour() throws ParseException {
         // Verify fields are added with the HOUR granularity.
         assertResult("f:unique_by_hour('field1','field2','field3')", "");
-        assertOption(QueryParameters.UNIQUE_FIELDS, "field1[HOUR],field2[HOUR],field3[HOUR]");
+        assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[HOUR],FIELD2[HOUR],FIELD3[HOUR]");
 
         // Verify fields from multiple functions are merged.
         assertResult("f:unique('field1','field2[DAY]') AND f:unique_by_hour('field1','field2','field3') AND f:unique_by_hour('field4')", "");
-        assertOption(QueryParameters.UNIQUE_FIELDS, "field1[ALL,HOUR],field2[DAY,HOUR],field3[HOUR],field4[HOUR]");
+        assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL,HOUR],FIELD2[DAY,HOUR],FIELD3[HOUR],FIELD4[HOUR]");
     }
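Each unique_by_* variant tags its fields with a temporal granularity that truncates values before the uniqueness comparison; values that do not parse as dates pass through unchanged, which is why the "nonDateValue" rows in the UniqueTransformTest changes later in this patch stay unique at every granularity. A minimal sketch of that truncate-or-pass-through behavior, assuming the "yyyy-MM-dd HH:mm:ss" value shape these tests use (the class and method names here are hypothetical, not DataWave's UniqueGranularity implementation):

import java.text.ParseException;
import java.text.SimpleDateFormat;

final class GranularityDemo {
    private static final SimpleDateFormat IN = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    // Truncate a temporal value to the given output pattern; return the value as-is when it is not a date.
    static String truncate(String value, String pattern) {
        try {
            return new SimpleDateFormat(pattern).format(IN.parse(value));
        } catch (ParseException e) {
            return value;
        }
    }

    public static void main(String[] args) {
        // DAY: both timestamps collapse to the same key, so only one counts as unique
        System.out.println(truncate("2001-03-10 10:15:15", "yyyy-MM-dd"));    // 2001-03-10
        System.out.println(truncate("2001-03-10 12:40:15", "yyyy-MM-dd"));    // 2001-03-10
        // HOUR keeps progressively more of the timestamp
        System.out.println(truncate("2001-03-10 10:15:15", "yyyy-MM-dd HH")); // 2001-03-10 10
        // non-dates are compared verbatim
        System.out.println(truncate("nonDateValue", "yyyy-MM-dd"));           // nonDateValue
    }
}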
assertResult("f:unique('field1','field2[DAY]') AND f:unique_by_month('field1','field2','field3') AND f:unique_by_month('field4')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[ALL,MONTH],field2[DAY,MONTH],field3[MONTH],field4[MONTH]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL,MONTH],FIELD2[DAY,MONTH],FIELD3[MONTH],FIELD4[MONTH]"); } @Test @@ -226,11 +277,11 @@ public void testUniqueBySecond() throws ParseException { // Verify fields are added with the HOUR granularity. assertResult("f:unique_by_second('field1','field2','field3')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[SECOND],field2[SECOND],field3[SECOND]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[SECOND],FIELD2[SECOND],FIELD3[SECOND]"); // Verify fields from multiple functions are merged. assertResult("f:unique('field1','field2[DAY]') AND f:unique_by_second('field1','field2','field3') AND f:unique_by_second('field4')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[ALL,SECOND],field2[DAY,SECOND],field3[SECOND],field4[SECOND]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL,SECOND],FIELD2[DAY,SECOND],FIELD3[SECOND],FIELD4[SECOND]"); } @Test @@ -241,11 +292,11 @@ public void testUniqueByMillisecond() throws ParseException { // Verify fields are added with the HOUR granularity. assertResult("f:unique_by_millisecond('field1','field2','field3')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[MILLISECOND],field2[MILLISECOND],field3[MILLISECOND]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[MILLISECOND],FIELD2[MILLISECOND],FIELD3[MILLISECOND]"); // Verify fields from multiple functions are merged. assertResult("f:unique('field1','field2[DAY]') AND f:unique_by_millisecond('field1','field2','field3') AND f:unique_by_millisecond('field4')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[ALL,MILLISECOND],field2[DAY,MILLISECOND],field3[MILLISECOND],field4[MILLISECOND]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL,MILLISECOND],FIELD2[DAY,MILLISECOND],FIELD3[MILLISECOND],FIELD4[MILLISECOND]"); } @Test @@ -256,11 +307,11 @@ public void testUniqueByYear() throws ParseException { // Verify fields are added with the MINUTE granularity. assertResult("f:unique_by_year('field1','field2','field3')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[YEAR],field2[YEAR],field3[YEAR]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[YEAR],FIELD2[YEAR],FIELD3[YEAR]"); // Verify fields from multiple functions are merged. assertResult("f:unique('field1','field2[DAY]') AND f:unique_by_year('field1','field2','field3') AND f:unique_by_year('field4')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[ALL,YEAR],field2[DAY,YEAR],field3[YEAR],field4[YEAR]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL,YEAR],FIELD2[DAY,YEAR],FIELD3[YEAR],FIELD4[YEAR]"); } @Test @@ -271,11 +322,11 @@ public void testUniqueByMinute() throws ParseException { // Verify fields are added with the MINUTE granularity. assertResult("f:unique_by_minute('field1','field2','field3')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[MINUTE],field2[MINUTE],field3[MINUTE]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[MINUTE],FIELD2[MINUTE],FIELD3[MINUTE]"); // Verify fields from multiple functions are merged. 
assertResult("f:unique('field1','field2[DAY]') AND f:unique_by_minute('field1','field2','field3') AND f:unique_by_minute('field4')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[ALL,MINUTE],field2[DAY,MINUTE],field3[MINUTE],field4[MINUTE]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL,MINUTE],FIELD2[DAY,MINUTE],FIELD3[MINUTE],FIELD4[MINUTE]"); } @Test @@ -286,26 +337,26 @@ public void testUniqueByTenth() throws ParseException { // Verify fields are added with the MINUTE granularity. assertResult("f:unique_by_tenth_of_hour('field1','field2','field3')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[TENTH_OF_HOUR],field2[TENTH_OF_HOUR],field3[TENTH_OF_HOUR]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[TENTH_OF_HOUR],FIELD2[TENTH_OF_HOUR],FIELD3[TENTH_OF_HOUR]"); // Verify fields from multiple functions are merged. assertResult("f:unique('field1','field2[DAY]') AND f:unique_by_tenth_of_hour('field1','field2','field3') AND f:unique_by_tenth_of_hour('field4')", ""); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[ALL,TENTH_OF_HOUR],field2[DAY,TENTH_OF_HOUR],field3[TENTH_OF_HOUR],field4[TENTH_OF_HOUR]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[ALL,TENTH_OF_HOUR],FIELD2[DAY,TENTH_OF_HOUR],FIELD3[TENTH_OF_HOUR],FIELD4[TENTH_OF_HOUR]"); } @Test public void testNonFunctionNodesWithJunctions() throws ParseException { // Verify that only the function node is removed. assertResult("f:unique_by_minute('field1') AND FOO == 'bar'", "FOO == 'bar'"); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[MINUTE]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[MINUTE]"); // Verify that only the function node is removed. assertResult("f:unique_by_minute('field1') AND (FOO == 'bar' AND BAT == 'foo')", "(FOO == 'bar' AND BAT == 'foo')"); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[MINUTE]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[MINUTE]"); // Verify that only the function node is removed. assertResult("f:unique_by_minute('field1') OR FOO == 'bar'", "FOO == 'bar'"); - assertOption(QueryParameters.UNIQUE_FIELDS, "field1[MINUTE]"); + assertOption(QueryParameters.UNIQUE_FIELDS, "FIELD1[MINUTE]"); // Verify that AND nodes are cleaned up. 
assertResult("(FOO == 'bar' OR (BAR == 'foo' AND f:groupby('field1','field2')))", "(FOO == 'bar' OR (BAR == 'foo'))"); diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformMostRecentTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformMostRecentTest.java new file mode 100644 index 00000000000..f309f4a2b9c --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformMostRecentTest.java @@ -0,0 +1,86 @@ +package datawave.query.transformer; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.Collections; +import java.util.UUID; + +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import datawave.microservice.query.QueryImpl; +import datawave.query.attributes.UniqueGranularity; +import datawave.query.iterator.ivarator.IvaratorCacheDirConfig; +import datawave.query.tables.ShardQueryLogic; +import datawave.query.util.sortedset.FileSortedSet; + +public class UniqueTransformMostRecentTest extends UniqueTransformTest { + + protected ShardQueryLogic logic = new ShardQueryLogic(); + + @ClassRule + public static TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @Before + public void setUp() throws IOException { + uniqueFields.setMostRecent(true); + + // setup the hadoop configuration + URL hadoopConfig = this.getClass().getResource("/testhadoop.config"); + logic.setHdfsSiteConfigURLs(hadoopConfig.toExternalForm()); + + // setup a directory for cache results + File tmpDir = temporaryFolder.newFolder(); + IvaratorCacheDirConfig config = new IvaratorCacheDirConfig(tmpDir.toURI().toString()); + logic.setIvaratorCacheDirConfigs(Collections.singletonList(config)); + + QueryImpl query = new QueryImpl(); + query.setId(UUID.randomUUID()); + logic.getConfig().setQuery(query); + } + + @Override + protected UniqueTransform getUniqueTransform() { + try { + // @formatter:off + return new UniqueTransform.Builder() + .withUniqueFields(uniqueFields) + .withQueryExecutionForPageTimeout(Long.MAX_VALUE) + .withBufferPersistThreshold(logic.getUniqueCacheBufferSize()) + .withIvaratorCacheDirConfigs(logic.getIvaratorCacheDirConfigs()) + .withHdfsSiteConfigURLs(logic.getHdfsSiteConfigURLs()) + .withSubDirectory(logic.getConfig().getQuery().getId().toString()) + .withMaxOpenFiles(logic.getIvaratorMaxOpenFiles()) + .withNumRetries(logic.getIvaratorNumRetries()) + .withPersistOptions(new FileSortedSet.PersistOptions(true, false, 0)) + .build(); + // @formatter:on + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Verify that field matching is case-insensitive. 
Query: #UNIQUE(attr0, Attr1, ATTR2) + */ + @Test + public void testMostRecentUniqueness() { + givenInputDocument(1).withKeyValue("ATTR0", randomValues.get(0)); + givenInputDocument(2).withKeyValue("ATTR0", randomValues.get(1)).isExpectedToBeUnique(); + givenInputDocument(3).withKeyValue("ATTR0", randomValues.get(0)).isExpectedToBeUnique(); + givenInputDocument(1).withKeyValue("Attr1", randomValues.get(2)); + givenInputDocument(2).withKeyValue("Attr1", randomValues.get(3)).isExpectedToBeUnique(); + givenInputDocument(3).withKeyValue("Attr1", randomValues.get(2)).isExpectedToBeUnique(); + givenInputDocument(1).withKeyValue("attr2", randomValues.get(4)); + givenInputDocument(2).withKeyValue("attr2", randomValues.get(0)).isExpectedToBeUnique(); + givenInputDocument(3).withKeyValue("attr2", randomValues.get(4)).isExpectedToBeUnique(); + + givenValueTransformerForFields(UniqueGranularity.ALL, "attr0", "Attr1", "ATTR2"); + + assertUniqueDocuments(); + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java index 3eb407a43a3..60749416648 100644 --- a/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/transformer/UniqueTransformTest.java @@ -2,6 +2,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; @@ -19,45 +20,51 @@ import java.util.Set; import java.util.Spliterator; import java.util.Spliterators; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.StreamSupport; -import org.apache.accumulo.core.data.ArrayByteSequence; -import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; +import org.apache.commons.collections.keyvalue.UnmodifiableMapEntry; import org.apache.commons.collections4.Transformer; import org.apache.commons.collections4.iterators.TransformIterator; import org.apache.commons.lang.RandomStringUtils; -import org.apache.hadoop.io.Text; import org.junit.After; import org.junit.BeforeClass; import org.junit.Test; import com.google.common.collect.HashMultimap; -import com.google.common.collect.Iterators; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; +import com.google.common.collect.SortedSetMultimap; import com.google.common.collect.TreeMultimap; +import com.google.common.primitives.Longs; +import datawave.ingest.time.Now; import datawave.query.attributes.Attribute; import datawave.query.attributes.Attributes; import datawave.query.attributes.DiacriticContent; import datawave.query.attributes.Document; +import datawave.query.attributes.DocumentKey; import datawave.query.attributes.TimingMetadata; import datawave.query.attributes.UniqueFields; import datawave.query.attributes.UniqueGranularity; import datawave.query.function.LogTiming; +import datawave.query.iterator.profile.FinalDocumentTrackingIterator; import datawave.query.jexl.JexlASTHelper; public class UniqueTransformTest { - private static final Random random = new Random(1000); - private static final List randomValues = new ArrayList<>(); + protected static final Random random = new Random(1000); + private static final AtomicLong counter = new AtomicLong(); - private final List inputDocuments = new 
ArrayList<>(); - private final List expectedUniqueDocuments = new ArrayList<>(); - private byte[] expectedOrderedFieldValues = null; - private UniqueFields uniqueFields = new UniqueFields(); + protected static final List randomValues = new ArrayList<>(); + + protected final List inputDocuments = new ArrayList<>(); + protected final List expectedUniqueDocuments = new ArrayList<>(); + protected byte[] expectedOrderedFieldValues = null; + protected UniqueFields uniqueFields = new UniqueFields(); @BeforeClass public static void setup() { @@ -97,7 +104,7 @@ public void testUniquenessWithRandomDocuments() { while (expectedUniqueDocuments > inputDocuments.size() / 2 || expectedUniqueDocuments < 10) { fields.clear(); while (fields.size() < 3) { - fields.add("Attr" + random.nextInt(100)); + fields.add("ATTR" + random.nextInt(100)); } expectedUniqueDocuments = countUniqueness(inputDocuments, fields); } @@ -108,7 +115,7 @@ public void testUniquenessWithRandomDocuments() { assertEquals(expectedUniqueDocuments, uniqueDocuments.size()); } - private int countUniqueness(List input, Set fields) { + protected int countUniqueness(List input, Set fields) { Set uniqueValues = new HashSet<>(); for (Document document : input) { Multimap fieldValues = getFieldValues(document, fields); @@ -117,7 +124,7 @@ private int countUniqueness(List input, Set fields) { return uniqueValues.size(); } - private Multimap getFieldValues(Document document, Set fields) { + protected Multimap getFieldValues(Document document, Set fields) { Multimap values = HashMultimap.create(); for (String docField : document.getDictionary().keySet()) { for (String field : fields) { @@ -134,7 +141,7 @@ private Multimap getFieldValues(Document document, Set fi return values; } - private String getString(Multimap fieldValues) { + protected String getString(Multimap fieldValues) { StringBuilder sb = new StringBuilder(); fieldValues.keySet().stream().sorted().forEach((field) -> { if (sb.length() > 0) { @@ -154,12 +161,12 @@ public void testUniquenessForCaseInsensitivity() { givenInputDocument().withKeyValue("ATTR0", randomValues.get(0)).isExpectedToBeUnique(); givenInputDocument().withKeyValue("ATTR0", randomValues.get(1)).isExpectedToBeUnique(); givenInputDocument().withKeyValue("ATTR0", randomValues.get(0)); - givenInputDocument().withKeyValue("Attr1", randomValues.get(2)).isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr1", randomValues.get(3)).isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr1", randomValues.get(2)); - givenInputDocument().withKeyValue("attr2", randomValues.get(4)).isExpectedToBeUnique(); - givenInputDocument().withKeyValue("attr2", randomValues.get(0)).isExpectedToBeUnique(); - givenInputDocument().withKeyValue("attr2", randomValues.get(4)); + givenInputDocument().withKeyValue("ATTR1", randomValues.get(2)).isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR1", randomValues.get(3)).isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR1", randomValues.get(2)); + givenInputDocument().withKeyValue("ATTR2", randomValues.get(4)).isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR2", randomValues.get(0)).isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR2", randomValues.get(4)); givenValueTransformerForFields(UniqueGranularity.ALL, "attr0", "Attr1", "ATTR2"); @@ -171,11 +178,11 @@ public void testUniquenessForCaseInsensitivity() { */ @Test public void testUniquenessWithValueTransformer_DAY() { - givenInputDocument().withKeyValue("Attr0", "2001-03-10 
10:15:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 12:40:15"); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 05:04:20"); - givenInputDocument().withKeyValue("Attr0", "2001-03-12 05:04:20").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "nonDateValue").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 12:40:15"); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 05:04:20"); + givenInputDocument().withKeyValue("ATTR0", "2001-03-12 05:04:20").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY, "Attr0"); @@ -187,11 +194,11 @@ public void testUniquenessWithValueTransformer_DAY() { */ @Test public void testUniquenessWithValueTransformer_HOUR() { - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:15:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:40:15"); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 05:04:20").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 05:04:30"); - givenInputDocument().withKeyValue("Attr0", "nonDateValue").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:40:15"); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 05:04:20").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 05:04:30"); + givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR, "Attr0"); @@ -203,11 +210,11 @@ public void testUniquenessWithValueTransformer_HOUR() { */ @Test public void testUniquenessWithValueTransformer_MINUTE() { - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:15:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:15:20"); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:04:20").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:04:15"); - givenInputDocument().withKeyValue("Attr0", "nonDateValue").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:20"); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:04:20").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:04:15"); + givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE, "Attr0"); @@ -220,18 +227,18 @@ public void testUniquenessWithValueTransformer_MINUTE() { */ @Test public void testUniquenessWithMixedValueTransformersForDifferentFields() { - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:15:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 12:40:15"); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 05:04:20"); - givenInputDocument().withKeyValue("Attr0", "2001-03-12 05:04:20").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr1", "2001-03-10 10:15:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr1", 
"2001-03-10 10:40:15"); - givenInputDocument().withKeyValue("Attr1", "2001-03-10 05:04:20").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr1", "2001-03-10 05:04:30"); - givenInputDocument().withKeyValue("Attr2", "2001-03-10 10:15:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr2", "2001-03-10 10:15:20"); - givenInputDocument().withKeyValue("Attr2", "2001-03-10 10:04:20").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr2", "2001-03-10 10:04:15"); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 12:40:15"); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 05:04:20"); + givenInputDocument().withKeyValue("ATTR0", "2001-03-12 05:04:20").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR1", "2001-03-10 10:15:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR1", "2001-03-10 10:40:15"); + givenInputDocument().withKeyValue("ATTR1", "2001-03-10 05:04:20").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR1", "2001-03-10 05:04:30"); + givenInputDocument().withKeyValue("ATTR2", "2001-03-10 10:15:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR2", "2001-03-10 10:15:20"); + givenInputDocument().withKeyValue("ATTR2", "2001-03-10 10:04:20").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR2", "2001-03-10 10:04:15"); givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY, "Attr0"); givenValueTransformerForFields(UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR, "Attr1"); @@ -246,12 +253,12 @@ public void testUniquenessWithMixedValueTransformersForDifferentFields() { */ @Test public void testThatValueTransformer_ALL_Supersedes_MINUTE() { - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:15:01").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:15:02").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:15:03").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:15:04").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:15:04"); - givenInputDocument().withKeyValue("Attr0", "nonDateValue").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:01").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:02").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:03").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:04").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:15:04"); + givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); givenValueTransformersForField("Attr0", UniqueGranularity.ALL, UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE); @@ -264,12 +271,12 @@ public void testThatValueTransformer_ALL_Supersedes_MINUTE() { */ @Test public void testThatValueTransformer_MINUTE_Supersedes_HOUR() { - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:01:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:02:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:03:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:04:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 
10:04:20"); - givenInputDocument().withKeyValue("Attr0", "nonDateValue").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:01:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:02:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:03:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:04:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:04:20"); + givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); givenValueTransformersForField("Attr0", UniqueGranularity.TRUNCATE_TEMPORAL_TO_MINUTE, UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR); @@ -282,12 +289,12 @@ public void testThatValueTransformer_MINUTE_Supersedes_HOUR() { */ @Test public void testThatValueTransformer_HOUR_Supersedes_DAY() { - givenInputDocument().withKeyValue("Attr0", "2001-03-10 10:01:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 11:01:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 12:01:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 13:01:15").isExpectedToBeUnique(); - givenInputDocument().withKeyValue("Attr0", "2001-03-10 13:20:15"); - givenInputDocument().withKeyValue("Attr0", "nonDateValue").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 10:01:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 11:01:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 12:01:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 13:01:15").isExpectedToBeUnique(); + givenInputDocument().withKeyValue("ATTR0", "2001-03-10 13:20:15"); + givenInputDocument().withKeyValue("ATTR0", "nonDateValue").isExpectedToBeUnique(); givenValueTransformersForField("Attr0", UniqueGranularity.TRUNCATE_TEMPORAL_TO_HOUR, UniqueGranularity.TRUNCATE_TEMPORAL_TO_DAY); @@ -299,7 +306,7 @@ public void testUniquenessWithTimingMetric() { List input = new ArrayList<>(); List expected = new ArrayList<>(); - String MARKER_STRING = "\u2735FinalDocument\u2735"; + String MARKER_STRING = FinalDocumentTrackingIterator.MARKER_TEXT.toString(); TimingMetadata timingMetadata = new TimingMetadata(); timingMetadata.setNextCount(5l); @@ -326,16 +333,16 @@ public void testUniquenessWithTwoGroups() { // @formatter:off givenInputDocument() - .withKeyValue("Attr0.0.0.0", randomValues.get(0)) - .withKeyValue("Attr1.0.1.0", randomValues.get(1)) - .withKeyValue("Attr0.0.0.1", randomValues.get(2)) - .withKeyValue("Attr1.0.1.1", randomValues.get(3)); + .withKeyValue("ATTR0.0.0.0", randomValues.get(0)) + .withKeyValue("ATTR1.0.1.0", randomValues.get(1)) + .withKeyValue("ATTR0.0.0.1", randomValues.get(2)) + .withKeyValue("ATTR1.0.1.1", randomValues.get(3)); expectedOrderedFieldValues = givenExpectedOrderedFieldValues() - .withKeyValue("Attr0", randomValues.get(0)) - .withKeyValue("Attr1", randomValues.get(1)) - .withKeyValue("Attr0", randomValues.get(2)) - .withKeyValue("Attr1", randomValues.get(3)).build(); + .withKeyValue("ATTR0", randomValues.get(0)) + .withKeyValue("ATTR1", randomValues.get(1)) + .withKeyValue("ATTR0", randomValues.get(2)) + .withKeyValue("ATTR1", randomValues.get(3)).build(); // @formatter:on givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0", "Attr1"); @@ -357,18 +364,18 @@ public void 
testUniquenessWithTwoGroupsAndUngrouped() { // @formatter:off givenInputDocument() - .withKeyValue("Attr0.0.0.0", randomValues.get(0)) - .withKeyValue("Attr1.0.1.0", randomValues.get(1)) - .withKeyValue("Attr0.0.0.1", randomValues.get(2)) - .withKeyValue("Attr1.0.1.1", randomValues.get(3)) - .withKeyValue("Attr3", randomValues.get(4)); + .withKeyValue("ATTR0.0.0.0", randomValues.get(0)) + .withKeyValue("ATTR1.0.1.0", randomValues.get(1)) + .withKeyValue("ATTR0.0.0.1", randomValues.get(2)) + .withKeyValue("ATTR1.0.1.1", randomValues.get(3)) + .withKeyValue("ATTR3", randomValues.get(4)); expectedOrderedFieldValues = givenExpectedOrderedFieldValues() - .withKeyValue("Attr0", randomValues.get(0)) - .withKeyValue("Attr1", randomValues.get(1)) - .withKeyValue("Attr0", randomValues.get(2)) - .withKeyValue("Attr1", randomValues.get(3)) - .withKeyValue("Attr3", randomValues.get(4)).build(); + .withKeyValue("ATTR0", randomValues.get(0)) + .withKeyValue("ATTR1", randomValues.get(1)) + .withKeyValue("ATTR0", randomValues.get(2)) + .withKeyValue("ATTR1", randomValues.get(3)) + .withKeyValue("ATTR3", randomValues.get(4)).build(); // @formatter:on givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0", "Attr1", "Attr3"); @@ -390,18 +397,18 @@ public void testUniquenessWithTwoGroupsAndSeparateGroup() { // @formatter:off givenInputDocument() - .withKeyValue("Attr0.0.0.0", randomValues.get(0)) - .withKeyValue("Attr1.0.1.0", randomValues.get(1)) - .withKeyValue("Attr0.0.0.1", randomValues.get(2)) - .withKeyValue("Attr1.0.1.1", randomValues.get(3)) - .withKeyValue("Attr3.1.0.0", randomValues.get(4)); + .withKeyValue("ATTR0.0.0.0", randomValues.get(0)) + .withKeyValue("ATTR1.0.1.0", randomValues.get(1)) + .withKeyValue("ATTR0.0.0.1", randomValues.get(2)) + .withKeyValue("ATTR1.0.1.1", randomValues.get(3)) + .withKeyValue("ATTR3.1.0.0", randomValues.get(4)); expectedOrderedFieldValues = givenExpectedOrderedFieldValues() - .withKeyValue("Attr0", randomValues.get(0)) - .withKeyValue("Attr1", randomValues.get(1)) - .withKeyValue("Attr0", randomValues.get(2)) - .withKeyValue("Attr1", randomValues.get(3)) - .withKeyValue("Attr3", randomValues.get(4)).build(); + .withKeyValue("ATTR0", randomValues.get(0)) + .withKeyValue("ATTR1", randomValues.get(1)) + .withKeyValue("ATTR0", randomValues.get(2)) + .withKeyValue("ATTR1", randomValues.get(3)) + .withKeyValue("ATTR3", randomValues.get(4)).build(); // @formatter:on givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0", "Attr1", "Attr3"); @@ -424,20 +431,20 @@ public void testUniquenessWithTwoGroupsAndSeparateGroups() { // @formatter:off givenInputDocument() - .withKeyValue("Attr0.0.0.0", randomValues.get(0)) - .withKeyValue("Attr1.0.1.0", randomValues.get(1)) - .withKeyValue("Attr0.0.0.1", randomValues.get(2)) - .withKeyValue("Attr1.0.1.1", randomValues.get(3)) - .withKeyValue("Attr3.1.0.0", randomValues.get(4)) - .withKeyValue("Attr3.1.0.1", randomValues.get(0)); + .withKeyValue("ATTR0.0.0.0", randomValues.get(0)) + .withKeyValue("ATTR1.0.1.0", randomValues.get(1)) + .withKeyValue("ATTR0.0.0.1", randomValues.get(2)) + .withKeyValue("ATTR1.0.1.1", randomValues.get(3)) + .withKeyValue("ATTR3.1.0.0", randomValues.get(4)) + .withKeyValue("ATTR3.1.0.1", randomValues.get(0)); expectedOrderedFieldValues = givenExpectedOrderedFieldValues() - .withKeyValue("Attr0", randomValues.get(0)) - .withKeyValue("Attr1", randomValues.get(1)) - .withKeyValue("Attr0", randomValues.get(2)) - .withKeyValue("Attr1", randomValues.get(3)) - .withKeyValue("Attr3", 
randomValues.get(4)) - .withKeyValue("Attr3", randomValues.get(0)).build(); + .withKeyValue("ATTR0", randomValues.get(0)) + .withKeyValue("ATTR1", randomValues.get(1)) + .withKeyValue("ATTR0", randomValues.get(2)) + .withKeyValue("ATTR1", randomValues.get(3)) + .withKeyValue("ATTR3", randomValues.get(4)) + .withKeyValue("ATTR3", randomValues.get(0)).build(); // @formatter:on givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0", "Attr1", "Attr3"); @@ -458,18 +465,18 @@ public void testUniquenessWithTwoGroupsAndPartialGroups() { // @formatter:off givenInputDocument() - .withKeyValue("Attr0.0.0.0", randomValues.get(0)) - .withKeyValue("Attr1.0.1.0", randomValues.get(1)) - .withKeyValue("Attr0.0.0.1", randomValues.get(2)) - .withKeyValue("Attr3.1.0.0", randomValues.get(4)) - .withKeyValue("Attr3.1.0.1", randomValues.get(0)); + .withKeyValue("ATTR0.0.0.0", randomValues.get(0)) + .withKeyValue("ATTR1.0.1.0", randomValues.get(1)) + .withKeyValue("ATTR0.0.0.1", randomValues.get(2)) + .withKeyValue("ATTR3.1.0.0", randomValues.get(4)) + .withKeyValue("ATTR3.1.0.1", randomValues.get(0)); expectedOrderedFieldValues = givenExpectedOrderedFieldValues() - .withKeyValue("Attr0", randomValues.get(0)) - .withKeyValue("Attr1", randomValues.get(1)) - .withKeyValue("Attr0", randomValues.get(2)) - .withKeyValue("Attr3", randomValues.get(4)) - .withKeyValue("Attr3", randomValues.get(0)).build(); + .withKeyValue("ATTR0", randomValues.get(0)) + .withKeyValue("ATTR1", randomValues.get(1)) + .withKeyValue("ATTR0", randomValues.get(2)) + .withKeyValue("ATTR3", randomValues.get(4)) + .withKeyValue("ATTR3", randomValues.get(0)).build(); // @formatter:on givenValueTransformerForFields(UniqueGranularity.ALL, "Attr0", "Attr1", "Attr3"); @@ -477,27 +484,64 @@ public void testUniquenessWithTwoGroupsAndPartialGroups() { assertOrderedFieldValues(); } - private void assertUniqueDocuments() { + @Test + public void testFinalDocIgnored() { + SortedSetMultimap fieldMap = TreeMultimap.create(); + fieldMap.put("FIELD", UniqueGranularity.ALL); + UniqueFields fields = new UniqueFields(fieldMap); + UniqueTransform transform = new UniqueTransform(fields, 10000000L); + Key key = new Key("shard", "dt\u0000uid", FinalDocumentTrackingIterator.MARKER_TEXT.toString()); + Document doc = new Document(); + Map.Entry entry = new UnmodifiableMapEntry(key, doc); + for (int i = 0; i < 10; i++) { + assertTrue(entry == transform.apply(entry)); + } + } + + @Test + public void testIntermediateIgnored() { + SortedSetMultimap fieldMap = TreeMultimap.create(); + fieldMap.put("FIELD", UniqueGranularity.ALL); + UniqueFields fields = new UniqueFields(fieldMap); + UniqueTransform transform = new UniqueTransform(fields, 10000000L); + Key key = new Key("shard", "dt\u0000uid"); + Document doc = new Document(); + doc.setIntermediateResult(true); + Map.Entry entry = new UnmodifiableMapEntry(key, doc); + for (int i = 0; i < 10; i++) { + assertTrue(entry == transform.apply(entry)); + } + } + + protected void assertUniqueDocuments() { List actual = getUniqueDocumentsWithUpdateConfigCalls(inputDocuments); Collections.sort(expectedUniqueDocuments); Collections.sort(actual); - assertEquals("Unique documents do not match expected", expectedUniqueDocuments, actual); + assertEquals("Unique documents do not match expected", getIds(expectedUniqueDocuments), getIds(actual)); } - private List getUniqueDocuments(List documents) { + protected List getIds(List docs) { + List ids = new ArrayList<>(); + for (Document d : docs) { + 
ids.add(d.getDictionary().get("RECORD_ID").getData().toString()); + } + return ids; + } + + protected List getUniqueDocuments(List documents) { Transformer> docToEntry = document -> Maps.immutableEntry(document.getMetadata(), document); TransformIterator> inputIterator = new TransformIterator<>(documents.iterator(), docToEntry); UniqueTransform uniqueTransform = getUniqueTransform(); - Iterator> resultIterator = Iterators.transform(inputIterator, uniqueTransform); + Iterator> resultIterator = uniqueTransform.getIterator(inputIterator); return StreamSupport.stream(Spliterators.spliteratorUnknownSize(resultIterator, Spliterator.ORDERED), false).filter(Objects::nonNull) .map(Map.Entry::getValue).collect(Collectors.toList()); } - private List getUniqueDocumentsWithUpdateConfigCalls(List documents) { + protected List getUniqueDocumentsWithUpdateConfigCalls(List documents) { Transformer> docToEntry = document -> Maps.immutableEntry(document.getMetadata(), document); TransformIterator> inputIterator = new TransformIterator<>(documents.iterator(), docToEntry); UniqueTransform uniqueTransform = getUniqueTransform(); - Iterator> resultIterator = Iterators.transform(inputIterator, uniqueTransform); + Iterator> resultIterator = uniqueTransform.getIterator(inputIterator); ArrayList docs = new ArrayList<>(); while (resultIterator.hasNext()) { Map.Entry next = resultIterator.next(); @@ -509,7 +553,7 @@ private List getUniqueDocumentsWithUpdateConfigCalls(List do return docs; } - private void assertOrderedFieldValues() { + protected void assertOrderedFieldValues() { try { UniqueTransform uniqueTransform = getUniqueTransform(); for (Document d : inputDocuments) { @@ -521,53 +565,61 @@ private void assertOrderedFieldValues() { } } - private void givenValueTransformerForFields(UniqueGranularity transformer, String... fields) { + protected void givenValueTransformerForFields(UniqueGranularity transformer, String... fields) { Arrays.stream(fields).forEach((field) -> uniqueFields.put(field, transformer)); } - private void givenValueTransformersForField(String field, UniqueGranularity... transformers) { + protected void givenValueTransformersForField(String field, UniqueGranularity... 
transformers) { Arrays.stream(transformers).forEach((transformer) -> uniqueFields.put(field, transformer)); } - private UniqueTransform getUniqueTransform() { - return new UniqueTransform(uniqueFields, Long.MAX_VALUE); + protected UniqueTransform getUniqueTransform() { + try { + return new UniqueTransform.Builder().withUniqueFields(uniqueFields).withQueryExecutionForPageTimeout(Long.MAX_VALUE).build(); + } catch (IOException e) { + throw new RuntimeException(e); + } } - private void updateUniqueTransform(UniqueTransform uniqueTransform) { - uniqueTransform.updateConfig(uniqueFields, null); + protected void updateUniqueTransform(UniqueTransform uniqueTransform) { + uniqueTransform.updateConfig(uniqueFields); } - private InputDocumentBuilder givenInputDocument() { - return new InputDocumentBuilder(); + protected InputDocumentBuilder givenInputDocument() { + return new InputDocumentBuilder("", 0); } - private InputDocumentBuilder givenInputDocument(String docKey) { - return new InputDocumentBuilder(docKey); + protected InputDocumentBuilder givenInputDocument(String cq) { + return new InputDocumentBuilder(cq, 0); } - private ExpectedOrderedFieldValuesBuilder givenExpectedOrderedFieldValues() { - return new ExpectedOrderedFieldValuesBuilder(); + protected InputDocumentBuilder givenInputDocument(long ts) { + return new InputDocumentBuilder("", ts); } - private class InputDocumentBuilder { + protected InputDocumentBuilder givenInputDocument(String docKey, long ts) { + return new InputDocumentBuilder(docKey, ts); + } - private final Document document; + protected ExpectedOrderedFieldValuesBuilder givenExpectedOrderedFieldValues() { + return new ExpectedOrderedFieldValuesBuilder(); + } - InputDocumentBuilder() { - this.document = new Document(); - inputDocuments.add(document); - } + protected class InputDocumentBuilder { - @SuppressWarnings({"UnusedReturnValue", "SameParameterValue"}) - InputDocumentBuilder(String docKey) { + private final Document document; - Text MARKER_TEXT = new Text(docKey); - ByteSequence MARKER_SEQUENCE = new ArrayByteSequence(MARKER_TEXT.getBytes(), 0, MARKER_TEXT.getLength()); - byte EMPTY_BYTES[] = new byte[0]; - Key key = new Key(EMPTY_BYTES, EMPTY_BYTES, MARKER_SEQUENCE.subSequence(0, MARKER_SEQUENCE.length()).toArray()); + InputDocumentBuilder(String cq, long ts) { + Key key = new Key("shardid", "datatype\u0000" + getUid(), cq, ts); this.document = new Document(key, true); inputDocuments.add(document); this.document.getMetadata().set(key); + Attribute docKeyAttributes = new DocumentKey(key, true); + this.document.put(Document.DOCKEY_FIELD_NAME, docKeyAttributes); + } + + String getUid() { + return UUID.nameUUIDFromBytes(Longs.toByteArray(counter.incrementAndGet())).toString(); } @SuppressWarnings({"UnusedReturnValue", "SameParameterValue"}) @@ -590,7 +642,7 @@ private String getRandomKey(int index) { if (random.nextBoolean()) { sb.append(JexlASTHelper.IDENTIFIER_PREFIX); } - return sb.append("Attr").append(index).toString(); + return sb.append("ATTR").append(index).toString(); } private String getRandomValue() { @@ -609,7 +661,7 @@ InputDocumentBuilder isExpectedToBeUnique() { } } - private class ExpectedOrderedFieldValuesBuilder { + protected class ExpectedOrderedFieldValuesBuilder { private Multimap fieldValues = TreeMultimap.create(); @@ -624,13 +676,16 @@ public byte[] build() { try { ByteArrayOutputStream bytes = new ByteArrayOutputStream(); DataOutputStream output = new DataOutputStream(bytes); - int count = 0; for (String field : fieldValues.keySet()) { - String 
separator = "f-" + field + '/' + (count++) + ":"; - for (String value : fieldValues.get(field)) { + String separator = "f-" + field + ":"; + if (fieldValues.isEmpty()) { output.writeUTF(separator); - output.writeUTF(value); - separator = ","; + } else { + for (String value : fieldValues.get(field)) { + output.writeUTF(separator); + output.writeUTF(value); + separator = ","; + } } } output.flush(); diff --git a/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java b/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java index 9ed431a66d9..45eb41c76a5 100644 --- a/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java +++ b/warehouse/query-core/src/test/java/datawave/query/util/WiseGuysIngest.java @@ -54,10 +54,13 @@ public enum WhatKindaRange { protected static final long timeStamp = 1356998400000L; public static final String corleoneUID = UID.builder().newId("Corleone".getBytes(), (Date) null).toString(); + public static final long corleoneTimeStampDelta = 0; public static final String corleoneChildUID = UID.builder().newId("Corleone".getBytes(), (Date) null, "1").toString(); - public static final String sopranoUID = UID.builder().newId("Soprano".toString().getBytes(), (Date) null).toString(); - public static final String caponeUID = UID.builder().newId("Capone".toString().getBytes(), (Date) null).toString(); - public static final String tattagliaUID = UID.builder().newId("Tattaglia".toString().getBytes(), (Date) null).toString(); + public static final String sopranoUID = UID.builder().newId("Soprano".getBytes(), (Date) null).toString(); + public static final long sopranoTimeStampDelta = 10; + public static final String caponeUID = UID.builder().newId("Capone".getBytes(), (Date) null).toString(); + public static final long caponeTimeStampDelta = 20; + public static final String tattagliaUID = UID.builder().newId("Tattaglia".getBytes(), (Date) null).toString(); protected static String normalizeColVal(Map.Entry colVal) { switch (colVal.getKey()) { @@ -96,81 +99,93 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw bw = client.createBatchWriter(TableName.SHARD, bwConfig); mutation = new Mutation(shard); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.0" + "\u0000" + "SANTINO", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.1" + "\u0000" + "FREDO", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.2" + "\u0000" + "MICHAEL", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.3" + "\u0000" + "CONSTANZIA", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.4" + "\u0000" + "LUCA", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NOME.5" + "\u0000" + "VINCENT", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.0" + "\u0000" + "MALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.1" + "\u0000" + "MALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.2" + "\u0000" + "MALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.3" + "\u0000" + "FEMALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.4" + "\u0000" + 
"MALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.5" + "\u0000" + "MALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.0" + "\u0000" + "24", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.1" + "\u0000" + "22", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.2" + "\u0000" + "20", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.3" + "\u0000" + "18", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.4" + "\u0000" + "40", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "ETA.5" + "\u0000" + "22", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "MAGIC.0" + "\u0000" + "18", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "UUID.0" + "\u0000" + "CORLEONE", columnVisibility, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.0" + "\u0000" + "SANTINO", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.1" + "\u0000" + "FREDO", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.2" + "\u0000" + "MICHAEL", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.3" + "\u0000" + "CONSTANZIA", columnVisibility, timeStamp + corleoneTimeStampDelta, + emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.4" + "\u0000" + "LUCA", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NOME.5" + "\u0000" + "VINCENT", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.0" + "\u0000" + "MALE", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.1" + "\u0000" + "MALE", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.2" + "\u0000" + "MALE", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.3" + "\u0000" + "FEMALE", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.4" + "\u0000" + "MALE", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GENERE.5" + "\u0000" + "MALE", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.0" + "\u0000" + "24", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.1" + "\u0000" + "22", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.2" + "\u0000" + "20", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.3" + "\u0000" + "18", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + 
corleoneUID, "ETA.4" + "\u0000" + "40", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "ETA.5" + "\u0000" + "22", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "MAGIC.0" + "\u0000" + "18", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "UUID.0" + "\u0000" + "CORLEONE", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); // CORLEONE date delta is 70 years - mutation.put(datatype + "\u0000" + corleoneUID, "BIRTH_DATE" + "\u0000" + "1930-12-28T00:00:05.000Z", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "DEATH_DATE" + "\u0000" + "2000-12-28T00:00:05.000Z", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "QUOTE" + "\u0000" + "Im gonna make him an offer he cant refuse", columnVisibility, timeStamp, + mutation.put(datatype + "\u0000" + corleoneUID, "BIRTH_DATE" + "\u0000" + "1930-12-28T00:00:05.000Z", columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "DEATH_DATE" + "\u0000" + "2000-12-28T00:00:05.000Z", columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "QUOTE" + "\u0000" + "Im gonna make him an offer he cant refuse", columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "NUMBER" + "\u0000" + "25", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneUID, "GEO" + "\u0000" + "POINT(10 10)", columnVisibility, timeStamp + corleoneTimeStampDelta, + emptyValue); + + mutation.put(datatype + "\u0000" + corleoneChildUID, "UUID.0" + "\u0000" + "ANDOLINI", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "NUMBER" + "\u0000" + "25", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneUID, "GEO" + "\u0000" + "POINT(10 10)", columnVisibility, timeStamp, emptyValue); - - mutation.put(datatype + "\u0000" + corleoneChildUID, "UUID.0" + "\u0000" + "ANDOLINI", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneChildUID, "ETA.0" + "\u0000" + "12", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneChildUID, "BIRTH_DATE" + "\u0000" + "1930-12-28T00:00:05.000Z", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + corleoneChildUID, "DEATH_DATE" + "\u0000" + "2000-12-28T00:00:05.000Z", columnVisibility, timeStamp, emptyValue); - - mutation.put(datatype + "\u0000" + sopranoUID, "NAME.0" + "\u0000" + "ANTHONY", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "NAME.1" + "\u0000" + "MEADOW", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.0" + "\u0000" + "MALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.1" + "\u0000" + "FEMALE", columnVisibility, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + corleoneChildUID, "ETA.0" + "\u0000" + "12", columnVisibility, timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneChildUID, "BIRTH_DATE" + "\u0000" + 
"1930-12-28T00:00:05.000Z", columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + corleoneChildUID, "DEATH_DATE" + "\u0000" + "2000-12-28T00:00:05.000Z", columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + + mutation.put(datatype + "\u0000" + sopranoUID, "NAME.0" + "\u0000" + "ANTHONY", columnVisibility, timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "NAME.1" + "\u0000" + "MEADOW", columnVisibility, timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.0" + "\u0000" + "MALE", columnVisibility, timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "GENDER.1" + "\u0000" + "FEMALE", columnVisibility, timeStamp + sopranoTimeStampDelta, emptyValue); // to test whether singleton values correctly get matched using the function set methods, only add AGE.1 - // mutation.put(datatype + "\u0000" + sopranoUID, "AGE.0" + "\u0000" + "16", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "AGE.0" + "\u0000" + "16", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "AGE.1" + "\u0000" + "18", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "MAGIC.0" + "\u0000" + "18", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "UUID.0" + "\u0000" + "SOPRANO", columnVisibility, timeStamp, emptyValue); + // mutation.put(datatype + "\u0000" + sopranoUID, "AGE.0" + "\u0000" + "16", columnVisibility, timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "AGE.0" + "\u0000" + "16", columnVisibility, timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "AGE.1" + "\u0000" + "18", columnVisibility, timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "MAGIC.0" + "\u0000" + "18", columnVisibility, timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "UUID.0" + "\u0000" + "SOPRANO", columnVisibility, timeStamp + sopranoTimeStampDelta, emptyValue); // soprano date delta is 50 years - mutation.put(datatype + "\u0000" + sopranoUID, "BIRTH_DATE" + "\u0000" + "1950-12-28T00:00:05.000Z", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "DEATH_DATE" + "\u0000" + "2000-12-28T00:00:05.000Z", columnVisibility, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "BIRTH_DATE" + "\u0000" + "1950-12-28T00:00:05.000Z", columnVisibility, + timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "DEATH_DATE" + "\u0000" + "2000-12-28T00:00:05.000Z", columnVisibility, + timeStamp + sopranoTimeStampDelta, emptyValue); mutation.put(datatype + "\u0000" + sopranoUID, "QUOTE" + "\u0000" + "If you can quote the rules then you can obey them", columnVisibility, - timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + sopranoUID, "GEO" + "\u0000" + "POINT(20 20)", columnVisibility, timeStamp, emptyValue); - - mutation.put(datatype + "\u0000" + caponeUID, "NAME.0" + "\u0000" + "ALPHONSE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NAME.1" + "\u0000" + "FRANK", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, 
"NAME.2" + "\u0000" + "RALPH", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NAME.3" + "\u0000" + "MICHAEL", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.0" + "\u0000" + "MALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.1" + "\u0000" + "MALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.2" + "\u0000" + "MALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GENDER.3" + "\u0000" + "MALE", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.0" + "\u0000" + "30", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.1" + "\u0000" + "34", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.2" + "\u0000" + "20", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "AGE.3" + "\u0000" + "40", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "MAGIC.0" + "\u0000" + "18", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "UUID.0" + "\u0000" + "CAPONE", columnVisibility, timeStamp, emptyValue); + timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + sopranoUID, "GEO" + "\u0000" + "POINT(20 20)", columnVisibility, timeStamp + sopranoTimeStampDelta, emptyValue); + + mutation.put(datatype + "\u0000" + caponeUID, "NAME.0" + "\u0000" + "ALPHONSE", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.1" + "\u0000" + "FRANK", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.2" + "\u0000" + "RALPH", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NAME.3" + "\u0000" + "MICHAEL", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.0" + "\u0000" + "MALE", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.1" + "\u0000" + "MALE", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.2" + "\u0000" + "MALE", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GENDER.3" + "\u0000" + "MALE", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.0" + "\u0000" + "30", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.1" + "\u0000" + "34", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.2" + "\u0000" + "20", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "AGE.3" + "\u0000" + "40", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "MAGIC.0" + "\u0000" + "18", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "UUID.0" + "\u0000" + "CAPONE", 
columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); // capone date delta is 89 or 90 years - mutation.put(datatype + "\u0000" + caponeUID, "BIRTH_DATE.0" + "\u0000" + "1910-12-28T00:00:05.000Z", columnVisibility, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "BIRTH_DATE.0" + "\u0000" + "1910-12-28T00:00:05.000Z", columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); // add a second date to test function taking an Iterable - mutation.put(datatype + "\u0000" + caponeUID, "BIRTH_DATE.1" + "\u0000" + "1911-12-28T00:00:05.000Z", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "DEATH_DATE.0" + "\u0000" + "2000-12-28T00:00:05.000Z", columnVisibility, timeStamp, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "BIRTH_DATE.1" + "\u0000" + "1911-12-28T00:00:05.000Z", columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "DEATH_DATE.0" + "\u0000" + "2000-12-28T00:00:05.000Z", columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); mutation.put(datatype + "\u0000" + caponeUID, "QUOTE" + "\u0000" + "You can get much farther with a kind word and a gun than you can with a kind word alone", columnVisibility, - timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "NUMBER" + "\u0000" + "25", columnVisibility, timeStamp, emptyValue); - mutation.put(datatype + "\u0000" + caponeUID, "GEO" + "\u0000" + "POINT(30 30)", columnVisibility, timeStamp, emptyValue); + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "NUMBER" + "\u0000" + "25", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put(datatype + "\u0000" + caponeUID, "GEO" + "\u0000" + "POINT(30 30)", columnVisibility, timeStamp + caponeTimeStampDelta, emptyValue); // second datatype shard data mutation.put(secondDataType + "\u0000" + tattagliaUID, "NAME.0" + "\u0000" + "Philip", columnVisibility, timeStamp, emptyValue); @@ -410,9 +425,10 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw bw.addMutation(mutation); // add some tokens - addTokens(bw, range, "QUOTE", "Im gonna make him an offer he cant refuse", corleoneUID); - addTokens(bw, range, "QUOTE", "If you can quote the rules then you can obey them", sopranoUID); - addTokens(bw, range, "QUOTE", "You can get much farther with a kind word and a gun than you can with a kind word alone", caponeUID); + addTokens(bw, range, "QUOTE", "Im gonna make him an offer he cant refuse", corleoneUID, corleoneTimeStampDelta); + addTokens(bw, range, "QUOTE", "If you can quote the rules then you can obey them", sopranoUID, sopranoTimeStampDelta); + addTokens(bw, range, "QUOTE", "You can get much farther with a kind word and a gun than you can with a kind word alone", caponeUID, + caponeTimeStampDelta); } finally { if (null != bw) { bw.close(); @@ -637,103 +653,112 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw // corleones // uuid mutation.put("fi\u0000" + "UUID", lcNoDiacriticsType.normalize("CORLEONE") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, - timeStamp, emptyValue); + timeStamp + corleoneTimeStampDelta, emptyValue); // uuid mutation.put("fi\u0000" + "UUID", lcNoDiacriticsType.normalize("ANDOLINI") + "\u0000" + datatype + "\u0000" + corleoneChildUID, columnVisibility, - timeStamp, emptyValue); + timeStamp + corleoneTimeStampDelta, emptyValue); // names 
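+            // (each field-index entry below: column family "fi\u0000" + FIELD, column qualifier normalizedValue + "\u0000" + datatype + "\u0000" + uid,
+            // written with its record's timestamp delta applied)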
mutation.put("fi\u0000" + "NOME", lcNoDiacriticsType.normalize("SANTINO") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, - timeStamp, emptyValue); - mutation.put("fi\u0000" + "NOME", lcNoDiacriticsType.normalize("FREDO") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, - emptyValue); + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "NOME", lcNoDiacriticsType.normalize("FREDO") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); mutation.put("fi\u0000" + "NOME", lcNoDiacriticsType.normalize("MICHAEL") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, - timeStamp, emptyValue); + timeStamp + corleoneTimeStampDelta, emptyValue); mutation.put("fi\u0000" + "NOME", lcNoDiacriticsType.normalize("CONSTANZIA") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, - timeStamp, emptyValue); - mutation.put("fi\u0000" + "NOME", lcNoDiacriticsType.normalize("LUCA") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, - emptyValue); + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "NOME", lcNoDiacriticsType.normalize("LUCA") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); mutation.put("fi\u0000" + "NOME", lcNoDiacriticsType.normalize("VINCENT") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, - timeStamp, emptyValue); + timeStamp + corleoneTimeStampDelta, emptyValue); // genders mutation.put("fi\u0000" + "GENERE", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, - timeStamp, emptyValue); + timeStamp + corleoneTimeStampDelta, emptyValue); mutation.put("fi\u0000" + "GENERE", lcNoDiacriticsType.normalize("FEMALE") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, - timeStamp, emptyValue); + timeStamp + corleoneTimeStampDelta, emptyValue); // ages - mutation.put("fi\u0000" + "ETA", numberType.normalize("24") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, - emptyValue); - mutation.put("fi\u0000" + "ETA", numberType.normalize("22") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, - emptyValue); - mutation.put("fi\u0000" + "ETA", numberType.normalize("20") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, - emptyValue); - mutation.put("fi\u0000" + "ETA", numberType.normalize("18") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, - emptyValue); - mutation.put("fi\u0000" + "ETA", numberType.normalize("40") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, - emptyValue); - mutation.put("fi\u0000" + "ETA", numberType.normalize("12") + "\u0000" + datatype + "\u0000" + corleoneChildUID, columnVisibility, timeStamp, - emptyValue); + mutation.put("fi\u0000" + "ETA", numberType.normalize("24") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "ETA", numberType.normalize("22") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "ETA", numberType.normalize("20") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "ETA", 
numberType.normalize("18") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "ETA", numberType.normalize("40") + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "ETA", numberType.normalize("12") + "\u0000" + datatype + "\u0000" + corleoneChildUID, columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); // geo for (String normalized : ((OneToManyNormalizerType) geoType).normalizeToMany("POINT(10 10)")) { - mutation.put("fi\u0000" + "GEO", normalized + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, emptyValue); + mutation.put("fi\u0000" + "GEO", normalized + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); } // sopranos // uuid mutation.put("fi\u0000" + "UUID", lcNoDiacriticsType.normalize("SOPRANO") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, - timeStamp, emptyValue); + timeStamp + sopranoTimeStampDelta, emptyValue); // names mutation.put("fi\u0000" + "NAME", lcNoDiacriticsType.normalize("ANTHONY") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, - timeStamp, emptyValue); - mutation.put("fi\u0000" + "NAME", lcNoDiacriticsType.normalize("MEADOW") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, timeStamp, - emptyValue); + timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "NAME", lcNoDiacriticsType.normalize("MEADOW") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, + timeStamp + sopranoTimeStampDelta, emptyValue); // genders - mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, timeStamp, - emptyValue); + mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, + timeStamp + sopranoTimeStampDelta, emptyValue); mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("FEMALE") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, - timeStamp, emptyValue); + timeStamp + sopranoTimeStampDelta, emptyValue); // ages - mutation.put("fi\u0000" + "AGE", numberType.normalize("16") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, timeStamp, emptyValue); - mutation.put("fi\u0000" + "AGE", numberType.normalize("18") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, timeStamp, emptyValue); + mutation.put("fi\u0000" + "AGE", numberType.normalize("16") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, + timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "AGE", numberType.normalize("18") + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, + timeStamp + sopranoTimeStampDelta, emptyValue); // geo for (String normalized : ((OneToManyNormalizerType) geoType).normalizeToMany("POINT(20 20)")) { - mutation.put("fi\u0000" + "GEO", normalized + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, emptyValue); + mutation.put("fi\u0000" + "GEO", normalized + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, timeStamp + sopranoTimeStampDelta, + emptyValue); } // capones // uuid - mutation.put("fi\u0000" + "UUID", lcNoDiacriticsType.normalize("CAPONE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, - 
emptyValue); + mutation.put("fi\u0000" + "UUID", lcNoDiacriticsType.normalize("CAPONE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); // names mutation.put("fi\u0000" + "NAME", lcNoDiacriticsType.normalize("ALPHONSE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, - timeStamp, emptyValue); - mutation.put("fi\u0000" + "NAME", lcNoDiacriticsType.normalize("FRANK") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, - emptyValue); - mutation.put("fi\u0000" + "NAME", lcNoDiacriticsType.normalize("RALPH") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, - emptyValue); - mutation.put("fi\u0000" + "NAME", lcNoDiacriticsType.normalize("MICHAEL") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, - emptyValue); + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "NAME", lcNoDiacriticsType.normalize("FRANK") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "NAME", lcNoDiacriticsType.normalize("RALPH") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "NAME", lcNoDiacriticsType.normalize("MICHAEL") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); // genders - mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, - emptyValue); - mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, - emptyValue); - mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, - emptyValue); - mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, - emptyValue); + mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "GENDER", lcNoDiacriticsType.normalize("MALE") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); // ages - mutation.put("fi\u0000" + "AGE", numberType.normalize("30") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, emptyValue); - mutation.put("fi\u0000" + "AGE", numberType.normalize("34") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, emptyValue); - mutation.put("fi\u0000" + "AGE", numberType.normalize("20") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, emptyValue); - mutation.put("fi\u0000" + "AGE", numberType.normalize("40") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, emptyValue); + mutation.put("fi\u0000" + "AGE", 
numberType.normalize("30") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "AGE", numberType.normalize("34") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "AGE", numberType.normalize("20") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "AGE", numberType.normalize("40") + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, + timeStamp + caponeTimeStampDelta, emptyValue); // geo for (String normalized : ((OneToManyNormalizerType) geoType).normalizeToMany("POINT(30 30)")) { - mutation.put("fi\u0000" + "GEO", normalized + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, emptyValue); + mutation.put("fi\u0000" + "GEO", normalized + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp + caponeTimeStampDelta, + emptyValue); } // second datatype field index data @@ -751,16 +776,22 @@ public static void writeItAll(AccumuloClient client, WhatKindaRange range) throw emptyValue); // add some index-only fields - mutation.put("fi\u0000" + "LOCATION", "chicago" + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, emptyValue); - mutation.put("fi\u0000" + "POSIZIONE", "newyork" + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, timeStamp, emptyValue); - mutation.put("fi\u0000" + "LOCATION", "newjersey" + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, timeStamp, emptyValue); - mutation.put("fi\u0000" + "SENTENCE", "11y" + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp, emptyValue); + mutation.put("fi\u0000" + "LOCATION", "chicago" + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp + caponeTimeStampDelta, + emptyValue); + mutation.put("fi\u0000" + "POSIZIONE", "newyork" + "\u0000" + datatype + "\u0000" + corleoneUID, columnVisibility, + timeStamp + corleoneTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "LOCATION", "newjersey" + "\u0000" + datatype + "\u0000" + sopranoUID, columnVisibility, + timeStamp + sopranoTimeStampDelta, emptyValue); + mutation.put("fi\u0000" + "SENTENCE", "11y" + "\u0000" + datatype + "\u0000" + caponeUID, columnVisibility, timeStamp + caponeTimeStampDelta, + emptyValue); bw.addMutation(mutation); - addFiTfTokens(bw, range, "QUOTE", "Im gonna make him an offer he cant refuse", corleoneUID); - addFiTfTokens(bw, range, "QUOTE", "If you can quote the rules then you can obey them", sopranoUID); - addFiTfTokens(bw, range, "QUOTE", "You can get much farther with a kind word and a gun than you can with a kind word alone", caponeUID); + addFiTfTokens(bw, range, "QUOTE", "Im gonna make him an offer he cant refuse", corleoneUID, corleoneTimeStampDelta); + addFiTfTokens(bw, range, "QUOTE", "If you can quote the rules then you can obey them", sopranoUID, sopranoTimeStampDelta); + addFiTfTokens(bw, range, "QUOTE", "You can get much farther with a kind word and a gun than you can with a kind word alone", caponeUID, + caponeTimeStampDelta); + } finally { if (null != bw) { bw.close(); @@ -1060,31 +1091,33 @@ private static Value getValueForNuthinAndYourHitsForFree() { return new Value(builder.build().toByteArray()); } - private static void addTokens(BatchWriter bw, WhatKindaRange range, String field, String phrase, String uid) throws MutationsRejectedException { 
+    private static void addTokens(BatchWriter bw, WhatKindaRange range, String field, String phrase, String uid, long timeStampDelta)
+                    throws MutationsRejectedException {
         Mutation mutation = new Mutation(lcNoDiacriticsType.normalize(phrase));
-        mutation.put(field.toUpperCase(), shard + "\u0000" + datatype, columnVisibility, timeStamp,
+        mutation.put(field.toUpperCase(), shard + "\u0000" + datatype, columnVisibility, timeStamp + timeStampDelta,
                         range == WhatKindaRange.SHARD ? getValueForNuthinAndYourHitsForFree() : getValueForBuilderFor(uid));
         bw.addMutation(mutation);
 
         String[] tokens = phrase.split(" ");
         for (String token : tokens) {
             mutation = new Mutation(lcNoDiacriticsType.normalize(token));
-            mutation.put(field.toUpperCase(), shard + "\u0000" + datatype, columnVisibility, timeStamp,
+            mutation.put(field.toUpperCase(), shard + "\u0000" + datatype, columnVisibility, timeStamp + timeStampDelta,
                             range == WhatKindaRange.SHARD ? getValueForNuthinAndYourHitsForFree() : getValueForBuilderFor(uid));
             bw.addMutation(mutation);
         }
     }
 
-    private static void addFiTfTokens(BatchWriter bw, WhatKindaRange range, String field, String phrase, String uid) throws MutationsRejectedException {
+    private static void addFiTfTokens(BatchWriter bw, WhatKindaRange range, String field, String phrase, String uid, long timeStampDelta)
+                    throws MutationsRejectedException {
         Mutation fi = new Mutation(shard);
-        fi.put("fi\u0000" + field.toUpperCase(), lcNoDiacriticsType.normalize(phrase) + "\u0000" + datatype + "\u0000" + uid, columnVisibility, timeStamp,
-                        emptyValue);
+        fi.put("fi\u0000" + field.toUpperCase(), lcNoDiacriticsType.normalize(phrase) + "\u0000" + datatype + "\u0000" + uid, columnVisibility,
+                        timeStamp + timeStampDelta, emptyValue);
 
         OffsetQueue<Integer> tokenOffsetCache = new BoundedOffsetQueue<>(500);
         int i = 0;
         String[] tokens = phrase.split(" ");
         for (String token : tokens) {
-            fi.put("fi\u0000" + field.toUpperCase(), lcNoDiacriticsType.normalize(token) + "\u0000" + datatype + "\u0000" + uid, columnVisibility, timeStamp,
-                            emptyValue);
+            fi.put("fi\u0000" + field.toUpperCase(), lcNoDiacriticsType.normalize(token) + "\u0000" + datatype + "\u0000" + uid, columnVisibility,
+                            timeStamp + timeStampDelta, emptyValue);
 
             tokenOffsetCache.addOffset(new TermAndZone(token, field.toUpperCase()), i);
             i++;
@@ -1097,7 +1130,7 @@ private static void addFiTfTokens(BatchWriter bw, WhatKindaRange range, String f
         }
         Value value = new Value(builder.build().toByteArray());
         fi.put("tf", datatype + "\u0000" + uid + "\u0000" + lcNoDiacriticsType.normalize(nfv.getIndexedFieldValue()) + "\u0000" + nfv.getIndexedFieldName(),
-                        columnVisibility, timeStamp, value);
+                        columnVisibility, timeStamp + timeStampDelta, value);
     }
     bw.addMutation(fi);
 }
diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedByteDocumentSortedMapTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedByteDocumentSortedMapTest.java
new file mode 100644
index 00000000000..07b99fdef6e
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedByteDocumentSortedMapTest.java
@@ -0,0 +1,71 @@
+package datawave.query.util.sortedmap;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.commons.collections.keyvalue.UnmodifiableMapEntry;
+
+import datawave.data.type.LcNoDiacriticsType;
+import datawave.query.attributes.Document;
+import datawave.query.composite.CompositeMetadata;
+import datawave.query.predicate.EventDataQueryFieldFilter;
+import datawave.query.util.TypeMetadata;
+import datawave.query.util.sortedmap.rfile.KeyValueByteDocumenTransformsTest;
+import datawave.query.util.sortedset.ByteArrayComparator;
+
+public class BufferedFileBackedByteDocumentSortedMapTest extends BufferedFileBackedRewritableSortedMapTest<byte[],Document> {
+
+    private Comparator<byte[]> keyComparator = new ByteArrayComparator();
+
+    private FileSortedMap.RewriteStrategy<byte[],Document> keyValueComparator = new FileSortedMap.RewriteStrategy<>() {
+        @Override
+        public boolean rewrite(byte[] key, Document original, Document update) {
+            long ts1 = original.get(Document.DOCKEY_FIELD_NAME).getTimestamp();
+            long ts2 = update.get(Document.DOCKEY_FIELD_NAME).getTimestamp();
+            return (ts2 > ts1);
+        }
+    };
+
+    @Override
+    protected void testEquality(Map.Entry<byte[],Document> expected, Map.Entry<byte[],Document> value) {
+        testEquality(expected.getKey(), value.getKey());
+        KeyValueByteDocumenTransformsTest.assertDocumentEquals(expected.getValue(), value.getValue());
+    }
+
+    @Override
+    public FileSortedMap.RewriteStrategy<byte[],Document> getRewriteStrategy() {
+        return keyValueComparator;
+    }
+
+    @Override
+    public byte[] createKey(byte[] values) {
+        return values;
+    }
+
+    @Override
+    public Document createValue(byte[] values) {
+        Key docKey = new Key("20200101_1", "datatype\u0000uid", "", values[0]);
+        Key attrKey = new Key("20200101_1", "datatype\u0000uid", "FIELD\u0000VALUE", values[0]);
+        List<Map.Entry<Key,Value>> attrs = new ArrayList<>();
+        attrs.add(new UnmodifiableMapEntry(attrKey, new Value()));
+        Document doc = new Document(docKey, Collections.singleton(docKey), false, attrs.iterator(),
+                        new TypeMetadata().put("FIELD", "datatype", LcNoDiacriticsType.class.getName()), new CompositeMetadata(), true, true,
+                        new EventDataQueryFieldFilter());
+        return doc;
+    }
+
+    @Override
+    public Comparator<byte[]> getComparator() {
+        return keyComparator;
+    }
+
+    @Override
+    public FileSortedMap.FileSortedMapFactory<byte[],Document> getFactory() {
+        return new FileByteDocumentSortedMap.Factory();
+    }
+}
diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedKeyDocumentSortedMapTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedKeyDocumentSortedMapTest.java
new file mode 100644
index 00000000000..8e3086d6d0b
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedKeyDocumentSortedMapTest.java
@@ -0,0 +1,75 @@
+package datawave.query.util.sortedmap;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.commons.collections.keyvalue.UnmodifiableMapEntry;
+
+import datawave.data.type.LcNoDiacriticsType;
+import datawave.query.attributes.Document;
+import datawave.query.composite.CompositeMetadata;
+import datawave.query.predicate.EventDataQueryFieldFilter;
+import datawave.query.util.TypeMetadata;
+import datawave.query.util.sortedmap.rfile.KeyValueByteDocumenTransformsTest;
+
+public class BufferedFileBackedKeyDocumentSortedMapTest extends BufferedFileBackedRewritableSortedMapTest<Key,Document> {
+
+    private Comparator<Key> keyComparator = new Comparator<>() {
+        @Override
+        public int compare(Key o1, Key o2) {
+            return o1.compareTo(o2);
+        }
+    };
+
+    private FileSortedMap.RewriteStrategy<Key,Document> keyValueComparator = new FileSortedMap.RewriteStrategy<>() {
+        @Override
+        public boolean rewrite(Key key, Document original, Document update) {
+            long ts1 = original.get(Document.DOCKEY_FIELD_NAME).getTimestamp();
+            long ts2 = update.get(Document.DOCKEY_FIELD_NAME).getTimestamp();
+            return (ts2 > ts1);
+        }
+    };
+
+    @Override
+    public FileSortedMap.RewriteStrategy<Key,Document> getRewriteStrategy() {
+        return keyValueComparator;
+    }
+
+    @Override
+    public Key createKey(byte[] values) {
+        return new Key(values);
+    }
+
+    @Override
+    public Document createValue(byte[] values) {
+        Key docKey = new Key("20200101_1", "datatype\u0000uid", "", values[0]);
+        Key attrKey = new Key("20200101_1", "datatype\u0000uid", "FIELD\u0000VALUE", values[0]);
+        List<Map.Entry<Key,Value>> attrs = new ArrayList<>();
+        attrs.add(new UnmodifiableMapEntry(attrKey, new Value()));
+        Document doc = new Document(docKey, Collections.singleton(docKey), false, attrs.iterator(),
+                        new TypeMetadata().put("FIELD", "datatype", LcNoDiacriticsType.class.getName()), new CompositeMetadata(), true, true,
+                        new EventDataQueryFieldFilter());
+        return doc;
+    }
+
+    @Override
+    protected void testEquality(Map.Entry<Key,Document> expected, Map.Entry<Key,Document> value) {
+        testEquality(expected.getKey(), value.getKey());
+        KeyValueByteDocumenTransformsTest.assertDocumentEquals(expected.getValue(), value.getValue());
+    }
+
+    @Override
+    public Comparator<Key> getComparator() {
+        return keyComparator;
+    }
+
+    @Override
+    public FileSortedMap.FileSortedMapFactory<Key,Document> getFactory() {
+        return new FileKeyDocumentSortedMap.Factory();
+    }
+}
diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedKeyValueSortedMapTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedKeyValueSortedMapTest.java
new file mode 100644
index 00000000000..f72407a337f
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedKeyValueSortedMapTest.java
@@ -0,0 +1,58 @@
+package datawave.query.util.sortedmap;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Comparator;
+import java.util.Map;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+
+public class BufferedFileBackedKeyValueSortedMapTest extends BufferedFileBackedRewritableSortedMapTest<Key,Value> {
+
+    private Comparator<Key> keyComparator = new Comparator<>() {
+        @Override
+        public int compare(Key o1, Key o2) {
+            return o1.compareTo(o2);
+        }
+    };
+
+    private FileSortedMap.RewriteStrategy<Key,Value> keyValueComparator = new FileSortedMap.RewriteStrategy<>() {
+        @Override
+        public boolean rewrite(Key key, Value original, Value update) {
+            return original.compareTo(update) < 0;
+        }
+    };
+
+    @Override
+    public FileSortedMap.RewriteStrategy<Key,Value> getRewriteStrategy() {
+        return keyValueComparator;
+    }
+
+    @Override
+    public Key createKey(byte[] values) {
+        return new Key(values);
+    }
+
+    @Override
+    public Value createValue(byte[] values) {
+        return new Value(values);
+    }
+
+    @Override
+    public void testEquality(Map.Entry<Key,Value> expected, Map.Entry<Key,Value> value) {
+        assertEquals(expected.getKey(), value.getKey());
+        assertEquals(expected.getValue(), value.getValue());
+    }
+
+    @Override
+    public Comparator<Key> getComparator() {
+        return keyComparator;
+    }
+
+    @Override
+    public FileSortedMap.FileSortedMapFactory<Key,Value> getFactory() {
+        return new FileKeyValueSortedMap.Factory();
+    }
+
+}
diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedRewritableSortedMapTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedRewritableSortedMapTest.java
new file mode 100644
index 00000000000..42b8c6b443c
--- /dev/null
+++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedRewritableSortedMapTest.java
@@ -0,0 +1,98 @@
+package datawave.query.util.sortedmap;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.commons.collections.keyvalue.UnmodifiableMapEntry;
+import org.junit.Test;
+
+public abstract class BufferedFileBackedRewritableSortedMapTest<K,V> extends BufferedFileBackedSortedMapTest<K,V> {
+
+    /**
+     * Create a key from the specified bytes. The key should sort the same way the underlying byte arrays sort against one another.
+     *
+     * @param values
+     * @return The key
+     */
+    public abstract K createKey(byte[] values);
+
+    /**
+     * Create a value from the specified bytes.
+     *
+     * @param values
+     * @return The value
+     */
+    public abstract V createValue(byte[] values);
+
+    /**
+     * Get a rewrite strategy. The strategy should allow a rewrite only when the update value is greater than the original (e.g. carries a newer timestamp).
+     *
+     * @return the rewrite strategy appropriate for key and value types
+     */
+    @Override
+    public abstract FileSortedMap.RewriteStrategy<K,V> getRewriteStrategy();
+
+    @Override
+    public Map.Entry<K,V> createData(byte[] values) {
+        byte[] vbuffer = new byte[values.length];
+        Arrays.fill(vbuffer, (byte) (values[0] + 1));
+        return new UnmodifiableMapEntry(createKey(values), createValue(vbuffer));
+    }
+
+    @Test
+    public void testRewrite() throws Exception {
+        // create a second set of data, none of whose values should win a rewrite: the first half duplicates the original values and the second half has
+        // smaller values
+        Map.Entry<K,V>[] data2 = new Map.Entry[template.length * 2];
+        for (int i = 0; i < template.length; i++) {
+            byte[] buffer = new byte[i + 11];
+            Arrays.fill(buffer, template[i]);
+            byte[] vbuffer = new byte[buffer.length];
+            Arrays.fill(vbuffer, (byte) (template[i] + 1));
+            data2[i] = new UnmodifiableMapEntry(createKey(buffer), createValue(vbuffer));
+        }
+        for (int i = 0; i < template.length; i++) {
+            byte[] buffer = new byte[i + 10];
+            Arrays.fill(buffer, template[i]);
+            byte[] vbuffer = new byte[buffer.length];
+            Arrays.fill(vbuffer, (byte) (template[i] - 1));
+            Map.Entry<K,V> datum = new UnmodifiableMapEntry(createKey(buffer), createValue(vbuffer));
+            data2[i + template.length] = datum;
+        }
+
+        // create a map with the supplied rewrite strategy
+        map = new BufferedFileBackedSortedMap.Builder().withComparator(getComparator()).withRewriteStrategy(getRewriteStrategy()).withBufferPersistThreshold(5)
+                        .withMaxOpenFiles(7).withNumRetries(2)
+                        .withHandlerFactories(Collections.singletonList(new BufferedFileBackedSortedMap.SortedMapFileHandlerFactory() {
+                            @Override
+                            public FileSortedMap.SortedMapFileHandler createHandler() throws IOException {
+                                SortedMapTempFileHandler fileHandler = new SortedMapTempFileHandler();
+                                tempFileHandlers.add(fileHandler);
+                                return fileHandler;
+                            }
+
+                            @Override
+                            public boolean isValid() {
+                                return true;
+                            }
+                        })).withMapFactory(getFactory()).build();
+
+        // add the data multiple times to create underlying files with duplicate values, making the
+        // MergeSortIterator's job a little tougher...
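+        // (each of the 11 rounds below inserts both data sets in random order, so every key lands in several underlying buffers and files)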
+ for (int d = 0; d < 11; d++) { + addDataRandomly(map, data); + addDataRandomly(map, data2); + } + + // now test the contents making sure we still have a sorted set with the expected values + int index = 0; + for (Iterator> it = map.entrySet().iterator(); it.hasNext();) { + Map.Entry value = it.next(); + int dataIndex = sortedOrder[index++]; + Map.Entry expected = (dataIndex < template.length ? data2[dataIndex] : data[dataIndex]); + testEquality(expected, value); + } + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedSortedMapTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedSortedMapTest.java new file mode 100644 index 00000000000..40e78a805c6 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/BufferedFileBackedSortedMapTest.java @@ -0,0 +1,395 @@ +package datawave.query.util.sortedmap; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.commons.collections.keyvalue.UnmodifiableMapEntry; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public abstract class BufferedFileBackedSortedMapTest { + + protected final List tempFileHandlers = new ArrayList<>(); + protected Map.Entry[] data = null; + protected int[] sortedOrder = null; + protected datawave.query.util.sortedmap.BufferedFileBackedSortedMap map = null; + protected final byte[] template = new byte[] {5, 2, 29, 4, 8, 3, 25, 23, 6, 21, 7, 16}; + protected final int[] sortedTemplate = new int[] {1, 5, 3, 0, 8, 10, 4, 11, 9, 7, 6, 2}; + + public abstract Map.Entry createData(byte[] values); + + public abstract Comparator getComparator(); + + public abstract datawave.query.util.sortedmap.FileSortedMap.FileSortedMapFactory getFactory(); + + public FileSortedMap.RewriteStrategy getRewriteStrategy() { + return null; + } + + protected void testEquality(SortedMap m1, SortedMap m2) { + for (Map.Entry e1 : m1.entrySet()) { + V v2 = m2.get(e1.getKey()); + testEquality(e1, new UnmodifiableMapEntry(e1.getKey(), v2)); + } + } + + protected void testEquality(Map.Entry expected, Map.Entry value) { + testEquality(expected.getKey(), value.getKey()); + assertEquals(expected.getValue(), value.getValue()); + } + + protected void testEquality(K expected, K value) { + if (map.comparator() != null) { + assertEquals(0, map.comparator().compare(expected, value)); + } else { + assertEquals(expected, value); + } + } + + @Before + public void mapUp() throws Exception { + for (int i = 0; i < template.length; i++) { + byte[] buffer = new byte[i + 11]; + Arrays.fill(buffer, template[i]); + Map.Entry datum = createData(buffer); + if (i == 0) { + data = (Map.Entry[]) Array.newInstance(datum.getClass(), template.length * 2); + } + data[i] = datum; + } + for (int i = 0; i < template.length; i++) { + byte[] buffer = new byte[i + 10]; + Arrays.fill(buffer, template[i]); + Map.Entry datum = createData(buffer); + data[i + template.length] = datum; + } 
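+        // the (i + 10)-byte array for template[i] is a strict prefix of the (i + 11)-byte array, so (assuming a comparator that sorts a prefix before its
+        // extensions) it lands immediately before it; sortedOrder interleaves the two halves of data accordingly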
+ sortedOrder = new int[data.length]; + for (int i = 0; i < template.length; i++) { + sortedOrder[i * 2] = sortedTemplate[i] + sortedTemplate.length; + sortedOrder[i * 2 + 1] = sortedTemplate[i]; + } + map = new datawave.query.util.sortedmap.BufferedFileBackedSortedMap.Builder().withComparator(getComparator()).withRewriteStrategy(getRewriteStrategy()) + .withBufferPersistThreshold(5).withMaxOpenFiles(7).withNumRetries(2).withHandlerFactories( + Collections.singletonList(new datawave.query.util.sortedmap.BufferedFileBackedSortedMap.SortedMapFileHandlerFactory() { + @Override + public FileSortedMap.SortedMapFileHandler createHandler() throws IOException { + datawave.query.util.sortedmap.SortedMapTempFileHandler fileHandler = new datawave.query.util.sortedmap.SortedMapTempFileHandler(); + tempFileHandlers.add(fileHandler); + return fileHandler; + } + + @Override + public boolean isValid() { + return true; + } + })) + .withMapFactory(getFactory()).build(); + + // adding in the data map multiple times to create underlying files with duplicate values making the + // MergeSortIterator's job a little tougher... + for (int d = 0; d < 11; d++) { + addDataRandomly(map, data); + } + while (map.getMaps().size() <= 7) { + addDataRandomly(map, data); + } + } + + public void addDataRandomly(BufferedFileBackedSortedMap map, Map.Entry[] data) { + Set added = new HashSet<>(); + Random random = new Random(); + // add data.length items randomly + for (int i = 0; i < data.length; i++) { + int index = random.nextInt(data.length); + map.put(data[index].getKey(), data[index].getValue()); + added.add(index); + } + // ensure all missing items are added + for (int i = 0; i < data.length; i++) { + if (!added.contains(i)) { + map.put(data[i].getKey(), data[i].getValue()); + } + } + } + + @After + public void tearDown() throws Exception { + // Delete each sorted map file and its checksum. + for (SortedMapTempFileHandler fileHandler : tempFileHandlers) { + File file = fileHandler.getFile(); + tryDelete(file); + File checksum = new File(file.getParent(), "." 
+ file.getName() + ".crc"); + tryDelete(checksum); + } + tempFileHandlers.clear(); + + data = null; + sortedOrder = null; + map.clear(); + map = null; + } + + private void tryDelete(File file) { + if (file.exists()) { + Assert.assertTrue("Failed to delete file " + file, file.delete()); + } + } + + @Test + public void testSize() { + int expectedSize = data.length; + assertEquals(expectedSize, map.size()); + for (int i = (data.length / 2); i < data.length; i++) { + map.remove(data[i].getKey()); + expectedSize--; + assertEquals(expectedSize, map.size()); + } + for (int i = 0; i < (data.length / 2); i++) { + map.remove(data[i].getKey()); + expectedSize--; + assertEquals(expectedSize, map.size()); + } + assertEquals(0, map.size()); + for (int i = 0; i < data.length; i++) { + map.put(data[i].getKey(), data[i].getValue()); + expectedSize++; + assertEquals(expectedSize, map.size()); + } + } + + @Test + public void testIsEmpty() { + assertFalse(map.isEmpty()); + for (int i = (data.length / 2); i < data.length; i++) { + map.remove(data[i].getKey()); + assertFalse(map.isEmpty()); + } + for (int i = 1; i < (data.length / 2); i++) { + map.remove(data[i].getKey()); + assertFalse(map.isEmpty()); + } + map.remove(data[0].getKey()); + assertTrue(map.isEmpty()); + for (int i = 0; i < data.length; i++) { + map.put(data[i].getKey(), data[i].getValue()); + assertFalse(map.isEmpty()); + } + } + + @Test + public void testClear() { + map.clear(); + assertTrue(map.isEmpty()); + } + + @Test + public void testContainsObject() { + for (int i = (data.length / 2); i < data.length; i++) { + map.remove(data[i].getKey()); + } + for (int i = 1; i < (data.length / 2); i++) { + assertTrue(map.containsKey(data[i].getKey())); + } + for (int i = (data.length / 2); i < data.length; i++) { + assertFalse(map.containsKey(data[i].getKey())); + } + } + + @Test + public void testRemove() { + int expectedSize = data.length; + + assertFalse(map.isPersisted()); + for (int i = 0; i < data.length; i++) { + map.remove(data[i].getKey()); + assertEquals(--expectedSize, map.size()); + } + assertTrue(map.isEmpty()); + } + + @Test + public void testRemovePersisted() throws IOException { + int expectedSize = data.length; + + assertFalse(map.isPersisted()); + map.persist(); + assertTrue(map.isPersisted()); + for (int i = 0; i < data.length; i++) { + map.remove(data[i].getKey()); + assertEquals(--expectedSize, map.size()); + assertTrue(map.isPersisted()); + } + assertTrue(map.isEmpty()); + } + + @Test + public void testIterator() { + int index = 0; + for (Iterator> it = map.entrySet().iterator(); it.hasNext();) { + Map.Entry value = it.next(); + Map.Entry expected = data[sortedOrder[index++]]; + testEquality(expected, value); + } + map.clear(); + for (Map.Entry value : map.entrySet()) { + fail(); + } + } + + @Test + public void testIteratorRemovePersisted() throws IOException { + int size = map.size(); + int failCount = 0; + + assertFalse(map.isPersisted()); + map.persist(); + assertTrue(map.isPersisted()); + + // calling iterator() will force persistence + for (Iterator> it = map.entrySet().iterator(); it.hasNext();) { + Map.Entry value = it.next(); + assertTrue(map.containsKey(value.getKey())); + try { + it.remove(); + fail("Expected iterator remove to fail with a persisted map"); + } catch (Exception e) { + // expected that some of the underlying FileSortedMaps are persisted and hence the remove will fail + failCount++; + assertTrue(map.containsKey(value.getKey())); + assertEquals(size, map.size()); + } + } + assertEquals(size, failCount); + 
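+        // every element above produced a failed remove, so the persisted map must still hold all of its entries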
assertFalse(map.isEmpty()); + } + + @Test + public void testIteratorRemove() { + int size = map.size(); + int failCount = 0; + + assertFalse(map.isPersisted()); + + // calling iterator() will force persistence + for (Iterator> it = map.entrySet().iterator(); it.hasNext();) { + Map.Entry value = it.next(); + assertTrue(map.containsKey(value.getKey())); + try { + it.remove(); + fail("Expected iterator remove to fail with a persisted map"); + } catch (Exception e) { + // expected that some of the underlying FileSortedMaps are persisted and hence the remove will fail + failCount++; + assertTrue(map.containsKey(value.getKey())); + assertEquals(size, map.size()); + } + } + assertEquals(size, failCount); + assertFalse(map.isEmpty()); + } + + @Test + public void testComparator() { + final Comparator comparator = map.comparator(); + Map.Entry[] testData = Arrays.copyOf(data, data.length); + Arrays.sort(testData, new Comparator>() { + @Override + public int compare(Map.Entry o1, Map.Entry o2) { + return comparator.compare(o1.getKey(), o2.getKey()); + } + }); + int index = 0; + for (Map.Entry value : map.entrySet()) { + Map.Entry expected = data[sortedOrder[index++]]; + testEquality(expected, value); + } + } + + @Test + public void testSubmap() { + int start = sortedOrder.length / 3; + int end = start * 2; + try { + SortedMap submap = map.subMap(data[sortedOrder[start]].getKey(), data[sortedOrder[end]].getKey()); + SortedMap expected = new TreeMap<>(map.comparator()); + for (int i = start; i < end; i++) { + expected.put(data[sortedOrder[i]].getKey(), data[sortedOrder[i]].getValue()); + } + testEquality(expected, submap); + } catch (Exception e) { + // expected + } + } + + @Test + public void testHeadmap() { + int end = sortedOrder.length / 3; + try { + SortedMap submap = map.headMap(data[sortedOrder[end]].getKey()); + SortedMap expected = new TreeMap<>(map.comparator()); + for (int i = 0; i < end; i++) { + expected.put(data[sortedOrder[i]].getKey(), data[sortedOrder[i]].getValue()); + } + testEquality(expected, submap); + } catch (Exception e) { + // expected + } + } + + @Test + public void testTailmap() { + int start = sortedOrder.length / 3; + try { + SortedMap submap = map.tailMap(data[sortedOrder[start]].getKey()); + SortedMap expected = new TreeMap<>(map.comparator()); + for (int i = start; i < sortedOrder.length; i++) { + expected.put(data[sortedOrder[i]].getKey(), data[sortedOrder[i]].getValue()); + } + testEquality(expected, submap); + } catch (Exception e) { + // expected + } + } + + @Test + public void testLastKey() { + Map.Entry expected = data[sortedOrder[data.length - 1]]; + K value = map.lastKey(); + testEquality(expected.getKey(), value); + } + + @Test + public void testFirstKey() { + Map.Entry expected = data[sortedOrder[0]]; + K value = map.firstKey(); + testEquality(expected.getKey(), value); + } + + @Test + public void testCompaction() throws IOException { + assertEquals(8, map.getMaps().size()); + map.persist(); + assertEquals(3, map.getMaps().size()); + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/FileSortedMapTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/FileSortedMapTest.java new file mode 100644 index 00000000000..938e61bf21e --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/FileSortedMapTest.java @@ -0,0 +1,407 @@ +package datawave.query.util.sortedmap; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static 
org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.Random; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class FileSortedMapTest { + SortedMap data = null; + SortedMap extraData = null; + FileSortedMap map = null; + datawave.query.util.sortedmap.SortedMapTempFileHandler handler = null; + + @Before + public void setUp() throws Exception { + Comparator c = new Comparator() { + @Override + public int compare(Integer o1, Integer o2) { + if (o1 == null) { + return (o2 == null ? 0 : -1); + } else { + return (o2 == null ? 1 : o1.compareTo(o2)); + } + } + }; + handler = new SortedMapTempFileHandler(); + map = new FileSerializableSortedMap<>(c, new FileSerializableSortedMap.SerializableFileHandler(handler), false); + data = new TreeMap<>(c); + Random r = new Random(123948710248L); + // data.add(null); + for (int i = 0; i < 20; i++) { + data.put(r.nextInt(), r.nextInt() + 1); + } + extraData = new TreeMap<>(c); + for (int i = 0; i < 20; i++) { + extraData.put(r.nextInt(), r.nextInt() + 1); + } + // make sure we have no overlap + data.keySet().removeAll(extraData.keySet()); + map.putAll(data); + } + + @After + public void tearDown() { + handler.getFile().delete(); + } + + private void assertSortedMapEquals(SortedMap map1, SortedMap map2) { + assertEquals(map1.size(), map2.size()); + assertTrue(map1.keySet().containsAll(map2.keySet())); + assertTrue(map1.keySet().containsAll(map2.keySet())); + map1.entrySet().stream().forEach(e -> assertEquals(e.getValue(), map2.get(e.getKey()))); + } + + @Test + public void testReadWrite() throws Exception { + assertFalse(map.isPersisted()); + assertSortedMapEquals(data, map); + map.persist(); + assertTrue(map.isPersisted()); + assertTrue(handler.getFile().exists()); + assertSortedMapEquals(data, map); + map.load(); + assertFalse(map.isPersisted()); + assertSortedMapEquals(data, map); + } + + @Test + public void testIsEmpty() throws Exception { + assertFalse(map.isEmpty()); + map.persist(); + assertFalse(map.isEmpty()); + map.clear(); + assertTrue(map.isEmpty()); + map.load(); + assertTrue(map.isEmpty()); + } + + @Test + public void testContains() throws Exception { + SortedMap someData = new TreeMap<>(data); + someData.put(extraData.firstKey(), extraData.get(extraData.firstKey())); + for (Integer i : data.keySet()) { + assertTrue(map.containsKey(i)); + } + for (Integer i : extraData.keySet()) { + assertFalse(map.containsKey(i)); + } + map.persist(); + for (Integer i : data.keySet()) { + assertTrue(map.containsKey(i)); + } + for (Integer i : extraData.keySet()) { + assertFalse(map.containsKey(i)); + } + map.load(); + for (Integer i : data.keySet()) { + assertTrue(map.containsKey(i)); + } + for (Integer i : extraData.keySet()) { + assertFalse(map.containsKey(i)); + } + } + + private void printMap(SortedMap map1, SortedMap map2) { + Iterator> it1 = map1.entrySet().iterator(); + Iterator> it2 = map2.entrySet().iterator(); + while (it1.hasNext() && it2.hasNext()) { + Map.Entry o1 = it1.next(); + Map.Entry o2 = it2.next(); + System.out.println(o1 + " vs " + o2); + } + while (it1.hasNext()) { + Map.Entry o1 = it1.next(); + System.out.println(o1 + " vs (null)"); + } + while (it2.hasNext()) { + Map.Entry o2 = it2.next(); + System.out.println("(null) vs " + o2); + } + } + + 
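+    // Illustrative only, not exercised by the tests: a minimal sketch of the persist/load
+    // round trip that the assertions in this class rely on (using the map, data, and handler
+    // fields initialized in setUp above).
+    private void exampleRoundTrip() throws Exception {
+        map.persist(); // entries are flushed to handler.getFile(); mutating calls now throw
+        assertTrue(map.isPersisted());
+        map.load(); // entries are read back into memory
+        assertFalse(map.isPersisted());
+        assertSortedMapEquals(data, map); // contents survive the round trip
+    }
+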
@Test + public void testIterator() throws Exception { + SortedMap testData = new TreeMap<>(data); + Iterator> dataIterator = testData.entrySet().iterator(); + Iterator> mapIterator = map.entrySet().iterator(); + while (dataIterator.hasNext()) { + assertTrue(mapIterator.hasNext()); + assertEquals(dataIterator.next(), mapIterator.next()); + } + assertFalse(mapIterator.hasNext()); + for (Map.Entry i : map.entrySet()) { + assertEquals(testData.remove(i.getKey()), i.getValue()); + } + assertTrue(testData.isEmpty()); + map.persist(); + dataIterator = data.entrySet().iterator(); + mapIterator = map.entrySet().iterator(); + while (dataIterator.hasNext()) { + assertTrue(mapIterator.hasNext()); + assertEquals(dataIterator.next(), mapIterator.next()); + } + assertFalse(mapIterator.hasNext()); + testData.putAll(data); + for (Map.Entry i : map.entrySet()) { + assertEquals(testData.remove(i.getKey()), i.getValue()); + } + assertTrue(testData.isEmpty()); + map.load(); + dataIterator = data.entrySet().iterator(); + mapIterator = map.entrySet().iterator(); + while (dataIterator.hasNext()) { + assertTrue(mapIterator.hasNext()); + assertEquals(dataIterator.next(), mapIterator.next()); + } + assertFalse(mapIterator.hasNext()); + testData.putAll(data); + for (Map.Entry i : map.entrySet()) { + assertEquals(testData.remove(i.getKey()), i.getValue()); + } + assertTrue(testData.isEmpty()); + } + + @Test + public void testPut() throws Exception { + assertNull(map.put(extraData.firstKey(), extraData.get(extraData.firstKey()))); + assertEquals(data.size() + 1, map.size()); + assertTrue(map.containsKey(extraData.firstKey())); + assertEquals(map.remove(extraData.firstKey()), extraData.get(extraData.firstKey())); + assertEquals(data.size(), map.size()); + assertFalse(map.containsKey(extraData.firstKey())); + map.persist(); + try { + map.put(extraData.firstKey(), extraData.get(extraData.firstKey())); + fail("Expected persisted map.put to fail"); + } catch (Exception e) { + // expected + } + map.load(); + assertEquals(data.size(), map.size()); + assertFalse(map.containsKey(extraData.firstKey())); + } + + @Test + public void testPutAll() throws Exception { + map.putAll(extraData); + assertEquals(data.size() + extraData.size(), map.size()); + assertTrue(map.entrySet().containsAll(extraData.entrySet())); + assertTrue(map.keySet().removeAll(extraData.keySet())); + assertEquals(data.size(), map.size()); + assertFalse(map.containsKey(extraData.firstKey())); + map.persist(); + try { + map.putAll(extraData); + fail("Expected persisted map.putAll to fail"); + } catch (Exception e) { + // expected + } + map.load(); + assertEquals(data.size(), map.size()); + assertFalse(map.containsKey(extraData.firstKey())); + } + + @Test + public void testRetainAll() throws Exception { + SortedMap someData = new TreeMap<>(data); + someData.remove(data.firstKey()); + someData.remove(data.lastKey()); + someData.put(extraData.firstKey(), extraData.get(extraData.firstKey())); + someData.put(extraData.lastKey(), extraData.get(extraData.lastKey())); + assertFalse(map.keySet().retainAll(data.keySet())); + assertEquals(someData.size(), map.size()); + assertTrue(map.keySet().retainAll(someData.keySet())); + assertEquals(data.size() - 2, map.size()); + assertFalse(map.keySet().containsAll(data.keySet())); + assertFalse(map.keySet().containsAll(someData.keySet())); + assertFalse(map.containsKey(data.lastKey())); + assertTrue(map.keySet().retainAll(extraData.keySet())); + assertTrue(map.isEmpty()); + + map.putAll(data); + map.persist(); + try { + 
map.keySet().retainAll(someData.keySet()); + fail("Expected persisted map.retainAll to fail"); + } catch (Exception e) { + // expected + } + + map.load(); + assertEquals(data.size(), map.size()); + assertTrue(map.keySet().containsAll(data.keySet())); + } + + @Test + public void testRemoveAll() throws Exception { + SortedMap someData = new TreeMap<>(data); + someData.remove(data.firstKey()); + someData.remove(data.lastKey()); + someData.put(extraData.firstKey(), extraData.get(extraData.firstKey())); + someData.put(extraData.lastKey(), extraData.get(extraData.lastKey())); + assertFalse(map.keySet().removeAll(extraData.keySet())); + assertEquals(someData.size(), map.size()); + assertTrue(map.keySet().removeAll(someData.keySet())); + assertEquals(2, map.size()); + assertFalse(map.keySet().containsAll(data.keySet())); + assertFalse(map.keySet().containsAll(someData.keySet())); + assertTrue(map.keySet().contains(data.firstKey())); + assertTrue(map.keySet().contains(data.lastKey())); + assertTrue(map.keySet().removeAll(data.keySet())); + assertTrue(map.isEmpty()); + + map.putAll(data); + map.persist(); + try { + map.keySet().removeAll(someData.keySet()); + fail("Expected persisted map.removeAll to fail"); + } catch (Exception e) { + // expected + } + + map.load(); + assertEquals(data.size(), map.size()); + assertTrue(map.keySet().containsAll(data.keySet())); + } + + @Test + public void testClear() throws Exception { + map.clear(); + assertTrue(map.isEmpty()); + map.putAll(data); + map.persist(); + map.clear(); + assertTrue(map.isEmpty()); + map.load(); + assertTrue(map.isEmpty()); + } + + @Test + public void testNoComparator() throws Exception { + assertNotNull(map.comparator()); + map.persist(); + assertNotNull(map.comparator()); + map.load(); + assertNotNull(map.comparator()); + SortedMap tempData = new TreeMap<>(); + tempData.putAll(data); + + map = new FileSerializableSortedMap<>(tempData, new FileSerializableSortedMap.SerializableFileHandler(handler), false); + + assertNull(map.comparator()); + assertSortedMapEquals(tempData, map); + for (Integer i : map.keySet()) { + assertEquals(tempData.firstKey(), i); + tempData.remove(tempData.firstKey()); + } + tempData.putAll(data); + assertSortedMapEquals(tempData, map); + map.persist(); + assertNull(map.comparator()); + map.load(); + assertNull(map.comparator()); + + for (Integer i : map.keySet()) { + assertEquals(tempData.firstKey(), i); + tempData.remove(tempData.firstKey()); + } + } + + @Test + public void testSubmap() throws Exception { + Integer fromElement = null; + Integer toElement = null; + int index = 0; + for (Integer i : data.keySet()) { + if (index == (data.size() / 3)) { + fromElement = i; + } else if (index == data.size() * 2 / 3) { + toElement = i; + break; + } + index++; + } + SortedMap submap = map.subMap(fromElement, toElement); + assertSortedMapEquals(data.subMap(fromElement, toElement), submap); + map.persist(); + submap = map.subMap(fromElement, toElement); + assertSortedMapEquals(data.subMap(fromElement, toElement), submap); + map.load(); + submap = map.subMap(fromElement, toElement); + assertSortedMapEquals(data.subMap(fromElement, toElement), submap); + } + + @Test + public void testHeadmap() throws Exception { + Integer toElement = null; + int index = 0; + for (Integer i : data.keySet()) { + if (index == data.size() * 2 / 3) { + toElement = i; + break; + } + index++; + } + SortedMap submap = map.headMap(toElement); + assertSortedMapEquals(data.headMap(toElement), submap); + map.persist(); + submap = map.headMap(toElement); + 
assertSortedMapEquals(data.headMap(toElement), submap); + map.load(); + submap = map.headMap(toElement); + assertSortedMapEquals(data.headMap(toElement), submap); + } + + @Test + public void testTailmap() throws Exception { + Integer fromElement = null; + int index = 0; + for (Integer i : data.keySet()) { + if (index == (data.size() / 3)) { + fromElement = i; + break; + } + index++; + } + SortedMap submap = map.tailMap(fromElement); + assertSortedMapEquals(data.tailMap(fromElement), submap); + map.persist(); + submap = map.tailMap(fromElement); + assertSortedMapEquals(data.tailMap(fromElement), submap); + map.load(); + submap = map.tailMap(fromElement); + assertSortedMapEquals(data.tailMap(fromElement), submap); + } + + @Test + public void testFirstKey() throws Exception { + assertEquals(data.firstKey(), map.firstKey()); + map.persist(); + assertEquals(data.firstKey(), map.firstKey()); + map.load(); + assertEquals(data.firstKey(), map.firstKey()); + } + + @Test + public void testLastKey() throws Exception { + assertEquals(data.lastKey(), map.lastKey()); + map.persist(); + assertEquals(data.lastKey(), map.lastKey()); + map.load(); + assertEquals(data.lastKey(), map.lastKey()); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/HdfsBackedSortedMapTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/HdfsBackedSortedMapTest.java new file mode 100644 index 00000000000..9aa0fbcf5fc --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/HdfsBackedSortedMapTest.java @@ -0,0 +1,254 @@ +package datawave.query.util.sortedmap; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FsStatus; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import datawave.query.iterator.ivarator.IvaratorCacheDir; +import datawave.query.iterator.ivarator.IvaratorCacheDirConfig; +import datawave.query.util.sortedset.FileSortedSet; +import datawave.query.util.sortedset.HdfsBackedSortedSet; + +public class HdfsBackedSortedMapTest { + + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @Test + public void persistReloadTest() throws Exception { + File tempDir = temporaryFolder.newFolder(); + + File smallDir = new File(tempDir, "small"); + Assert.assertTrue(smallDir.mkdirs()); + + File largeDir = new File(tempDir, "large"); + Assert.assertTrue(largeDir.mkdirs()); + + LocalFileSystem fs = new LocalFileSystem(); + fs.initialize(tempDir.toURI(), new Configuration()); + + FsStatus fsStatus = fs.getStatus(); + + // set the min remaining MB to something which will cause the 'small' directory to be skipped + long minRemainingMB = (fsStatus.getRemaining() / 0x100000L) + 4096L; + + List ivaratorCacheDirs = new ArrayList<>(); + ivaratorCacheDirs + .add(new IvaratorCacheDir(new IvaratorCacheDirConfig(smallDir.toURI().toString(), 0, minRemainingMB), fs, smallDir.toURI().toString())); + ivaratorCacheDirs.add(new IvaratorCacheDir(new IvaratorCacheDirConfig(largeDir.toURI().toString()), fs, largeDir.toURI().toString())); + + String uniquePath = "blah"; + + // @formatter:off + @SuppressWarnings("unchecked") + datawave.query.util.sortedset.HdfsBackedSortedSet sortedSet = 
(datawave.query.util.sortedset.HdfsBackedSortedSet) datawave.query.util.sortedset.HdfsBackedSortedSet.builder() + .withIvaratorCacheDirs(ivaratorCacheDirs) + .withUniqueSubPath(uniquePath) + .withMaxOpenFiles(9999) + .withNumRetries(2) + .withPersistOptions(new datawave.query.util.sortedset.FileSortedSet.PersistOptions()) + .build(); + // @formatter:on + + // Add an entry to the sorted set + String someTestString = "some test string"; + sortedSet.add(someTestString); + + // persist the sorted set + sortedSet.persist(); + + Path smallPath = new Path(smallDir.toURI().toString()); + Path smallSubPath = new Path(smallPath, uniquePath); + Path largePath = new Path(largeDir.toURI().toString()); + Path largeSubPath = new Path(largePath, uniquePath); + + // ensure that data was written to the large folder, not the small folder + Assert.assertFalse(fs.exists(smallSubPath)); + Assert.assertEquals(0, fs.listStatus(smallPath).length); + Assert.assertTrue(fs.exists(largeSubPath)); + + FileStatus[] fileStatuses = fs.listStatus(largeSubPath); + Assert.assertEquals(1, fileStatuses.length); + Assert.assertTrue(fileStatuses[0].getPath().getName().startsWith("SortedSet")); + + // Now make sure reloading an ivarator cache dir works + // @formatter:off + @SuppressWarnings("unchecked") + datawave.query.util.sortedset.HdfsBackedSortedSet reloadedSortedSet = (datawave.query.util.sortedset.HdfsBackedSortedSet) datawave.query.util.sortedset.HdfsBackedSortedSet.builder() + .withIvaratorCacheDirs(ivaratorCacheDirs) + .withUniqueSubPath(uniquePath) + .withMaxOpenFiles(9999) + .withNumRetries(2) + .withPersistOptions(new datawave.query.util.sortedset.FileSortedSet.PersistOptions()) + .build(); + // @formatter:on + + Assert.assertEquals(1, reloadedSortedSet.size()); + Assert.assertEquals(someTestString, reloadedSortedSet.first()); + } + + @Test + public void persistCompactReloadTest() throws Exception { + File tempDir = temporaryFolder.newFolder(); + + File[] dirs = new File[] {new File(tempDir, "first"), new File(tempDir, "second"), new File(tempDir, "third")}; + + for (File dir : dirs) + Assert.assertTrue(dir.mkdirs()); + + String uniquePath = "blah"; + + Path[] paths = Arrays.stream(dirs).map(dir -> new Path(dir.toURI().toString())).toArray(Path[]::new); + Path[] subPaths = Arrays.stream(paths).map(path -> new Path(path, uniquePath)).toArray(Path[]::new); + + LocalFileSystem fs = new LocalFileSystem(); + fs.initialize(tempDir.toURI(), new Configuration()); + + // set the min remaining percent to something which will cause the second directory to be skipped + double minRemainingPercent = 1.0; + + List ivaratorCacheDirs = new ArrayList<>(); + for (File dir : dirs) { + if (dir.getName().equalsIgnoreCase("second")) + ivaratorCacheDirs.add( + new IvaratorCacheDir(new IvaratorCacheDirConfig(dir.toURI().toString(), 0, minRemainingPercent), fs, dir.toURI().toString())); + else + ivaratorCacheDirs.add(new IvaratorCacheDir(new IvaratorCacheDirConfig(dir.toURI().toString(), 1), fs, dir.toURI().toString())); + } + + // @formatter:off + @SuppressWarnings("unchecked") + datawave.query.util.sortedset.HdfsBackedSortedSet firstSortedSet = (datawave.query.util.sortedset.HdfsBackedSortedSet) datawave.query.util.sortedset.HdfsBackedSortedSet.builder() + .withIvaratorCacheDirs(Collections.singletonList(ivaratorCacheDirs.get(0))) + .withUniqueSubPath(uniquePath) + .withMaxOpenFiles(9999) + .withNumRetries(2) + .withPersistOptions(new datawave.query.util.sortedset.FileSortedSet.PersistOptions()) + .build(); + // @formatter:on + + // Add an 
entry to the first sorted set + String someTestString = "some test string"; + firstSortedSet.add(someTestString); + + // persist the sorted set + firstSortedSet.persist(); + + // @formatter:off + @SuppressWarnings("unchecked") + datawave.query.util.sortedset.HdfsBackedSortedSet thirdSortedSet = (datawave.query.util.sortedset.HdfsBackedSortedSet) datawave.query.util.sortedset.HdfsBackedSortedSet.builder() + .withIvaratorCacheDirs(Collections.singletonList(ivaratorCacheDirs.get(2))) + .withUniqueSubPath(uniquePath) + .withMaxOpenFiles(9999) + .withNumRetries(2) + .withPersistOptions(new datawave.query.util.sortedset.FileSortedSet.PersistOptions()) + .build(); + // @formatter:on + + // Add an entry to the third sorted set + String anotherTestString = "another test string"; + thirdSortedSet.add(anotherTestString); + + // persist the sorted set + thirdSortedSet.persist(); + + // ensure that data was written to the first and third folders + Assert.assertTrue(fs.exists(subPaths[0])); + Assert.assertTrue(fs.exists(subPaths[2])); + + // ensure that data was not written to the second folder + Assert.assertFalse(fs.exists(subPaths[1])); + Assert.assertEquals(0, fs.listStatus(paths[1]).length); + + // ensure that 1 file was written to the first folder + FileStatus[] fileStatuses = fs.listStatus(subPaths[0]); + Assert.assertEquals(1, fileStatuses.length); + Assert.assertTrue(fileStatuses[0].getPath().getName().startsWith("SortedSet")); + + // ensure that 1 file was written to the third folder + fileStatuses = fs.listStatus(subPaths[2]); + Assert.assertEquals(1, fileStatuses.length); + Assert.assertTrue(fileStatuses[0].getPath().getName().startsWith("SortedSet")); + + // Now make sure reloading an ivarator cache dir works, and set maxOpenFiles to 1 so that we compact during the next persist + // @formatter:off + @SuppressWarnings("unchecked") + datawave.query.util.sortedset.HdfsBackedSortedSet reloadedSortedSet = (datawave.query.util.sortedset.HdfsBackedSortedSet) datawave.query.util.sortedset.HdfsBackedSortedSet.builder() + .withIvaratorCacheDirs(ivaratorCacheDirs) + .withUniqueSubPath(uniquePath) + .withMaxOpenFiles(1) + .withNumRetries(2) + .withPersistOptions(new datawave.query.util.sortedset.FileSortedSet.PersistOptions()) + .build(); + // @formatter:on + + // Ensure that we have 2 entries total + Assert.assertEquals(2, reloadedSortedSet.size()); + + // This is what we expect to be loaded by the set + List results = new ArrayList<>(); + results.add(someTestString); + results.add(anotherTestString); + + // for each result we find, remove it from the results list and ensure that the list is empty when we're done + reloadedSortedSet.iterator().forEachRemaining(results::remove); + Assert.assertTrue(results.isEmpty()); + + // Finally, add an entry to the reloaded sorted set + String lastTestString = "last test string"; + reloadedSortedSet.add(lastTestString); + + // persist the sorted set (this should cause a compaction down to 1 file) + reloadedSortedSet.persist(); + + // ensure that data was not written to the second folder + Assert.assertFalse(fs.exists(subPaths[1])); + Assert.assertEquals(0, fs.listStatus(paths[1]).length); + + // ensure that while the folder still exists, data no longer exists for the third folder + Assert.assertTrue(fs.exists(subPaths[2])); + Assert.assertEquals(0, fs.listStatus(subPaths[2]).length); + + // ensure that all data exists in the first folder + fileStatuses = fs.listStatus(subPaths[0]); + Assert.assertEquals(1, fileStatuses.length); + 
Assert.assertTrue(fileStatuses[0].getPath().getName().startsWith("SortedSet")); + + // Finally, make sure that the compacted data can be reloaded + // @formatter:off + @SuppressWarnings("unchecked") + datawave.query.util.sortedset.HdfsBackedSortedSet compactedSortedSet = (datawave.query.util.sortedset.HdfsBackedSortedSet) HdfsBackedSortedSet.builder() + .withIvaratorCacheDirs(ivaratorCacheDirs) + .withUniqueSubPath(uniquePath) + .withMaxOpenFiles(9999) + .withNumRetries(2) + .withPersistOptions(new FileSortedSet.PersistOptions()) + .build(); + // @formatter:on + + // Ensure that we have 3 entries total + Assert.assertEquals(3, compactedSortedSet.size()); + + // This is what we expect to be loaded by the set + results.clear(); + results.add(someTestString); + results.add(anotherTestString); + results.add(lastTestString); + + // for each result we find, remove it from the results list and ensure that the list is empty when we're done + compactedSortedSet.iterator().forEachRemaining(results::remove); + Assert.assertTrue(results.isEmpty()); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MergeSortIteratorTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MergeSortIteratorTest.java new file mode 100644 index 00000000000..b0aee4bda10 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MergeSortIteratorTest.java @@ -0,0 +1,241 @@ +package datawave.query.util.sortedmap; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.junit.Test; + +public class MergeSortIteratorTest { + + @Test + public void testIteration() { + SortedMap set1 = new TreeMap<>(); + SortedMap set2 = new TreeMap<>(); + SortedMap set3 = new TreeMap<>(); + + set1.put(1, 1); + set1.put(3, 1); + set1.put(4, 1); + set1.put(5, 1); + set1.put(6, 1); + set1.put(10, 1); + + set2.put(1, 1); + set2.put(2, 1); + set2.put(5, 1); + set2.put(20, 1); + + set3.put(2, 1); + set3.put(5, 1); + set3.put(6, 1); + set3.put(30, 1); + + List expected = new ArrayList<>(); + expected.add(1); + expected.add(2); + expected.add(3); + expected.add(4); + expected.add(5); + expected.add(6); + expected.add(10); + expected.add(20); + expected.add(30); + + List> col = new ArrayList<>(); + col.add(set1); + col.add(set2); + col.add(set3); + List results = new ArrayList<>(); + + Iterator it = new MultiMapBackedSortedMap(col).keySet().iterator(); + try { + it.remove(); + fail("Expected remove to fail"); + } catch (Exception e) { + // expected + } + while (it.hasNext()) { + try { + it.remove(); + fail("Expected remove to fail"); + } catch (Exception e) { + // expected + } + Integer next = it.next(); + results.add(next); + assertTrue(set1.containsKey(next) || set2.containsKey(next) || set3.containsKey(next)); + it.remove(); + assertFalse(set1.containsKey(next) || set2.containsKey(next) || set3.containsKey(next)); + try { + it.remove(); + fail("Expected remove to fail"); + } catch (Exception e) { + // expected + } + } + assertEquals(expected, results); + assertTrue(set1.isEmpty() && set2.isEmpty() && set3.isEmpty()); + } + + @Test + public void testIterationSansHasNext() { + SortedMap set1 = new TreeMap<>(); + SortedMap set2 = new 
TreeMap<>(); + SortedMap set3 = new TreeMap<>(); + + set1.put(1, 1); + set1.put(3, 1); + set1.put(4, 1); + set1.put(5, 1); + set1.put(6, 1); + set1.put(10, 1); + + set2.put(1, 1); + set2.put(2, 1); + set2.put(5, 1); + set2.put(20, 1); + + set3.put(2, 1); + set3.put(5, 1); + set3.put(6, 1); + set3.put(30, 1); + + List expected = new ArrayList<>(); + expected.add(1); + expected.add(2); + expected.add(3); + expected.add(4); + expected.add(5); + expected.add(6); + expected.add(10); + expected.add(20); + expected.add(30); + + List> col = new ArrayList<>(); + col.add(set1); + col.add(set2); + col.add(set3); + List results = new ArrayList<>(); + Iterator it = new MultiMapBackedSortedMap(col).keySet().iterator(); + while (true) { + try { + it.remove(); + fail("Expected remove to fail"); + } catch (Exception e) { + // expected + } + Integer next; + try { + next = it.next(); + } catch (NoSuchElementException nsee) { + break; + } + results.add(next); + assertTrue(set1.containsKey(next) || set2.containsKey(next) || set3.containsKey(next)); + it.remove(); + assertFalse(set1.containsKey(next) || set2.containsKey(next) || set3.containsKey(next)); + try { + it.remove(); + fail("Expected remove to fail"); + } catch (Exception e) { + // expected + } + } + assertEquals(expected, results); + assertTrue(set1.isEmpty() && set2.isEmpty() && set3.isEmpty()); + } + + @Test + public void testIterationSansWithNulls() { + Comparator c = new Comparator() { + + @Override + public int compare(Integer o1, Integer o2) { + if (o1 == null) { + return (o2 == null ? 0 : -1); + } else { + return (o2 == null ? 1 : o1.compareTo(o2)); + } + } + }; + + SortedMap set1 = new TreeMap<>(c); + SortedMap set2 = new TreeMap<>(c); + SortedMap set3 = new TreeMap<>(c); + + set1.put(1, 1); + set1.put(3, 1); + set1.put(4, 1); + set1.put(5, 1); + set1.put(6, 1); + set1.put(10, 1); + + set2.put(null, 1); + set2.put(1, 1); + set2.put(2, 1); + set2.put(5, 1); + set2.put(20, 1); + + set3.put(null, 1); + set3.put(2, 1); + set3.put(5, 1); + set3.put(6, 1); + set3.put(30, 1); + + List expected = new ArrayList<>(); + expected.add(null); + expected.add(1); + expected.add(2); + expected.add(3); + expected.add(4); + expected.add(5); + expected.add(6); + expected.add(10); + expected.add(20); + expected.add(30); + + List> col = new ArrayList<>(); + col.add(set1); + col.add(set2); + col.add(set3); + List results = new ArrayList<>(); + Iterator it = new MultiMapBackedSortedMap(col).keySet().iterator(); + try { + it.remove(); + fail("Expected remove to fail"); + } catch (Exception e) { + // expected + } + while (it.hasNext()) { + try { + it.remove(); + fail("Expected remove to fail"); + } catch (Exception e) { + // expected + } + Integer next = it.next(); + results.add(next); + assertTrue(set1.containsKey(next) || set2.containsKey(next) || set3.containsKey(next)); + it.remove(); + assertFalse(set1.containsKey(next) || set2.containsKey(next) || set3.containsKey(next)); + try { + it.remove(); + fail("Expected remove to fail"); + } catch (Exception e) { + // expected + } + } + assertEquals(expected, results); + assertTrue(set1.isEmpty() && set2.isEmpty() && set3.isEmpty()); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedKeyValueSortedMapTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedKeyValueSortedMapTest.java new file mode 100644 index 00000000000..9eb9c4bcaac --- /dev/null +++ 
b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedKeyValueSortedMapTest.java @@ -0,0 +1,53 @@ +package datawave.query.util.sortedmap; + +import static org.junit.Assert.assertEquals; + +import java.util.Comparator; +import java.util.Map; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; + +public class MultiMapBackedKeyValueSortedMapTest extends MultiMapBackedRewritableSortedMapTest { + + private Comparator keyComparator = new Comparator<>() { + @Override + public int compare(Key o1, Key o2) { + return o1.compareTo(o2); + } + }; + + private FileSortedMap.RewriteStrategy keyValueComparator = new FileSortedMap.RewriteStrategy<>() { + @Override + public boolean rewrite(Key key, Value original, Value update) { + return original.compareTo(update) < 0; + } + }; + + @Override + public FileSortedMap.RewriteStrategy getRewriteStrategy() { + return keyValueComparator; + } + + @Override + public Key createKey(byte[] values) { + return new Key(values); + } + + @Override + public Value createValue(byte[] values) { + return new Value(values); + } + + @Override + public void testEquality(Map.Entry expected, Map.Entry value) { + assertEquals(expected.getKey(), value.getKey()); + assertEquals(expected.getValue(), value.getValue()); + } + + @Override + public Comparator getComparator() { + return keyComparator; + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedRewritableSortedMapTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedRewritableSortedMapTest.java new file mode 100644 index 00000000000..377878a31c3 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedRewritableSortedMapTest.java @@ -0,0 +1,89 @@ +package datawave.query.util.sortedmap; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.commons.collections.keyvalue.UnmodifiableMapEntry; +import org.junit.Test; + +public abstract class MultiMapBackedRewritableSortedMapTest extends MultiMapBackedSortedMapTest { + + /** + * Create a key from the specified bytes. The key should sort the same way the underlying byte array sorts against other byte arrays. + * + * @param values the bytes to build the key from + * @return The key + */ + public abstract K createKey(byte[] values); + + /** + * Create a value from the specified bytes. + * + * @param values the bytes to build the value from + * @return The value + */ + public abstract V createValue(byte[] values); + + /** + * Get a rewrite strategy. The strategy should allow a rewrite when the existing value is smaller than the update. 
+ * + * @return the rewrite strategy appropriate for key and value types + */ + @Override + public abstract FileSortedMap.RewriteStrategy getRewriteStrategy(); + + @Override + public Map.Entry createData(byte[] values) { + byte[] vbuffer = new byte[values.length]; + Arrays.fill(vbuffer, (byte) (values[0] + 1)); + return new UnmodifiableMapEntry(createKey(values), createValue(vbuffer)); + } + + @Test + public void testRewrite() throws Exception { + // create a new set of data, only half of which has greater Values + Map.Entry[] data2 = new Map.Entry[template.length * 2]; + for (int i = 0; i < template.length; i++) { + byte[] buffer = new byte[i + 11]; + Arrays.fill(buffer, template[i]); + byte[] vbuffer = new byte[buffer.length]; + Arrays.fill(vbuffer, (byte) (template[i] + 1)); + data2[i] = new UnmodifiableMapEntry(createKey(buffer), createValue(vbuffer)); + } + for (int i = 0; i < template.length; i++) { + byte[] buffer = new byte[i + 10]; + Arrays.fill(buffer, template[i]); + byte[] vbuffer = new byte[buffer.length]; + Arrays.fill(vbuffer, (byte) (template[i] - 1)); + Map.Entry datum = new UnmodifiableMapEntry(createKey(buffer), createValue(vbuffer)); + data2[i + template.length] = datum; + } + + // create a new map with the supplied rewrite strategy + maps = new ArrayList<>(); + for (int i = 0; i < 7; i++) { + maps.add(new TreeMap(getComparator())); + } + map = new MultiMapBackedSortedMap(maps); + map.setRewriteStrategy(getRewriteStrategy()); + + // adding in the data set multiple times to create underlying maps with duplicate values making the + // MergeSortIterator's job a little tougher... + for (int d = 0; d < 11; d++) { + addDataRandomly(maps, data); + addDataRandomly(maps, data2); + } + + // now test the contents making sure we still have a sorted map with the expected values + int index = 0; + for (Iterator> it = map.entrySet().iterator(); it.hasNext();) { + Map.Entry value = it.next(); + int dataIndex = sortedOrder[index++]; + Map.Entry expected = (dataIndex < template.length ? 
data2[dataIndex] : data[dataIndex]); + testEquality(expected, value); + } + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedSortedMapTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedSortedMapTest.java new file mode 100644 index 00000000000..f171947e3e5 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/MultiMapBackedSortedMapTest.java @@ -0,0 +1,327 @@ +package datawave.query.util.sortedmap; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.File; +import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.commons.collections.keyvalue.UnmodifiableMapEntry; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public abstract class MultiMapBackedSortedMapTest { + + protected final List tempFileHandlers = new ArrayList<>(); + protected Map.Entry[] data = null; + protected int[] sortedOrder = null; + protected MultiMapBackedSortedMap map = null; + protected List maps = null; + protected final byte[] template = new byte[] {5, 2, 29, 4, 8, 3, 25, 23, 6, 21, 7, 16}; + protected final int[] sortedTemplate = new int[] {1, 5, 3, 0, 8, 10, 4, 11, 9, 7, 6, 2}; + + public abstract Map.Entry createData(byte[] values); + + public abstract Comparator getComparator(); + + public FileSortedMap.RewriteStrategy getRewriteStrategy() { + return null; + } + + protected void testEquality(SortedMap m1, SortedMap m2) { + for (Map.Entry e1 : m1.entrySet()) { + V v2 = m2.get(e1.getKey()); + testEquality(e1, new UnmodifiableMapEntry(e1.getKey(), v2)); + } + } + + protected void testEquality(Map.Entry expected, Map.Entry value) { + testEquality(expected.getKey(), value.getKey()); + assertEquals(expected.getValue(), value.getValue()); + } + + protected void testEquality(K expected, K value) { + if (map.comparator() != null) { + assertEquals(0, map.comparator().compare(expected, value)); + } else { + assertEquals(expected, value); + } + } + + @Before + public void mapUp() throws Exception { + for (int i = 0; i < template.length; i++) { + byte[] buffer = new byte[i + 11]; + Arrays.fill(buffer, template[i]); + Map.Entry datum = createData(buffer); + if (i == 0) { + data = (Map.Entry[]) Array.newInstance(datum.getClass(), template.length * 2); + } + data[i] = datum; + } + for (int i = 0; i < template.length; i++) { + byte[] buffer = new byte[i + 10]; + Arrays.fill(buffer, template[i]); + Map.Entry datum = createData(buffer); + data[i + template.length] = datum; + } + sortedOrder = new int[data.length]; + for (int i = 0; i < template.length; i++) { + sortedOrder[i * 2] = sortedTemplate[i] + sortedTemplate.length; + sortedOrder[i * 2 + 1] = sortedTemplate[i]; + } + maps = new ArrayList<>(); + for (int i = 0; i < 7; i++) { + maps.add(new TreeMap(getComparator())); + } + map = new MultiMapBackedSortedMap(maps); + map.setRewriteStrategy(getRewriteStrategy()); + + // adding in the data map multiple times to create duplicate values across the underlying maps, making the + // MergeSortIterator's job a little tougher... 
+ for (int i = 0; i < 4; i++) { + for (int d = 0; d < 11; d++) { + addDataRandomly(maps, data); + } + } + } + + public void putRandomly(List maps, K key, V value) { + Random random = new Random(); + int mapIndex = random.nextInt(maps.size()); + maps.get(mapIndex).put(key, value); + } + + public void addDataRandomly(List maps, Map.Entry[] data) { + Set added = new HashSet<>(); + Random random = new Random(); + // add data.length items randomly + for (int i = 0; i < data.length; i++) { + int index = random.nextInt(data.length); + putRandomly(maps, data[index].getKey(), data[index].getValue()); + added.add(index); + } + // ensure all missing items are added + for (int i = 0; i < data.length; i++) { + if (!added.contains(i)) { + putRandomly(maps, data[i].getKey(), data[i].getValue()); + } + } + } + + @After + public void tearDown() throws Exception { + // Delete each sorted map file and its checksum. + for (SortedMapTempFileHandler fileHandler : tempFileHandlers) { + File file = fileHandler.getFile(); + tryDelete(file); + File checksum = new File(file.getParent(), "." + file.getName() + ".crc"); + tryDelete(checksum); + } + tempFileHandlers.clear(); + + data = null; + sortedOrder = null; + map.clear(); + map = null; + } + + private void tryDelete(File file) { + if (file.exists()) { + Assert.assertTrue("Failed to delete file " + file, file.delete()); + } + } + + @Test + public void testSize() { + int expectedSize = data.length; + assertEquals(expectedSize, map.size()); + for (int i = (data.length / 2); i < data.length; i++) { + map.remove(data[i].getKey()); + expectedSize--; + assertEquals(expectedSize, map.size()); + } + for (int i = 0; i < (data.length / 2); i++) { + map.remove(data[i].getKey()); + expectedSize--; + assertEquals(expectedSize, map.size()); + } + assertEquals(0, map.size()); + for (int i = 0; i < data.length; i++) { + putRandomly(maps, data[i].getKey(), data[i].getValue()); + expectedSize++; + assertEquals(expectedSize, map.size()); + } + } + + @Test + public void testIsEmpty() { + assertFalse(map.isEmpty()); + for (int i = (data.length / 2); i < data.length; i++) { + map.remove(data[i].getKey()); + assertFalse(map.isEmpty()); + } + for (int i = 1; i < (data.length / 2); i++) { + map.remove(data[i].getKey()); + assertFalse(map.isEmpty()); + } + map.remove(data[0].getKey()); + assertTrue(map.isEmpty()); + for (int i = 0; i < data.length; i++) { + putRandomly(maps, data[i].getKey(), data[i].getValue()); + assertFalse(map.isEmpty()); + } + } + + @Test + public void testClear() { + map.clear(); + assertTrue(map.isEmpty()); + } + + @Test + public void testContainsObject() { + for (int i = (data.length / 2); i < data.length; i++) { + map.remove(data[i].getKey()); + } + for (int i = 1; i < (data.length / 2); i++) { + assertTrue(map.containsKey(data[i].getKey())); + } + for (int i = (data.length / 2); i < data.length; i++) { + assertFalse(map.containsKey(data[i].getKey())); + } + } + + @Test + public void testRemove() { + int expectedSize = data.length; + + for (int i = 0; i < data.length; i++) { + map.remove(data[i].getKey()); + assertEquals(--expectedSize, map.size()); + } + assertTrue(map.isEmpty()); + } + + @Test + public void testIterator() { + int index = 0; + for (Iterator> it = map.entrySet().iterator(); it.hasNext();) { + Map.Entry value = it.next(); + Map.Entry expected = data[sortedOrder[index++]]; + testEquality(expected, value); + } + map.clear(); + for (Map.Entry value : map.entrySet()) { + fail(); + } + } + + @Test + public void testIteratorRemove() { + int size = 
map.size(); + + for (Iterator> it = map.entrySet().iterator(); it.hasNext();) { + Map.Entry value = it.next(); + assertTrue(map.containsKey(value.getKey())); + it.remove(); + size--; + assertFalse(map.containsKey(value.getKey())); + assertEquals(size, map.size()); + } + assertTrue(map.isEmpty()); + } + + @Test + public void testComparator() { + final Comparator comparator = map.comparator(); + Map.Entry[] testData = Arrays.copyOf(data, data.length); + Arrays.sort(testData, new Comparator>() { + @Override + public int compare(Map.Entry o1, Map.Entry o2) { + return comparator.compare(o1.getKey(), o2.getKey()); + } + }); + int index = 0; + for (Map.Entry value : map.entrySet()) { + Map.Entry expected = testData[index++]; + testEquality(expected, value); + } + } + + @Test + public void testSubmap() { + int start = sortedOrder.length / 3; + int end = start * 2; + try { + SortedMap submap = map.subMap(data[sortedOrder[start]].getKey(), data[sortedOrder[end]].getKey()); + SortedMap expected = new TreeMap<>(map.comparator()); + for (int i = start; i < end; i++) { + expected.put(data[sortedOrder[i]].getKey(), data[sortedOrder[i]].getValue()); + } + testEquality(expected, submap); + } catch (Exception e) { + // expected + } + } + + @Test + public void testHeadmap() { + int end = sortedOrder.length / 3; + try { + SortedMap submap = map.headMap(data[sortedOrder[end]].getKey()); + SortedMap expected = new TreeMap<>(map.comparator()); + for (int i = 0; i < end; i++) { + expected.put(data[sortedOrder[i]].getKey(), data[sortedOrder[i]].getValue()); + } + testEquality(expected, submap); + } catch (Exception e) { + // expected + } + } + + @Test + public void testTailmap() { + int start = sortedOrder.length / 3; + try { + SortedMap submap = map.tailMap(data[sortedOrder[start]].getKey()); + SortedMap expected = new TreeMap<>(map.comparator()); + for (int i = start; i < sortedOrder.length; i++) { + expected.put(data[sortedOrder[i]].getKey(), data[sortedOrder[i]].getValue()); + } + testEquality(expected, submap); + } catch (Exception e) { + // expected + } + } + + @Test + public void testLastKey() { + Map.Entry expected = data[sortedOrder[data.length - 1]]; + K value = map.lastKey(); + testEquality(expected.getKey(), value); + } + + @Test + public void testFirstKey() { + Map.Entry expected = data[sortedOrder[0]]; + K value = map.firstKey(); + testEquality(expected.getKey(), value); + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/rfile/KeyValueByteDocumenTransformsTest.java b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/rfile/KeyValueByteDocumenTransformsTest.java new file mode 100644 index 00000000000..e4ac44d3bf2 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/util/sortedmap/rfile/KeyValueByteDocumenTransformsTest.java @@ -0,0 +1,107 @@ +package datawave.query.util.sortedmap.rfile; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.commons.collections.keyvalue.UnmodifiableMapEntry; +import org.junit.Test; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; + +import datawave.data.type.LcNoDiacriticsType; +import datawave.query.attributes.Attribute; +import 
datawave.query.attributes.Attributes; +import datawave.query.attributes.Document; +import datawave.query.composite.CompositeMetadata; +import datawave.query.predicate.EventDataQueryFieldFilter; +import datawave.query.util.TypeMetadata; + +public class KeyValueByteDocumenTransformsTest { + + protected final byte[] template = new byte[] {5, 2, 29, 4, 8, 3, 25, 23, 6, 21, 7, 16}; + + @Test + public void testDocumentTransforms() { + List docs = createDocuments(); + for (Document d : docs) { + Value v = KeyValueByteDocumentTransforms.documentToValue(d); + Document d2 = KeyValueByteDocumentTransforms.valueToDocument(v); + assertDocumentEquals(d, d2); + } + } + + @Test + public void testByteTransforms() { + List docs = createByteArrays(); + for (byte[] d : docs) { + Key k = KeyValueByteDocumentTransforms.byteToKey(d); + byte[] d2 = KeyValueByteDocumentTransforms.keyToByte(k); + assertArrayEquals(d, d2); + } + } + + public List createByteArrays() { + List docs = new ArrayList<>(); + for (int i = 0; i < template.length; i++) { + byte[] buffer = new byte[i + 11]; + Arrays.fill(buffer, template[i]); + docs.add(buffer); + } + for (int i = 0; i < template.length; i++) { + byte[] buffer = new byte[i + 10]; + Arrays.fill(buffer, template[i]); + docs.add(buffer); + } + return docs; + } + + public List createDocuments() { + List docs = new ArrayList<>(); + for (byte[] buffer : createByteArrays()) { + docs.add(createDocument(buffer)); + } + return docs; + } + + public Document createDocument(byte[] values) { + Key docKey = new Key("20200101_1", "datatype\u0000uid", "", values[0]); + Key attrKey = new Key("20200101_1", "datatype\u0000uid", "FIELD\u0000VALUE", values[0]); + List> attrs = new ArrayList<>(); + attrs.add(new UnmodifiableMapEntry(attrKey, new Value())); + Document doc = new Document(docKey, Collections.singleton(docKey), false, attrs.iterator(), + new TypeMetadata().put("FIELD", "datatype", LcNoDiacriticsType.class.getName()), new CompositeMetadata(), true, true, + new EventDataQueryFieldFilter()); + return doc; + } + + public static void assertDocumentEquals(Document d, Document d2) { + // a document comparison that does not include comparing metadata as that does not survive the serialization process + assertEquals(getDictionary(d), getDictionary(d2)); + } + + public static Multimap getDictionary(Document d) { + Multimap map = HashMultimap.create(); + for (Map.Entry>> e : d.entrySet()) { + String key = e.getKey(); + Attribute a = e.getValue(); + if (a instanceof Attributes) { + for (Attribute a2 : ((Attributes) a).getAttributes()) { + map.put(key, String.valueOf(a2.getData())); + } + } else { + map.put(key, String.valueOf(a.getData())); + } + } + return map; + } + +} diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml index ed6d1e2345c..c86a2ee30ef 100644 --- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml +++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml @@ -28,11 +28,23 @@ - + + + + + + + + + + + + + diff --git a/web-services/cached-results/src/main/java/datawave/webservice/results/cached/CachedRunningQuery.java b/web-services/cached-results/src/main/java/datawave/webservice/results/cached/CachedRunningQuery.java index b2a4658acd8..da9e2cd1ffe 100644 --- a/web-services/cached-results/src/main/java/datawave/webservice/results/cached/CachedRunningQuery.java +++ 
b/web-services/cached-results/src/main/java/datawave/webservice/results/cached/CachedRunningQuery.java @@ -306,7 +306,7 @@ public CachedRunningQuery(Query query, QueryLogic queryLogic, String queryId, public CachedRunningQuery(Connection connection, Query query, QueryLogic queryLogic, String queryId, String alias, String user, String view, String fields, String conditions, String grouping, String order, int pagesize, Set variableFields, Set fixedFieldsInEvent, - QueryMetricFactory metricFactory) throws SQLException { + QueryMetricFactory metricFactory) throws SQLException, QueryException { super(metricFactory); this.variableFields.clear(); @@ -574,7 +574,7 @@ private List getViewColumnNames(Connection connection, String view) thro return columns; } - public void activate(Connection connection, QueryLogic queryLogic) throws SQLException { + public void activate(Connection connection, QueryLogic queryLogic) throws SQLException, QueryException { this.connection = connection; this.transformer = queryLogic.getEnrichedTransformer(this.query); diff --git a/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml b/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml index 08cc4b74187..ad53f615676 100644 --- a/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml +++ b/web-services/deploy/configuration/src/main/resources/datawave/query/QueryLogicFactory.xml @@ -38,15 +38,23 @@ - + - + + + + + + + + +
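As a footnote to HdfsBackedSortedMapTest above, here is a worked example of the threshold arithmetic persistReloadTest uses to force the 'small' cache dir to be skipped. The 8 GiB free-space figure is invented for illustration; at runtime the real number comes from fs.getStatus().getRemaining():

    public class MinRemainingSketch {
        public static void main(String[] args) {
            long remainingBytes = 8L * 1024 * 1024 * 1024; // pretend the filesystem reports 8 GiB free
            long remainingMB = remainingBytes / 0x100000L; // bytes -> MiB, here 8192
            long minRemainingMB = remainingMB + 4096L;     // require 4 GiB more than actually exists
            // A cache dir configured with this minRemainingMB can never be satisfied,
            // so the sorted set skips 'small' and spills to 'large' instead.
            System.out.println(minRemainingMB > remainingMB); // true -> directory skipped
        }
    }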