diff --git a/luwak/src/main/java/uk/co/flax/luwak/Monitor.java b/luwak/src/main/java/uk/co/flax/luwak/Monitor.java index e3cee1b35..0a7178cac 100644 --- a/luwak/src/main/java/uk/co/flax/luwak/Monitor.java +++ b/luwak/src/main/java/uk/co/flax/luwak/Monitor.java @@ -231,7 +231,8 @@ private void prepareQueryCache(boolean storeQueries) throws IOException { } seenIds.add(id); - BytesRef serializedMQ = dataValues.mq.get(dataValues.doc); + dataValues.advance(dataValues.doc); + BytesRef serializedMQ = dataValues.mq.binaryValue(); MonitorQuery mq = MonitorQuery.deserialize(serializedMQ); BytesRef hash = mq.hash(); @@ -494,7 +495,8 @@ public MonitorQuery getQuery(final String queryId) throws IOException { throw new IllegalStateException("Cannot call getQuery() as queries are not stored"); final MonitorQuery[] queryHolder = new MonitorQuery[]{ null }; queryIndex.search(new TermQuery(new Term(FIELDS.id, queryId)), (id, query, dataValues) -> { - BytesRef serializedMQ = dataValues.mq.get(dataValues.doc); + dataValues.advance(dataValues.doc); + BytesRef serializedMQ = dataValues.mq.binaryValue(); queryHolder[0] = MonitorQuery.deserialize(serializedMQ); }); return queryHolder[0]; diff --git a/luwak/src/main/java/uk/co/flax/luwak/QueryIndex.java b/luwak/src/main/java/uk/co/flax/luwak/QueryIndex.java index 7e628e942..3e1106375 100644 --- a/luwak/src/main/java/uk/co/flax/luwak/QueryIndex.java +++ b/luwak/src/main/java/uk/co/flax/luwak/QueryIndex.java @@ -30,7 +30,7 @@ class QueryIndex { // NB this is not final because it can be replaced by purgeCache() // package-private for testing - final Map termFilters = new HashMap<>(); + final Map termFilters = new HashMap<>(); QueryIndex(IndexWriter indexWriter) throws IOException { this.writer = indexWriter; @@ -46,8 +46,8 @@ private class TermsHashBuilder extends SearcherFactory { public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException { IndexSearcher searcher = super.newSearcher(reader, previousReader); searcher.setQueryCache(null); - termFilters.put(reader, new QueryTermFilter(reader)); - reader.addReaderClosedListener(termFilters::remove); + termFilters.put(reader.getReaderCacheHelper().getKey(), new QueryTermFilter(reader)); + reader.getReaderCacheHelper().addClosedListener(termFilters::remove); return searcher; } } @@ -108,7 +108,7 @@ long search(QueryBuilder queryBuilder, QueryCollector matcher) throws IOExceptio MonitorQueryCollector collector = new MonitorQueryCollector(queries, matcher); long buildTime = System.nanoTime(); - Query query = queryBuilder.buildQuery(termFilters.get(searcher.getIndexReader())); + Query query = queryBuilder.buildQuery(termFilters.get(searcher.getIndexReader().getReaderCacheHelper().getKey())); buildTime = System.nanoTime() - buildTime; searcher.search(query, collector); return buildTime; @@ -222,6 +222,15 @@ static final class DataValues { public BinaryDocValues mq; public Scorer scorer; public int doc; + + void advance(int doc) throws IOException { + this.doc = doc; + hash.advanceExact(doc); + id.advanceExact(doc); + if (mq != null) { + mq.advanceExact(doc); + } + } } /** @@ -245,10 +254,10 @@ public void setScorer(Scorer scorer) throws IOException { @Override public void collect(int doc) throws IOException { - BytesRef hash = dataValues.hash.get(doc); - BytesRef id = dataValues.id.get(doc); + dataValues.advance(doc); + BytesRef hash = dataValues.hash.binaryValue(); + BytesRef id = dataValues.id.binaryValue(); QueryCacheEntry query = queries.get(hash); - dataValues.doc = doc; matcher.matchQuery(id.utf8ToString(), query, dataValues); } diff --git a/luwak/src/main/java/uk/co/flax/luwak/presearcher/FieldFilterPresearcherComponent.java b/luwak/src/main/java/uk/co/flax/luwak/presearcher/FieldFilterPresearcherComponent.java index 206e3cc90..2a512c0d3 100644 --- a/luwak/src/main/java/uk/co/flax/luwak/presearcher/FieldFilterPresearcherComponent.java +++ b/luwak/src/main/java/uk/co/flax/luwak/presearcher/FieldFilterPresearcherComponent.java @@ -75,7 +75,7 @@ public Query adjustPresearcherQuery(LeafReader reader, Query presearcherQuery) t private Query buildFilterClause(LeafReader reader) throws IOException { - Terms terms = reader.fields().terms(field); + Terms terms = reader.terms(field); if (terms == null) return null; diff --git a/luwak/src/main/java/uk/co/flax/luwak/presearcher/MultipassTermFilteredPresearcher.java b/luwak/src/main/java/uk/co/flax/luwak/presearcher/MultipassTermFilteredPresearcher.java index cd86eba2d..e582d1f2b 100644 --- a/luwak/src/main/java/uk/co/flax/luwak/presearcher/MultipassTermFilteredPresearcher.java +++ b/luwak/src/main/java/uk/co/flax/luwak/presearcher/MultipassTermFilteredPresearcher.java @@ -1,22 +1,21 @@ package uk.co.flax.luwak.presearcher; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.Term; -import org.apache.lucene.queries.TermsQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import uk.co.flax.luwak.analysis.TermsEnumTokenStream; import uk.co.flax.luwak.termextractor.querytree.QueryTree; import uk.co.flax.luwak.termextractor.weights.TermWeightor; +import uk.co.flax.luwak.util.CollectionUtils; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; /* * Copyright (c) 2014 Lemur Consulting Ltd. @@ -96,27 +95,33 @@ static String field(String field, int pass) { private class MultipassDocumentQueryBuilder implements DocumentQueryBuilder { BooleanQuery.Builder[] queries = new BooleanQuery.Builder[passes]; - List> terms = new ArrayList>(passes); + Map terms = new HashMap<>(); public MultipassDocumentQueryBuilder() { for (int i = 0; i < queries.length; i++) { queries[i] = new BooleanQuery.Builder(); - terms.add(i, new ArrayList()); } } @Override public void addTerm(String field, BytesRef term) throws IOException { - for (int i = 0; i < passes; i++) { - terms.get(i).add(new Term(field(field, i), term)); - } + BytesRefHash t = terms.computeIfAbsent(field, f -> new BytesRefHash()); + t.add(term); } @Override public Query build() { + Map collectedTerms = new HashMap<>(); + for (String field : terms.keySet()) { + collectedTerms.put(field, CollectionUtils.convertHash(terms.get(field))); + } BooleanQuery.Builder parent = new BooleanQuery.Builder(); for (int i = 0; i < passes; i++) { - parent.add(new TermsQuery(terms.get(i)), BooleanClause.Occur.MUST); + BooleanQuery.Builder child = new BooleanQuery.Builder(); + for (String field : terms.keySet()) { + child.add(new TermInSetQuery(field(field, i), collectedTerms.get(field)), BooleanClause.Occur.SHOULD); + } + parent.add(child.build(), BooleanClause.Occur.MUST); } return parent.build(); } diff --git a/luwak/src/main/java/uk/co/flax/luwak/presearcher/TermFilteredPresearcher.java b/luwak/src/main/java/uk/co/flax/luwak/presearcher/TermFilteredPresearcher.java index 63b5036c5..5b79869dd 100644 --- a/luwak/src/main/java/uk/co/flax/luwak/presearcher/TermFilteredPresearcher.java +++ b/luwak/src/main/java/uk/co/flax/luwak/presearcher/TermFilteredPresearcher.java @@ -16,21 +16,20 @@ * limitations under the License. */ -import java.io.IOException; -import java.io.PrintStream; -import java.util.*; - import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; -import org.apache.lucene.index.*; -import org.apache.lucene.queries.TermsQuery; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; @@ -45,6 +44,15 @@ import uk.co.flax.luwak.termextractor.querytree.QueryTreeViewer; import uk.co.flax.luwak.termextractor.weights.TermWeightor; import uk.co.flax.luwak.termextractor.weights.TokenLengthNorm; +import uk.co.flax.luwak.util.CollectionUtils; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * Presearcher implementation that uses terms extracted from queries to index @@ -96,7 +104,9 @@ public TermFilteredPresearcher(PresearcherComponent... components) { public final Query buildQuery(LeafReader reader, QueryTermFilter queryTermFilter) { try { DocumentQueryBuilder queryBuilder = getQueryBuilder(); - for (String field : reader.fields()) { + for (FieldInfo fi : reader.getFieldInfos()) { + + final String field = fi.name; TokenStream ts = new TermsEnumTokenStream(reader.terms(field).iterator()); for (PresearcherComponent component : components) { @@ -134,16 +144,21 @@ public final Query buildQuery(LeafReader reader, QueryTermFilter queryTermFilter protected DocumentQueryBuilder getQueryBuilder() { return new DocumentQueryBuilder() { - List terms = new ArrayList<>(); + Map terms = new HashMap<>(); @Override public void addTerm(String field, BytesRef term) throws IOException { - terms.add(new Term(field, term)); + BytesRefHash hash = terms.computeIfAbsent(field, f -> new BytesRefHash()); + hash.add(term); } @Override public Query build() { - return new TermsQuery(terms); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (String field : terms.keySet()) { + builder.add(new TermInSetQuery(field, CollectionUtils.convertHash(terms.get(field))), BooleanClause.Occur.SHOULD); + } + return builder.build(); } }; } diff --git a/luwak/src/main/java/uk/co/flax/luwak/termextractor/treebuilder/TermsQueryTreeBuilder.java b/luwak/src/main/java/uk/co/flax/luwak/termextractor/treebuilder/TermsQueryTreeBuilder.java deleted file mode 100644 index 5b681900b..000000000 --- a/luwak/src/main/java/uk/co/flax/luwak/termextractor/treebuilder/TermsQueryTreeBuilder.java +++ /dev/null @@ -1,43 +0,0 @@ -package uk.co.flax.luwak.termextractor.treebuilder; -/* - * Copyright (c) 2016 Lemur Consulting Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.queries.TermsQuery; -import uk.co.flax.luwak.termextractor.QueryAnalyzer; -import uk.co.flax.luwak.termextractor.QueryTreeBuilder; -import uk.co.flax.luwak.termextractor.querytree.QueryTree; -import uk.co.flax.luwak.termextractor.weights.TermWeightor; - -public class TermsQueryTreeBuilder extends QueryTreeBuilder { - - public static final TermsQueryTreeBuilder INSTANCE = new TermsQueryTreeBuilder(); - - private TermsQueryTreeBuilder() { - super(TermsQuery.class); - } - - @Override - public QueryTree buildTree(QueryAnalyzer builder, TermWeightor weightor, TermsQuery query) { - try { - return builder.buildTree(query.rewrite(null), weightor); - } catch (IOException e) { - throw new RuntimeException(e); // should never happen - } - } - -} diff --git a/luwak/src/main/java/uk/co/flax/luwak/termextractor/treebuilder/TreeBuilders.java b/luwak/src/main/java/uk/co/flax/luwak/termextractor/treebuilder/TreeBuilders.java index 9fdbaa4f2..6fa2e352b 100644 --- a/luwak/src/main/java/uk/co/flax/luwak/termextractor/treebuilder/TreeBuilders.java +++ b/luwak/src/main/java/uk/co/flax/luwak/termextractor/treebuilder/TreeBuilders.java @@ -68,7 +68,6 @@ public QueryTree buildTree(QueryAnalyzer builder, TermWeightor weightor, Query q newFilteringQueryBuilder(BoostedQuery.class, BoostedQuery::getQuery), newDisjunctionBuilder(DisjunctionMaxQuery.class, (b, w, q) -> q.getDisjuncts().stream().map(qq -> b.buildTree(qq, w)).collect(Collectors.toList())), - TermsQueryTreeBuilder.INSTANCE, TermInSetQueryTreeBuilder.INSTANCE, new QueryTreeBuilder(SpanWithinQuery.class) { @Override diff --git a/luwak/src/main/java/uk/co/flax/luwak/util/CollectionUtils.java b/luwak/src/main/java/uk/co/flax/luwak/util/CollectionUtils.java index d0325fae2..ee494ca8b 100644 --- a/luwak/src/main/java/uk/co/flax/luwak/util/CollectionUtils.java +++ b/luwak/src/main/java/uk/co/flax/luwak/util/CollectionUtils.java @@ -1,5 +1,8 @@ package uk.co.flax.luwak.util; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash; + import java.util.*; import java.util.concurrent.BlockingQueue; import java.util.concurrent.TimeUnit; @@ -45,6 +48,15 @@ public static List> partition(List items, int slices) { return list; } + public static BytesRef[] convertHash(BytesRefHash hash) { + BytesRef terms[] = new BytesRef[hash.size()]; + for (int i = 0; i < terms.length; i++) { + BytesRef t = new BytesRef(); + terms[i] = hash.get(i, t); + } + return terms; + } + /** * Drains the queue as {@link BlockingQueue#drainTo(Collection, int)}, but if the requested * {@code numElements} elements are not available, it will wait for them up to the specified diff --git a/luwak/src/main/java/uk/co/flax/luwak/util/ForceNoBulkScoringQuery.java b/luwak/src/main/java/uk/co/flax/luwak/util/ForceNoBulkScoringQuery.java index 1692ce3a6..a3ae23866 100644 --- a/luwak/src/main/java/uk/co/flax/luwak/util/ForceNoBulkScoringQuery.java +++ b/luwak/src/main/java/uk/co/flax/luwak/util/ForceNoBulkScoringQuery.java @@ -62,11 +62,16 @@ public Query getWrappedQuery() { } @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { - final Weight innerWeight = inner.createWeight(searcher, needsScores); + final Weight innerWeight = inner.createWeight(searcher, needsScores, boost); return new Weight(ForceNoBulkScoringQuery.this) { + @Override + public boolean isCacheable(LeafReaderContext leafReaderContext) { + return innerWeight.isCacheable(leafReaderContext); + } + @Override public void extractTerms(Set set) { innerWeight.extractTerms(set); @@ -77,16 +82,6 @@ public Explanation explain(LeafReaderContext leafReaderContext, int i) throws IO return innerWeight.explain(leafReaderContext, i); } - @Override - public float getValueForNormalization() throws IOException { - return innerWeight.getValueForNormalization(); - } - - @Override - public void normalize(float v, float v1) { - innerWeight.normalize(v, v1); - } - @Override public Scorer scorer(LeafReaderContext leafReaderContext) throws IOException { return innerWeight.scorer(leafReaderContext); diff --git a/luwak/src/main/java/uk/co/flax/luwak/util/SpanOffsetReportingQuery.java b/luwak/src/main/java/uk/co/flax/luwak/util/SpanOffsetReportingQuery.java index 7ff7c5b57..aa022815e 100644 --- a/luwak/src/main/java/uk/co/flax/luwak/util/SpanOffsetReportingQuery.java +++ b/luwak/src/main/java/uk/co/flax/luwak/util/SpanOffsetReportingQuery.java @@ -87,8 +87,8 @@ public int hashCode() { } @Override - public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return new SpanOffsetWeight(searcher, in.createWeight(searcher, needsScores)); + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { + return new SpanOffsetWeight(searcher, in.createWeight(searcher, needsScores, boost), boost); } /** @@ -107,8 +107,8 @@ private class SpanOffsetWeight extends SpanWeight { private final SpanWeight in; - private SpanOffsetWeight(IndexSearcher searcher, SpanWeight in) throws IOException { - super(SpanOffsetReportingQuery.this, searcher, termContexts(in)); + private SpanOffsetWeight(IndexSearcher searcher, SpanWeight in, float boost) throws IOException { + super(SpanOffsetReportingQuery.this, searcher, termContexts(in), boost); this.in = in; } @@ -126,5 +126,10 @@ public Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) throws I public void extractTerms(Set terms) { in.extractTerms(terms); } + + @Override + public boolean isCacheable(LeafReaderContext leafReaderContext) { + return in.isCacheable(leafReaderContext); + } } } diff --git a/luwak/src/main/java/uk/co/flax/luwak/util/SpanRewriter.java b/luwak/src/main/java/uk/co/flax/luwak/util/SpanRewriter.java index a58a67f56..23d9effa4 100644 --- a/luwak/src/main/java/uk/co/flax/luwak/util/SpanRewriter.java +++ b/luwak/src/main/java/uk/co/flax/luwak/util/SpanRewriter.java @@ -15,19 +15,32 @@ * limitations under the License. */ +import org.apache.lucene.index.PrefixCodedTerms; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.util.BytesRef; + import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.lucene.index.PrefixCodedTerms; -import org.apache.lucene.index.Term; -import org.apache.lucene.queries.TermsQuery; -import org.apache.lucene.search.*; -import org.apache.lucene.search.spans.*; -import org.apache.lucene.util.BytesRef; - public class SpanRewriter { public static final SpanRewriter INSTANCE = new SpanRewriter(); @@ -50,8 +63,6 @@ public Query rewrite(Query in, IndexSearcher searcher) throws RewriteException, return rewriteDisjunctionMaxQuery((DisjunctionMaxQuery) in, searcher); if (in instanceof TermInSetQuery) return rewriteTermInSetQuery((TermInSetQuery) in); - if (in instanceof TermsQuery) - return rewrite(in.rewrite(null), null); if (in instanceof BoostQuery) return rewrite(((BoostQuery) in).getQuery(), searcher); // we don't care about boosts for rewriting purposes if (in instanceof PhraseQuery) diff --git a/luwak/src/test/java/uk/co/flax/luwak/TestSlowLog.java b/luwak/src/test/java/uk/co/flax/luwak/TestSlowLog.java index 58b9c9547..d7d50f5c4 100644 --- a/luwak/src/test/java/uk/co/flax/luwak/TestSlowLog.java +++ b/luwak/src/test/java/uk/co/flax/luwak/TestSlowLog.java @@ -1,15 +1,16 @@ package uk.co.flax.luwak; -import java.io.IOException; -import java.util.Map; - -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.*; -import org.apache.lucene.util.Bits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Weight; import org.junit.Test; import uk.co.flax.luwak.matchers.SimpleMatcher; import uk.co.flax.luwak.presearcher.MatchAllPresearcher; +import java.io.IOException; +import java.util.Map; + import static org.assertj.core.api.Assertions.assertThat; /** @@ -39,7 +40,7 @@ public SlowQueryParser(long delay) { } @Override - public Query parse(String queryString, Map metadata) throws Exception { + public Query parse(String queryString, Map metadata) { if (queryString.equals("slow")) { return new Query() { @Override @@ -48,21 +49,13 @@ public String toString(String s) { } @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) { + public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) { try { Thread.sleep(delay); } catch (InterruptedException e) { throw new RuntimeException(e); } - return new RandomAccessWeight(this) { - protected Bits getMatchingDocs(LeafReaderContext context) throws IOException { - return new Bits.MatchAllBits(context.reader().maxDoc()); - } - - public String toString() { - return "weight(MatchAllDocs)"; - } - }; + return new MatchAllDocsQuery().createWeight(searcher, needsScores, boost); } @Override @@ -101,7 +94,7 @@ public void testSlowLog() throws IOException, UpdateException { .contains("2 [") .contains("3 ["); - monitor.setSlowLogLimit(2000000000000l); + monitor.setSlowLogLimit(2000000000000L); assertThat(monitor.match(doc1, SimpleMatcher.FACTORY).getSlowLog()) .isEmpty(); } diff --git a/luwak/src/test/java/uk/co/flax/luwak/analysis/TestSuffixingNGramTokenizer.java b/luwak/src/test/java/uk/co/flax/luwak/analysis/TestSuffixingNGramTokenizer.java index f40e4a032..0014d6e6f 100644 --- a/luwak/src/test/java/uk/co/flax/luwak/analysis/TestSuffixingNGramTokenizer.java +++ b/luwak/src/test/java/uk/co/flax/luwak/analysis/TestSuffixingNGramTokenizer.java @@ -139,7 +139,7 @@ public static void main(String... args) throws IOException { // Cannot use try-with-resources here as we assign to ts in the block. LeafReader reader = batch.getIndexReader(); - TokenStream ts = new TermsEnumTokenStream(reader.fields().terms("f").iterator()); + TokenStream ts = new TermsEnumTokenStream(reader.terms("f").iterator()); try { ts = new SuffixingNGramTokenFilter(ts, "XX", "__WILDCARD__", 20); //ts = new DuplicateRemovalTokenFilter(ts); diff --git a/luwak/src/test/java/uk/co/flax/luwak/matchers/ConcurrentMatcherTestBase.java b/luwak/src/test/java/uk/co/flax/luwak/matchers/ConcurrentMatcherTestBase.java index 96f3e9f42..b46c235e7 100644 --- a/luwak/src/test/java/uk/co/flax/luwak/matchers/ConcurrentMatcherTestBase.java +++ b/luwak/src/test/java/uk/co/flax/luwak/matchers/ConcurrentMatcherTestBase.java @@ -67,8 +67,9 @@ public void testMatchesAreDisambiguated() throws IOException, UpdateException { for (int i = 0; i < 10; i++) { queries.add(new MonitorQuery(Integer.toString(i), "test^10 doc " + Integer.toString(i))); } + queries.add(new MonitorQuery("11", "test^10")); monitor.update(queries); - assertThat(monitor.getDisjunctCount()).isEqualTo(30); + assertThat(monitor.getDisjunctCount()).isEqualTo(31); ExecutorService executor = Executors.newFixedThreadPool(4); @@ -79,15 +80,19 @@ public void testMatchesAreDisambiguated() throws IOException, UpdateException { Matches matches = monitor.match(batch, matcherFactory(executor, ScoringMatcher.FACTORY, 10)); - assertThat(matches.getMatchCount("1")).isEqualTo(10); - assertThat(matches.getQueriesRun()).isEqualTo(30); + assertThat(matches.getMatchCount("1")).isEqualTo(11); + assertThat(matches.getQueriesRun()).isEqualTo(31); assertThat(matches.getErrors()).isEmpty(); + + // The queries are all split into three by the QueryDecomposer, and the + // 'test' and 'doc' parts will match. 'test' will have a higher score, + // because of its lower termfreq. We need to check that each query ends + // up with the score for the 'test' subquery, not the 'doc' subquery + float testScore = + matches.getMatches("1").getMatches().stream().filter(m -> m.getQueryId().equals("11")).findFirst().get().getScore(); + for (ScoringMatch match : matches.getMatches("1")) { - // The queries are all split into three by the QueryDecomposer, and the - // 'test' and 'doc' parts will match. 'test' will have a higher score, - // because of it's lower termfreq. We need to check that each query ends - // up with the score for the 'test' subquery, not the 'doc' subquery - assertThat(match.getScore()).isEqualTo(2.5316024f); + assertThat(match.getScore()).isEqualTo(testScore); } } } diff --git a/luwak/src/test/java/uk/co/flax/luwak/matchers/TestHighlightingMatcher.java b/luwak/src/test/java/uk/co/flax/luwak/matchers/TestHighlightingMatcher.java index 558edc0b8..2d6dcf892 100644 --- a/luwak/src/test/java/uk/co/flax/luwak/matchers/TestHighlightingMatcher.java +++ b/luwak/src/test/java/uk/co/flax/luwak/matchers/TestHighlightingMatcher.java @@ -21,6 +21,7 @@ import org.junit.Test; import uk.co.flax.luwak.*; +import uk.co.flax.luwak.Matches; import uk.co.flax.luwak.presearcher.MatchAllPresearcher; import uk.co.flax.luwak.queryparsers.LuceneQueryParser; import static uk.co.flax.luwak.assertions.HighlightingMatchAssert.assertThat; @@ -374,8 +375,8 @@ public int hashCode() { } @Override - public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return inner.createWeight(searcher, needsScores); + public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { + return inner.createWeight(searcher, needsScores, boost); } }, new MatchAllPresearcher()); diff --git a/luwak/src/test/java/uk/co/flax/luwak/presearcher/PresearcherTestBase.java b/luwak/src/test/java/uk/co/flax/luwak/presearcher/PresearcherTestBase.java index f6c0e6585..391c527ec 100644 --- a/luwak/src/test/java/uk/co/flax/luwak/presearcher/PresearcherTestBase.java +++ b/luwak/src/test/java/uk/co/flax/luwak/presearcher/PresearcherTestBase.java @@ -1,10 +1,6 @@ package uk.co.flax.luwak.presearcher; -import java.io.IOException; -import java.util.Map; - import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.LegacyNumericTokenStream; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; @@ -15,13 +11,24 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.util.*; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; +import org.apache.lucene.util.BytesRef; import org.junit.Before; import org.junit.Test; -import uk.co.flax.luwak.*; +import uk.co.flax.luwak.DocumentBatch; +import uk.co.flax.luwak.InputDocument; +import uk.co.flax.luwak.Monitor; +import uk.co.flax.luwak.MonitorQuery; +import uk.co.flax.luwak.MonitorQueryParser; +import uk.co.flax.luwak.Presearcher; +import uk.co.flax.luwak.UpdateException; import uk.co.flax.luwak.matchers.SimpleMatcher; import uk.co.flax.luwak.queryparsers.LuceneQueryParser; +import java.io.IOException; +import java.util.Map; + import static uk.co.flax.luwak.assertions.MatchesAssert.assertThat; /** @@ -215,41 +222,6 @@ public void testNonStringTermHandling() throws IOException, UpdateException { } - @Test - @SuppressWarnings("deprecation") - public void filtersOnNumericTermQueries() throws IOException, UpdateException { - - // Rudimentary query parser which returns numeric encoded BytesRefs - try (Monitor numeric_monitor = new Monitor(new MonitorQueryParser() { - @Override - public Query parse(String queryString, Map metadata) throws Exception - { - BytesRefBuilder brb = new BytesRefBuilder(); - LegacyNumericUtils.intToPrefixCoded(Integer.parseInt(queryString), 0, brb); - - Term t = new Term(TEXTFIELD, brb.get()); - return new TermQuery(t); - } - }, presearcher)) { - - for (int i = 8; i <= 15; i++) { - numeric_monitor.update(new MonitorQuery("query" + i, "" + i)); - } - - for (int i = 8; i <= 15; i++) { - LegacyNumericTokenStream nts = new LegacyNumericTokenStream(1); - nts.setIntValue(i); - InputDocument doc = InputDocument.builder("doc" + i) - .addField(new TextField(TEXTFIELD, nts)).build(); - assertThat(numeric_monitor.match(doc, SimpleMatcher.FACTORY)) - .matchesDoc("doc" + i) - .hasMatchCount("doc" + i, 1) - .matchesQuery("query" + i, "doc" + i); - } - - } - } - public static BooleanClause must(Query q) { return new BooleanClause(q, BooleanClause.Occur.MUST); } diff --git a/luwak/src/test/java/uk/co/flax/luwak/presearcher/TestMultipassPresearcher.java b/luwak/src/test/java/uk/co/flax/luwak/presearcher/TestMultipassPresearcher.java index d06cc9b18..4583b2d94 100644 --- a/luwak/src/test/java/uk/co/flax/luwak/presearcher/TestMultipassPresearcher.java +++ b/luwak/src/test/java/uk/co/flax/luwak/presearcher/TestMultipassPresearcher.java @@ -1,20 +1,32 @@ package uk.co.flax.luwak.presearcher; -import java.io.IOException; - import org.apache.lucene.analysis.core.KeywordAnalyzer; -import org.apache.lucene.index.*; -import org.apache.lucene.queries.TermsQuery; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; import org.assertj.core.api.Assertions; import org.junit.Test; -import uk.co.flax.luwak.*; +import uk.co.flax.luwak.DocumentBatch; +import uk.co.flax.luwak.InputDocument; +import uk.co.flax.luwak.Matches; +import uk.co.flax.luwak.Monitor; +import uk.co.flax.luwak.MonitorQuery; +import uk.co.flax.luwak.Presearcher; +import uk.co.flax.luwak.QueryMatch; +import uk.co.flax.luwak.QueryTermFilter; +import uk.co.flax.luwak.UpdateException; import uk.co.flax.luwak.matchers.SimpleMatcher; import uk.co.flax.luwak.queryparsers.LuceneQueryParser; -import uk.co.flax.luwak.termextractor.weights.TermWeightor; + +import java.io.IOException; import static uk.co.flax.luwak.assertions.MatchesAssert.assertThat; @@ -75,6 +87,20 @@ public void testComplexBoolean() throws IOException, UpdateException { } + @Test + public void testMultipleFields() throws IOException, UpdateException { + + monitor.update(new MonitorQuery("1", "field1:(foo OR bar) AND field2:cormorant")); + InputDocument doc = InputDocument.builder("doc1") + .addField("field1", "a badger walked into a bar", WHITESPACE) + .addField("field2", "cormorant", WHITESPACE) + .build(); + + assertThat(monitor.match(doc, SimpleMatcher.FACTORY)) + .hasQueriesRunCount(1) + .hasMatchCount("doc1", 1); + } + @Test public void testQueryBuilder() throws IOException, UpdateException { @@ -96,11 +122,11 @@ public void testQueryBuilder() throws IOException, UpdateException { BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docs.getIndexReader(), new QueryTermFilter(reader)); BooleanQuery expected = new BooleanQuery.Builder() .add(should(new BooleanQuery.Builder() - .add(must(new TermsQuery(new Term("f_0", "test")))) - .add(must(new TermsQuery(new Term("f_1", "test")))) - .add(must(new TermsQuery(new Term("f_2", "test")))) - .add(must(new TermsQuery(new Term("f_3", "test")))) - .build())) + .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_0", new BytesRef("test")))).build())) + .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_1", new BytesRef("test")))).build())) + .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_2", new BytesRef("test")))).build())) + .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_3", new BytesRef("test")))).build())) + .build())) .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__")))) .build(); diff --git a/luwak/src/test/java/uk/co/flax/luwak/presearcher/TestTermPresearcher.java b/luwak/src/test/java/uk/co/flax/luwak/presearcher/TestTermPresearcher.java index d47a59ddf..ff1e5b84c 100644 --- a/luwak/src/test/java/uk/co/flax/luwak/presearcher/TestTermPresearcher.java +++ b/luwak/src/test/java/uk/co/flax/luwak/presearcher/TestTermPresearcher.java @@ -1,25 +1,37 @@ package uk.co.flax.luwak.presearcher; -import java.io.IOException; -import java.util.Map; - import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.index.*; -import org.apache.lucene.queries.TermsQuery; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import org.assertj.core.api.Assertions; import org.junit.Test; -import uk.co.flax.luwak.*; +import uk.co.flax.luwak.DocumentBatch; +import uk.co.flax.luwak.InputDocument; +import uk.co.flax.luwak.Matches; +import uk.co.flax.luwak.Monitor; +import uk.co.flax.luwak.MonitorQuery; +import uk.co.flax.luwak.Presearcher; +import uk.co.flax.luwak.QueryMatch; +import uk.co.flax.luwak.QueryTermFilter; +import uk.co.flax.luwak.UpdateException; import uk.co.flax.luwak.matchers.SimpleMatcher; import uk.co.flax.luwak.queryparsers.LuceneQueryParser; import uk.co.flax.luwak.termextractor.querytree.QueryTree; -import uk.co.flax.luwak.termextractor.weights.TermWeightor; + +import java.io.IOException; +import java.util.Map; import static uk.co.flax.luwak.assertions.MatchesAssert.assertThat; @@ -122,7 +134,7 @@ public void testQueryBuilder() throws IOException, UpdateException { BooleanQuery q = (BooleanQuery) presearcher.buildQuery(batch.getIndexReader(), new QueryTermFilter(reader)); BooleanQuery expected = new BooleanQuery.Builder() - .add(should(new TermsQuery(new Term("f", "test")))) + .add(should(new BooleanQuery.Builder().add(should(new TermInSetQuery("f", new BytesRef("test")))).build())) .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__")))) .build(); diff --git a/luwak/src/test/java/uk/co/flax/luwak/termextractor/TestCoreLuceneQueryExtractors.java b/luwak/src/test/java/uk/co/flax/luwak/termextractor/TestCoreLuceneQueryExtractors.java index 26a2cd123..44d101157 100644 --- a/luwak/src/test/java/uk/co/flax/luwak/termextractor/TestCoreLuceneQueryExtractors.java +++ b/luwak/src/test/java/uk/co/flax/luwak/termextractor/TestCoreLuceneQueryExtractors.java @@ -44,7 +44,8 @@ public class TestCoreLuceneQueryExtractors { "org.apache.lucene.search.FieldValueQuery", "org.apache.lucene.search.BlendedTermQuery", "org.apache.lucene.search.AutomatonQuery", - "org.apache.lucene.queries.BoostingQuery" + "org.apache.lucene.queries.BoostingQuery", + "org.apache.lucene.search.intervals.IntervalQuery" ); public static Set unhandledQueries = ImmutableSet.of( @@ -64,7 +65,10 @@ public class TestCoreLuceneQueryExtractors { "org.apache.lucene.queries.function.FunctionQuery", "org.apache.lucene.queries.function.FunctionMatchQuery", "org.apache.lucene.search.IndexOrDocValuesQuery", - "org.apache.lucene.search.LegacyNumericRangeQuery" + "org.apache.lucene.search.LegacyNumericRangeQuery", + "org.apache.lucene.search.NormsFieldExistsQuery", + "org.apache.lucene.search.DocValuesFieldExistsQuery", + "org.apache.lucene.search.CoveringQuery" ); public static Set unhandledTypes = ImmutableSet.of( diff --git a/luwak/src/test/java/uk/co/flax/luwak/termextractor/TestExtractors.java b/luwak/src/test/java/uk/co/flax/luwak/termextractor/TestExtractors.java index 9208f05b8..d2e53a08c 100644 --- a/luwak/src/test/java/uk/co/flax/luwak/termextractor/TestExtractors.java +++ b/luwak/src/test/java/uk/co/flax/luwak/termextractor/TestExtractors.java @@ -2,10 +2,18 @@ import com.google.common.collect.ImmutableList; import org.apache.lucene.index.Term; -import org.apache.lucene.queries.TermsQuery; -import org.apache.lucene.search.*; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; import org.junit.Test; -import uk.co.flax.luwak.presearcher.TermFilteredPresearcher; import uk.co.flax.luwak.termextractor.treebuilder.RegexpNGramTermQueryTreeBuilder; import uk.co.flax.luwak.termextractor.weights.TermWeightor; import uk.co.flax.luwak.termextractor.weights.TokenLengthNorm; @@ -54,25 +62,6 @@ public void testRegexpExtractor() { } - @Test - @SuppressWarnings("deprecation") - public void testRangeQueriesReturnAnyToken() { - - LegacyNumericRangeQuery nrq = LegacyNumericRangeQuery.newLongRange("field", 0l, 10l, true, true); - - assertThat(treeBuilder.collectTerms(nrq, WEIGHTOR)) - .hasSize(1) - .extracting("type") - .containsExactly(QueryTerm.Type.ANY); - - BooleanQuery.Builder bq = new BooleanQuery.Builder(); - bq.add(nrq, BooleanClause.Occur.MUST); - bq.add(new TermQuery(new Term("field", "term")), BooleanClause.Occur.MUST); - - assertThat(treeBuilder.collectTerms(bq.build(), WEIGHTOR)) - .containsExactly(new QueryTerm("field", "term", QueryTerm.Type.EXACT)); - } - @Test public void testConstantScoreQueryExtractor() { @@ -122,10 +111,10 @@ public void testDisjunctionMaxExtractor() { } @Test - public void testTermsQueryExtractor() { - Query q = new TermsQuery(new Term("f1", "t1"), new Term("f2", "t2")); + public void testTermInSetQueryExtractor() { + Query q = new TermInSetQuery("f1", new BytesRef("t1"), new BytesRef("t2")); assertThat(treeBuilder.collectTerms(q, WEIGHTOR)) - .containsOnly(new QueryTerm("f1", "t1", QueryTerm.Type.EXACT), new QueryTerm("f2", "t2", QueryTerm.Type.EXACT)); + .containsOnly(new QueryTerm("f1", "t1", QueryTerm.Type.EXACT), new QueryTerm("f1", "t2", QueryTerm.Type.EXACT)); } @Test diff --git a/luwak/src/test/java/uk/co/flax/luwak/util/TestSpanRewriter.java b/luwak/src/test/java/uk/co/flax/luwak/util/TestSpanRewriter.java index 61ab20c9c..5169f4257 100644 --- a/luwak/src/test/java/uk/co/flax/luwak/util/TestSpanRewriter.java +++ b/luwak/src/test/java/uk/co/flax/luwak/util/TestSpanRewriter.java @@ -16,8 +16,12 @@ */ import org.apache.lucene.index.Term; -import org.apache.lucene.queries.TermsQuery; -import org.apache.lucene.search.*; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.junit.Test; @@ -26,15 +30,6 @@ public class TestSpanRewriter { - @Test - public void testTermsQueryWithMultipleFields() throws Exception { - - TermsQuery tq = new TermsQuery(new Term("field1", "term1"), new Term("field2", "term1"), new Term("field2", "term2")); - - Query q = new SpanRewriter().rewrite(tq, null); - assertThat(q).isInstanceOf(BooleanQuery.class); - } - @Test public void testBoostQuery() throws Exception { diff --git a/pom.xml b/pom.xml index c4f49ad48..889561782 100644 --- a/pom.xml +++ b/pom.xml @@ -20,7 +20,7 @@ org.apache.lucene - 6.5.0 + 7.5.0 UTF-8 UTF-8 1.8