Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.misc.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.index.Impact;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;

/**
 * BulkScorer that splits the requested doc ID window into fixed-size ranges and uses impact
 * information to skip the ranges whose best possible score cannot be competitive.
 *
 * <p>Ranges are passed to the delegate in increasing doc ID order. Lucene bulk scorers and leaf
 * collectors consume documents in increasing doc ID order, so an iterator-backed delegate that
 * has already moved past a range cannot go back to it; visiting ranges by descending score bound
 * would therefore drop hits. The score bound of a range is only used to decide whether the range
 * can be skipped once the collector has published a minimum competitive score.
 *
 * <p>When no impacts or no {@code SimScorer} are available, or when the window cannot be split
 * into a reasonable number of ranges, scoring is handed to the delegate unchanged (restricted to
 * {@code [minDoc, maxDoc)}).
 */
public class ImpactRangeBulkScorer extends BulkScorer {

  /** Largest number of ranges built per {@code score} call; larger windows go to the delegate. */
  private static final long MAX_RANGES = 10000;

  /** "No further matches" sentinel; same value as {@code DocIdSetIterator.NO_MORE_DOCS}. */
  private static final int NO_MORE_DOCS = Integer.MAX_VALUE;

  private final BulkScorer delegate;
  private final int rangeSize;
  private final int minDoc;
  private final int maxDoc;
  private final SimScorer simScorer;
  private final ImpactsEnum impactsEnum;

  /**
   * Creates a new range-skipping bulk scorer.
   *
   * @param delegate bulk scorer that performs the actual matching and collection
   * @param rangeSize number of doc IDs per range; a non-positive value disables range splitting
   * @param minDoc first doc ID (inclusive) that may be scored
   * @param maxDoc end (exclusive) of the doc IDs that may be scored
   * @param simScorer scorer used to turn impacts into score upper bounds; may be {@code null}
   * @param impactsEnum source of impacts; may be {@code null}, which disables range skipping
   */
  public ImpactRangeBulkScorer(
      BulkScorer delegate,
      int rangeSize,
      int minDoc,
      int maxDoc,
      SimScorer simScorer,
      ImpactsEnum impactsEnum) {
    this.delegate = delegate;
    this.rangeSize = rangeSize;
    this.minDoc = minDoc;
    this.maxDoc = maxDoc;
    this.simScorer = simScorer;
    this.impactsEnum = impactsEnum;
  }

  /**
   * Scores the documents of {@code [min, max)} that also fall inside {@code [minDoc, maxDoc)}.
   *
   * @return {@code NO_MORE_DOCS} when the window reaches {@code maxDoc} (nothing at or after
   *     {@code maxDoc} is ever scored by this class), otherwise a value that is at least
   *     {@code max}
   */
  @Override
  public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
    int actualMin = Math.max(min, minDoc);
    int actualMax = Math.min(max, maxDoc);

    if (actualMin >= actualMax) {
      return nextDocEstimate(actualMax, max);
    }

    if (impactsEnum == null || simScorer == null) {
      return nextDocEstimate(delegate.score(collector, acceptDocs, actualMin, actualMax), max);
    }

    List<Range> ranges = computeRanges(actualMin, actualMax);
    if (ranges.isEmpty()) {
      return nextDocEstimate(delegate.score(collector, acceptDocs, actualMin, actualMax), max);
    }

    EarlyTerminationWrapper wrapper = new EarlyTerminationWrapper(collector);

    int lastDoc = -1;
    // Ranges are in increasing doc ID order; see the class comment for why they are not sorted.
    for (Range range : ranges) {
      if (wrapper.minCompetitiveScore > 0 && range.maxScore < wrapper.minCompetitiveScore) {
        // No document of this range can reach the score the collector currently requires.
        continue;
      }
      lastDoc = Math.max(lastDoc, delegate.score(wrapper, acceptDocs, range.start, range.end));
    }

    return nextDocEstimate(lastDoc, max);
  }

  /**
   * Turns a candidate "next doc" into a value that honours the return contract of {@code score}:
   * never below {@code max}, and {@code NO_MORE_DOCS} once the window has reached {@code maxDoc}.
   */
  private int nextDocEstimate(int candidate, int max) {
    if (max >= maxDoc) {
      return NO_MORE_DOCS;
    }
    return Math.max(candidate, max);
  }

  /**
   * Collector wrapper that forwards everything to the wrapped collector and remembers the latest
   * minimum competitive score the wrapped collector has set on its scorer.
   */
  private static class EarlyTerminationWrapper implements LeafCollector {
    private final LeafCollector delegate;
    private float minCompetitiveScore = 0;

    EarlyTerminationWrapper(LeafCollector delegate) {
      this.delegate = delegate;
    }

    @Override
    public void setScorer(Scorable scorer) throws IOException {
      Scorable wrapper =
          new Scorable() {
            @Override
            public float score() throws IOException {
              return scorer.score();
            }

            @Override
            public void setMinCompetitiveScore(float minScore) throws IOException {
              // Record the threshold for range skipping, then pass it on unchanged.
              minCompetitiveScore = minScore;
              scorer.setMinCompetitiveScore(minScore);
            }
          };
      delegate.setScorer(wrapper);
    }

    @Override
    public void collect(int doc) throws IOException {
      delegate.collect(doc);
    }
  }

  /**
   * Splits {@code [min, max)} into consecutive ranges of {@code rangeSize} doc IDs (the last one
   * may be shorter) and computes a score upper bound for each.
   *
   * @return the ranges in increasing doc ID order, or an empty list when the window is empty,
   *     {@code rangeSize} is not positive, or more than {@code MAX_RANGES} ranges would be needed
   */
  private List<Range> computeRanges(int min, int max) throws IOException {
    if (max <= min || rangeSize <= 0) {
      return new ArrayList<>();
    }
    // long arithmetic: max is frequently Integer.MAX_VALUE, which overflows int math.
    long span = (long) max - min;
    long numRanges = (span + rangeSize - 1) / rangeSize;
    if (numRanges > MAX_RANGES) {
      return new ArrayList<>();
    }

    List<Range> ranges = new ArrayList<>((int) numRanges);
    for (long start = min; start < max; start += rangeSize) {
      int rangeStart = (int) start;
      int rangeEnd = (int) Math.min(start + rangeSize, (long) max);
      ranges.add(new Range(rangeStart, rangeEnd, maxScoreUpperBound(rangeStart, rangeEnd)));
    }
    return ranges;
  }

  /**
   * Computes an upper bound of the score of any document in {@code [rangeStart, rangeEnd)}.
   *
   * @return the largest score over the impacts of the levels needed to cover the range, or
   *     {@code Float.POSITIVE_INFINITY} when no level reaches the end of the range, so that such
   *     a range is never skipped
   */
  private float maxScoreUpperBound(int rangeStart, int rangeEnd) throws IOException {
    impactsEnum.advanceShallow(rangeStart);
    Impacts impacts = impactsEnum.getImpacts();

    // getDocIdUpTo is inclusive, so a level covers the range once it reaches rangeEnd - 1.
    int lastDocInRange = rangeEnd - 1;
    float maxScore = 0;
    for (int level = 0; level < impacts.numLevels(); level++) {
      int docUpTo = impacts.getDocIdUpTo(level);
      if (docUpTo < rangeStart) {
        continue;
      }
      for (Impact impact : impacts.getImpacts(level)) {
        maxScore = Math.max(maxScore, simScorer.score(impact.freq, impact.norm));
      }
      if (docUpTo >= lastDocInRange) {
        return maxScore;
      }
    }
    // The impacts do not describe the tail of the range: no valid bound is known.
    return Float.POSITIVE_INFINITY;
  }

  /** Half-open doc ID range {@code [start, end)} with the score upper bound of its documents. */
  private static class Range {
    final int start;
    final int end;
    final float maxScore;

    Range(int start, int end, float maxScore) {
      this.start = start;
      this.end = end;
      this.maxScore = maxScore;
    }
  }

  /** Returns the cost reported by the delegate. */
  @Override
  public long cost() {
    return delegate.cost();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.misc.search;

import java.io.IOException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Weight;

/**
 * Query wrapper that prioritizes document ranges using impact information.
 *
 * <p>The wrapped query is rewritten and weighted as usual; {@code createWeight} wraps the
 * resulting weight in an {@code ImpactRangeWeight} configured with this query's range size and
 * doc ID restriction. Two instances are equal when the wrapped query, the range size and both
 * doc ID limits are equal.
 */
public class ImpactRangeQuery extends Query {

  private final Query query;
  private final int rangeSize;
  private final int minDoc;
  private final int maxDoc;

  /**
   * Create a new ImpactRangeQuery without doc ID restriction.
   *
   * @param query the query to wrap
   * @param rangeSize number of doc IDs per range; expected to be positive
   */
  public ImpactRangeQuery(Query query, int rangeSize) {
    this(query, rangeSize, 0, Integer.MAX_VALUE);
  }

  /**
   * Create a new ImpactRangeQuery with document range restriction. The range size is set to the
   * width of the restriction, clamped to {@code [1, Integer.MAX_VALUE]}.
   *
   * @param query the query to wrap
   * @param minDoc lower doc ID limit
   * @param maxDoc upper doc ID limit
   */
  public ImpactRangeQuery(Query query, int minDoc, int maxDoc) {
    this(query, spanOf(minDoc, maxDoc), minDoc, maxDoc);
  }

  /** Canonical constructor; also used by {@code rewrite} to copy every setting verbatim. */
  private ImpactRangeQuery(Query query, int rangeSize, int minDoc, int maxDoc) {
    this.query = query;
    this.rangeSize = rangeSize;
    this.minDoc = minDoc;
    this.maxDoc = maxDoc;
  }

  /** Width of {@code [minDoc, maxDoc)} computed without int overflow and clamped to >= 1. */
  private static int spanOf(int minDoc, int maxDoc) {
    long span = (long) maxDoc - minDoc;
    return (int) Math.max(1L, Math.min(span, (long) Integer.MAX_VALUE));
  }

  /**
   * Rewrites the wrapped query and, if it changed, returns a new wrapper around the rewritten
   * query with the same range size and doc ID limits; otherwise returns this instance.
   */
  @Override
  public Query rewrite(IndexSearcher indexSearcher) throws IOException {
    Query rewritten = query.rewrite(indexSearcher);
    if (rewritten != query) {
      return new ImpactRangeQuery(rewritten, rangeSize, minDoc, maxDoc);
    }
    return this;
  }

  /** Creates the wrapped query's weight and wraps it in an {@code ImpactRangeWeight}. */
  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
      throws IOException {
    Weight innerWeight = query.createWeight(searcher, scoreMode, boost);
    return new ImpactRangeWeight(
        this, innerWeight, rangeSize, minDoc, maxDoc, searcher, scoreMode, boost);
  }

  /** Visits the wrapped query as a MUST clause of this query. */
  @Override
  public void visit(QueryVisitor visitor) {
    // The second argument of getSubVisitor is the parent query, i.e. this wrapper.
    query.visit(visitor.getSubVisitor(BooleanClause.Occur.MUST, this));
  }

  @Override
  public String toString(String field) {
    StringBuilder sb = new StringBuilder("ImpactRange(");
    sb.append(query.toString(field)).append(", rangeSize=").append(rangeSize);
    // The doc ID limits take part in equals/hashCode, so show them whenever they restrict.
    if (minDoc != 0 || maxDoc != Integer.MAX_VALUE) {
      sb.append(", minDoc=").append(minDoc).append(", maxDoc=").append(maxDoc);
    }
    return sb.append(")").toString();
  }

  @Override
  public boolean equals(Object obj) {
    if (sameClassAs(obj) == false) {
      return false;
    }
    ImpactRangeQuery other = (ImpactRangeQuery) obj;
    return query.equals(other.query)
        && rangeSize == other.rangeSize
        && minDoc == other.minDoc
        && maxDoc == other.maxDoc;
  }

  @Override
  public int hashCode() {
    int result = classHash();
    result = 31 * result + query.hashCode();
    result = 31 * result + rangeSize;
    result = 31 * result + minDoc;
    result = 31 * result + maxDoc;
    return result;
  }

  /** Returns the wrapped query. */
  public Query getQuery() {
    return query;
  }

  /** Returns the number of doc IDs per range. */
  public int getRangeSize() {
    return rangeSize;
  }

  /** Returns the lower doc ID limit ({@code 0} when unrestricted). */
  public int getMinDoc() {
    return minDoc;
  }

  /** Returns the upper doc ID limit ({@code Integer.MAX_VALUE} when unrestricted). */
  public int getMaxDoc() {
    return maxDoc;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.misc.search;

import java.io.IOException;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.similarities.Similarity;

/**
 * ScorerSupplier wrapper that creates ImpactRangeBulkScorer.
 *
 * <p>{@code get} and {@code cost} are passed straight through to the wrapped supplier; only
 * {@code bulkScorer} adds behavior, by wrapping the inner bulk scorer together with an
 * {@code ImpactsEnum} when one can be obtained.
 */
public class ImpactRangeScorerSupplier extends ScorerSupplier {

  private final ScorerSupplier in;
  private final int rangeSize;
  private final int minDoc;
  private final int maxDoc;
  private final Similarity.SimScorer simScorer;
  private final ScoreMode scoreMode;

  /**
   * Creates a supplier around {@code in}.
   *
   * @param in wrapped supplier that provides the scorer and the bulk scorer
   * @param rangeSize range size handed to the created ImpactRangeBulkScorer
   * @param minDoc lower doc ID limit handed to the created ImpactRangeBulkScorer
   * @param maxDoc upper doc ID limit handed to the created ImpactRangeBulkScorer
   * @param simScorer scorer handed to the created ImpactRangeBulkScorer
   * @param scoreMode score mode of the search; impacts are only looked up for TOP_SCORES
   */
  public ImpactRangeScorerSupplier(
      ScorerSupplier in,
      int rangeSize,
      int minDoc,
      int maxDoc,
      Similarity.SimScorer simScorer,
      ScoreMode scoreMode) {
    this.in = in;
    this.rangeSize = rangeSize;
    this.minDoc = minDoc;
    this.maxDoc = maxDoc;
    this.simScorer = simScorer;
    this.scoreMode = scoreMode;
  }

  /** Returns the wrapped supplier's scorer unchanged. */
  @Override
  public Scorer get(long leadCost) throws IOException {
    return in.get(leadCost);
  }

  /**
   * Wraps the inner bulk scorer in an ImpactRangeBulkScorer, or returns {@code null} when the
   * wrapped supplier provides no bulk scorer.
   */
  @Override
  public BulkScorer bulkScorer() throws IOException {
    final BulkScorer inner = in.bulkScorer();
    if (inner == null) {
      return null;
    }
    // NOTE(review): together with lookUpImpacts() this calls both in.bulkScorer() and in.get()
    // on the same supplier; confirm the wrapped supplier supports that, since ScorerSupplier
    // documents each of them as call-at-most-once.
    return new ImpactRangeBulkScorer(
        inner, rangeSize, minDoc, maxDoc, simScorer, lookUpImpacts());
  }

  /**
   * Returns the wrapped scorer's iterator when it is an ImpactsEnum and the score mode is
   * TOP_SCORES; returns {@code null} in every other case.
   */
  private ImpactsEnum lookUpImpacts() throws IOException {
    // Only try to get impacts when ScoreMode suggests they would be properly configured
    if (scoreMode != ScoreMode.TOP_SCORES) {
      return null;
    }
    final Scorer scorer = in.get(Long.MAX_VALUE);
    if (scorer == null || !(scorer.iterator() instanceof ImpactsEnum)) {
      return null;
    }
    return (ImpactsEnum) scorer.iterator();
  }

  /** Returns the wrapped supplier's cost unchanged. */
  @Override
  public long cost() {
    return in.cost();
  }
}
Loading
Loading