forked from flaxsearch/luwak
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 69c00f5
Showing
16 changed files
with
897 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.idea/ | ||
target/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
Copyright (c) 2013 Lemur Consulting Ltd. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
implied. | ||
|
||
See the License for the specific language governing permissions and | ||
limitations under the License. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Luwak - stored query engine from Flax |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  IntelliJ IDEA module descriptor for a Maven-managed Java module.
  Source roots, output directories and library entries mirror the Maven
  project layout (src/main/java, src/main/resources, src/test/java, target).
-->
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true"
        type="JAVA_MODULE"
        version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="false">
    <!-- Compiler output locations -->
    <output url="file://$MODULE_DIR$/target/classes" />
    <output-test url="file://$MODULE_DIR$/target/test-classes" />

    <!-- Source roots; the build directory is excluded from indexing -->
    <content url="file://$MODULE_DIR$">
      <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/main/resources" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
      <excludeFolder url="file://$MODULE_DIR$/target" />
    </content>

    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />

    <!-- Libraries without an explicit scope -->
    <orderEntry type="library"
                name="Maven: uk.co.flax.lucene-solr-intervals:lucene-core:5.0.0-intervals-1.0"
                level="project" />
    <orderEntry type="library"
                name="Maven: uk.co.flax.lucene-solr-intervals:lucene-memory:5.0.0-intervals-1.0"
                level="project" />
    <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.2" level="project" />

    <!-- Libraries with scope TEST -->
    <orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.11" level="project" />
    <orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
    <orderEntry type="library"
                scope="TEST"
                name="Maven: org.easytesting:fest-assert-core:2.0M10"
                level="project" />
    <orderEntry type="library" scope="TEST" name="Maven: org.easytesting:fest-util:1.2.5" level="project" />
    <orderEntry type="library" scope="TEST" name="Maven: org.slf4j:slf4j-simple:1.7.2" level="project" />
    <orderEntry type="library"
                scope="TEST"
                name="Maven: uk.co.flax.lucene-solr-intervals:lucene-analyzers-common:5.0.0-intervals-1.0"
                level="project" />
  </component>
</module>
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>uk.co.flax</groupId>
    <artifactId>luwak</artifactId>
    <!--
      "jar" rather than "pom": this project carries Java sources under
      src/main/java and tests under src/test/java. The "pom" packaging
      binds no compile or test goals to the lifecycle, so those sources
      would never be built or tested.
    -->
    <packaging>jar</packaging>
    <version>0.1-SNAPSHOT</version>

    <!-- Versions shared by more than one dependency below -->
    <properties>
        <lucene.version>5.0.0-intervals-1.0</lucene.version>
        <slf4j.version>1.7.2</slf4j.version>
    </properties>

    <dependencies>

        <!-- Default-scope dependencies -->
        <dependency>
            <groupId>uk.co.flax.lucene-solr-intervals</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>uk.co.flax.lucene-solr-intervals</groupId>
            <artifactId>lucene-memory</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>

        <!-- Test-scope dependencies -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.easytesting</groupId>
            <artifactId>fest-assert-core</artifactId>
            <version>2.0M10</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>${slf4j.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>uk.co.flax.lucene-solr-intervals</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
            <scope>test</scope>
        </dependency>

    </dependencies>

</project>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package uk.co.flax.luwak; | ||
|
||
import org.apache.lucene.index.memory.MemoryIndex; | ||
import org.apache.lucene.search.Query; | ||
|
||
/** | ||
* Copyright (c) 2013 Lemur Consulting Ltd. | ||
* <p/> | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* <p/> | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* <p/> | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
public abstract class InputDocument { | ||
|
||
private final String id; | ||
protected final MemoryIndex index = new MemoryIndex(true); | ||
|
||
public InputDocument(String id) { | ||
this.id = id; | ||
} | ||
|
||
public abstract Query getPresearcherQuery(); | ||
|
||
public String getId() { | ||
return id; | ||
} | ||
|
||
public MemoryIndex getDocumentIndex() { | ||
return index; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
package uk.co.flax.luwak; | ||
|
||
import org.apache.lucene.index.AtomicReaderContext; | ||
import org.apache.lucene.search.Collector; | ||
import org.apache.lucene.search.Scorer; | ||
import org.apache.lucene.search.Weight; | ||
import org.apache.lucene.search.intervals.Interval; | ||
import org.apache.lucene.search.intervals.IntervalCollector; | ||
import org.apache.lucene.search.intervals.IntervalIterator; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* Copyright (c) 2013 Lemur Consulting Ltd. | ||
* <p/> | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* <p/> | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* <p/> | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
public class MatchCollector extends Collector implements IntervalCollector { | ||
|
||
protected Scorer scorer; | ||
private IntervalIterator positions; | ||
|
||
private QueryMatch matches = null; | ||
private final MonitorQuery mq; | ||
|
||
public MatchCollector(MonitorQuery mq) { | ||
this.mq = mq; | ||
} | ||
|
||
public QueryMatch getMatches() { | ||
return matches; | ||
} | ||
|
||
@Override | ||
public void collect(int doc) throws IOException { | ||
// consume any remaining positions the scorer didn't report | ||
matches = new QueryMatch(this.mq.getId()); | ||
positions.scorerAdvanced(doc); | ||
while(positions.next() != null) { | ||
positions.collect(this); | ||
} | ||
} | ||
|
||
public boolean acceptsDocsOutOfOrder() { | ||
return false; | ||
} | ||
|
||
public void setScorer(Scorer scorer) throws IOException { | ||
this.scorer = scorer; | ||
positions = scorer.intervals(true); | ||
// If we want to visit the other scorers, we can, here... | ||
} | ||
|
||
public void setNextReader(AtomicReaderContext context) throws IOException { | ||
} | ||
|
||
@Override | ||
public Weight.PostingFeatures postingFeatures() { | ||
return Weight.PostingFeatures.OFFSETS; | ||
} | ||
|
||
@Override | ||
public void collectLeafPosition(Scorer scorer, Interval interval, int docID) { | ||
matches.addInterval(interval); | ||
} | ||
|
||
@Override | ||
public void collectComposite(Scorer scorer, Interval interval, | ||
int docID) { | ||
//offsets.add(new Offset(interval.begin, interval.end, interval.offsetBegin, interval.offsetEnd)); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package uk.co.flax.luwak; | ||
|
||
import java.util.List; | ||
|
||
/** | ||
* Copyright (c) 2013 Lemur Consulting Ltd. | ||
* <p/> | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* <p/> | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* <p/> | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
public class MatchResponse { | ||
|
||
private final String id; | ||
private final List<QueryMatch> matches; | ||
private final int qcount; | ||
private final MatchStats stats; | ||
|
||
public MatchResponse(String docId, List<QueryMatch> matches, int qcount, MatchStats matchStats) { | ||
this.id = docId; | ||
this.matches = matches; | ||
this.qcount = qcount; | ||
this.stats = matchStats; | ||
} | ||
|
||
public String docId() { | ||
return id; | ||
} | ||
|
||
public List<QueryMatch> matches() { | ||
return matches; | ||
} | ||
|
||
public int appliedQueryCount() { | ||
return qcount; | ||
} | ||
|
||
public MatchStats getMatchStats() { | ||
return stats; | ||
} | ||
} |
101 changes: 101 additions & 0 deletions
101
src/main/java/uk/co/flax/luwak/MatchResponseCollector.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
package uk.co.flax.luwak; | ||
|
||
import org.apache.lucene.index.AtomicReaderContext; | ||
import org.apache.lucene.index.SortedDocValues; | ||
import org.apache.lucene.search.BooleanQuery; | ||
import org.apache.lucene.search.Collector; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.search.Scorer; | ||
import org.apache.lucene.util.BytesRef; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* Copyright (c) 2013 Lemur Consulting Ltd. | ||
* <p/> | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* <p/> | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* <p/> | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
public class MatchResponseCollector extends Collector { | ||
|
||
static { | ||
BooleanQuery.setMaxClauseCount(10000); | ||
} | ||
|
||
private static final Logger logger = LoggerFactory.getLogger(MatchResponseCollector.class); | ||
|
||
//private final String docId; | ||
private final InputDocument doc; | ||
private final Map<String, MonitorQuery> queries; | ||
|
||
private final List<QueryMatch> matches = new ArrayList<QueryMatch>(); | ||
|
||
SortedDocValues idField; | ||
final BytesRef idRef = new BytesRef(); | ||
|
||
IndexSearcher withinDocSearcher; | ||
|
||
private int queryCount; | ||
|
||
public MatchResponseCollector(Map<String, MonitorQuery> queries, final InputDocument doc) { | ||
this.doc = doc; | ||
this.queries = queries; | ||
this.withinDocSearcher = doc.getDocumentIndex().createSearcher(); | ||
} | ||
|
||
@Override | ||
public void setScorer(Scorer scorer) throws IOException { | ||
// no impl | ||
} | ||
|
||
@Override | ||
public void collect(final int doc) throws IOException { | ||
|
||
idField.get(doc, idRef); | ||
final MonitorQuery mq = queries.get(idRef.utf8ToString()); | ||
|
||
MatchCollector mc = new MatchCollector(mq); | ||
try { | ||
withinDocSearcher.search(mq.getQuery(), mc); | ||
QueryMatch newMatches = mc.getMatches(); | ||
if (newMatches != null) | ||
this.matches.add(newMatches); | ||
queryCount++; | ||
} | ||
catch (Exception e) { | ||
logger.error("Error while running query {} against document {}: {}", | ||
mq.getId(), this.doc.getId(), e.toString()); | ||
e.printStackTrace(); | ||
} | ||
} | ||
|
||
@Override | ||
public void setNextReader(AtomicReaderContext context) throws IOException { | ||
idField = context.reader().getSortedDocValues(Monitor.FIELDS.id); | ||
} | ||
|
||
@Override | ||
public boolean acceptsDocsOutOfOrder() { | ||
return true; | ||
} | ||
|
||
public MatchResponse getMatchResponse(MatchStats matchStats) { | ||
return new MatchResponse(this.doc.getId(), this.matches, this.queryCount, matchStats); | ||
} | ||
|
||
} |
Oops, something went wrong.