Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
romseygeek committed Aug 1, 2013
0 parents commit 69c00f5
Show file tree
Hide file tree
Showing 16 changed files with 897 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.idea/
target/
15 changes: 15 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Copyright (c) 2013 Lemur Consulting Ltd.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied.

See the License for the specific language governing permissions and
limitations under the License.
1 change: 1 addition & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Luwak - stored query engine from Flax
25 changes: 25 additions & 0 deletions luwak.iml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Maven: uk.co.flax.lucene-solr-intervals:lucene-core:5.0.0-intervals-1.0" level="project" />
<orderEntry type="library" name="Maven: uk.co.flax.lucene-solr-intervals:lucene-memory:5.0.0-intervals-1.0" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.11" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.easytesting:fest-assert-core:2.0M10" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.easytesting:fest-util:1.2.5" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.slf4j:slf4j-simple:1.7.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: uk.co.flax.lucene-solr-intervals:lucene-analyzers-common:5.0.0-intervals-1.0" level="project" />
</component>
</module>

63 changes: 63 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for Luwak.
  Runtime dependencies are the interval-enabled Lucene fork and the SLF4J API;
  everything else is test-scoped.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>uk.co.flax</groupId>
    <artifactId>luwak</artifactId>
    <!--
      This module carries Java sources under src/main/java, so it must use
      'jar' packaging: with 'pom' packaging the default lifecycle binds no
      compile, test or jar goals and the sources would never be built.
    -->
    <packaging>jar</packaging>
    <version>0.1-SNAPSHOT</version>

    <properties>
        <!-- Single place to bump the versions shared by several artifacts below. -->
        <lucene.version>5.0.0-intervals-1.0</lucene.version>
        <slf4j.version>1.7.2</slf4j.version>
    </properties>

    <dependencies>

        <!-- Compile-scope dependencies -->
        <dependency>
            <groupId>uk.co.flax.lucene-solr-intervals</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>uk.co.flax.lucene-solr-intervals</groupId>
            <artifactId>lucene-memory</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>

        <!-- Test-scope dependencies -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.easytesting</groupId>
            <artifactId>fest-assert-core</artifactId>
            <version>2.0M10</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>${slf4j.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>uk.co.flax.lucene-solr-intervals</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
            <scope>test</scope>
        </dependency>

    </dependencies>


</project>
41 changes: 41 additions & 0 deletions src/main/java/uk/co/flax/luwak/InputDocument.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package uk.co.flax.luwak;

import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Query;

/**
* Copyright (c) 2013 Lemur Consulting Ltd.
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
 * Base class for a document that is to be run against stored queries.
 *
 * <p>Each instance carries an identifier and owns a {@link MemoryIndex} that
 * subclasses can populate through the protected {@link #index} field.
 * Subclasses must also supply the presearcher query for the document.
 */
public abstract class InputDocument {

    /**
     * In-memory index backing this document. It is created with the
     * constructor flag set to {@code true} (presumably "store offsets" in
     * Lucene's MemoryIndex API; confirm against the forked Lucene version).
     */
    protected final MemoryIndex index = new MemoryIndex(true);

    /** Identifier supplied by the creator of this document. */
    private final String id;

    /**
     * Creates a document with the given identifier and an empty index.
     *
     * @param id the identifier reported by {@link #getId()}
     */
    public InputDocument(String id) {
        this.id = id;
    }

    /**
     * @return the identifier passed to the constructor
     */
    public String getId() {
        return this.id;
    }

    /**
     * @return the in-memory index owned by this document
     */
    public MemoryIndex getDocumentIndex() {
        return this.index;
    }

    /**
     * Returns the presearcher query for this document; how it is built is
     * left entirely to the subclass.
     *
     * @return the presearcher query
     */
    public abstract Query getPresearcherQuery();

}
84 changes: 84 additions & 0 deletions src/main/java/uk/co/flax/luwak/MatchCollector.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package uk.co.flax.luwak;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.intervals.Interval;
import org.apache.lucene.search.intervals.IntervalCollector;
import org.apache.lucene.search.intervals.IntervalIterator;

import java.io.IOException;

/**
* Copyright (c) 2013 Lemur Consulting Ltd.
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
 * Collector that gathers the match intervals of a single {@link MonitorQuery}.
 *
 * <p>For every collected document a fresh {@link QueryMatch} is created for
 * the query's id, and each leaf interval reported by the scorer's interval
 * iterator is added to it. Composite intervals are not recorded.
 */
public class MatchCollector extends Collector implements IntervalCollector {

    /** The stored query whose matches are being collected. */
    private final MonitorQuery mq;

    /** The scorer most recently handed over through {@link #setScorer(Scorer)}. */
    protected Scorer scorer;

    /** Interval iterator obtained from the current scorer. */
    private IntervalIterator positions;

    /** Result for the last collected document; stays {@code null} until a document is collected. */
    private QueryMatch matches = null;

    /**
     * @param mq the query whose id is used to label the collected match
     */
    public MatchCollector(MonitorQuery mq) {
        this.mq = mq;
    }

    /**
     * @return the match built by the most recent {@link #collect(int)} call,
     *         or {@code null} if no document has been collected
     */
    public QueryMatch getMatches() {
        return this.matches;
    }

    /**
     * Starts a new {@link QueryMatch} for the document and drains the
     * interval iterator, routing every interval back through this
     * {@link IntervalCollector}.
     */
    @Override
    public void collect(int doc) throws IOException {
        this.matches = new QueryMatch(mq.getId());
        this.positions.scorerAdvanced(doc);
        while (this.positions.next() != null) {
            this.positions.collect(this);
        }
    }

    /**
     * Remembers the scorer and asks it for its interval iterator.
     */
    @Override
    public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
        // If we want to visit the other scorers, we can, here...
        this.positions = scorer.intervals(true);
    }

    /** No per-reader state is needed, so this does nothing. */
    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean acceptsDocsOutOfOrder() {
        return false;
    }

    /**
     * @return {@link Weight.PostingFeatures#OFFSETS}
     */
    @Override
    public Weight.PostingFeatures postingFeatures() {
        return Weight.PostingFeatures.OFFSETS;
    }

    /** Adds the leaf interval to the match currently being built. */
    @Override
    public void collectLeafPosition(Scorer scorer, Interval interval, int docID) {
        this.matches.addInterval(interval);
    }

    /** Composite intervals are deliberately not recorded. */
    @Override
    public void collectComposite(Scorer scorer, Interval interval, int docID) {
        // nothing to do
    }

}
50 changes: 50 additions & 0 deletions src/main/java/uk/co/flax/luwak/MatchResponse.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package uk.co.flax.luwak;

import java.util.List;

/**
* Copyright (c) 2013 Lemur Consulting Ltd.
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
 * Immutable holder for the outcome of running stored queries against one
 * document: the document id, the matches found, the number of queries that
 * were applied, and the accompanying statistics.
 *
 * <p>The list of matches is stored by reference, not copied.
 */
public class MatchResponse {

    /** Id of the document the response refers to. */
    private final String documentId;

    /** Matches found for the document. */
    private final List<QueryMatch> queryMatches;

    /** Number of queries that were applied to the document. */
    private final int queriesApplied;

    /** Statistics supplied alongside the matches. */
    private final MatchStats matchStatistics;

    /**
     * @param docId      id of the matched document
     * @param matches    the matches found; kept by reference
     * @param qcount     number of queries applied to the document
     * @param matchStats statistics for the matching run
     */
    public MatchResponse(String docId, List<QueryMatch> matches, int qcount, MatchStats matchStats) {
        this.documentId = docId;
        this.queryMatches = matches;
        this.queriesApplied = qcount;
        this.matchStatistics = matchStats;
    }

    /**
     * @return the id of the document this response describes
     */
    public String docId() {
        return this.documentId;
    }

    /**
     * @return the list of matches passed to the constructor
     */
    public List<QueryMatch> matches() {
        return this.queryMatches;
    }

    /**
     * @return the number of queries applied to the document
     */
    public int appliedQueryCount() {
        return this.queriesApplied;
    }

    /**
     * @return the statistics passed to the constructor
     */
    public MatchStats getMatchStats() {
        return this.matchStatistics;
    }
}
101 changes: 101 additions & 0 deletions src/main/java/uk/co/flax/luwak/MatchResponseCollector.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package uk.co.flax.luwak;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
* Copyright (c) 2013 Lemur Consulting Ltd.
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
 * Collector that is run over the index of stored queries: for each hit it
 * looks up the corresponding {@link MonitorQuery} by id, runs that query
 * against the input document's own index, and accumulates any resulting
 * {@link QueryMatch}.
 *
 * <p>Failures of individual queries are logged and do not abort collection.
 */
public class MatchResponseCollector extends Collector {

    static {
        // NOTE: this is a JVM-wide Lucene setting, applied as a side effect of
        // loading this class.
        BooleanQuery.setMaxClauseCount(10000);
    }

    private static final Logger logger = LoggerFactory.getLogger(MatchResponseCollector.class);

    /** The document being matched. */
    private final InputDocument doc;

    /** Stored queries, keyed by the id held in the {@code Monitor.FIELDS.id} doc-values field. */
    private final Map<String, MonitorQuery> queries;

    /** Matches accumulated so far. */
    private final List<QueryMatch> matches = new ArrayList<QueryMatch>();

    /** Id doc values for the reader currently being collected; set in {@link #setNextReader}. */
    SortedDocValues idField;

    /** Scratch buffer reused for every id lookup. */
    final BytesRef idRef = new BytesRef();

    /** Searcher over the input document's in-memory index. */
    IndexSearcher withinDocSearcher;

    /** Number of queries that ran to completion against the document. */
    private int queryCount;

    /**
     * @param queries the stored queries, keyed by id
     * @param doc     the document to run them against
     */
    public MatchResponseCollector(Map<String, MonitorQuery> queries, final InputDocument doc) {
        this.doc = doc;
        this.queries = queries;
        this.withinDocSearcher = doc.getDocumentIndex().createSearcher();
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
        // no impl
    }

    /**
     * Resolves the stored query for the hit and runs it against the input
     * document. A hit whose id has no entry in the query map is logged and
     * skipped; a query that throws is logged (with its stack trace) and
     * skipped. Only queries that complete are counted.
     *
     * @param doc the doc id of the hit within the current reader
     */
    @Override
    public void collect(final int doc) throws IOException {

        idField.get(doc, idRef);
        final String queryId = idRef.utf8ToString();
        final MonitorQuery mq = queries.get(queryId);

        if (mq == null) {
            // Without this guard the lookup miss would surface as a
            // NullPointerException thrown from the error handler below.
            logger.error("No stored query found for id {} while matching document {}",
                    queryId, this.doc.getId());
            return;
        }

        MatchCollector mc = new MatchCollector(mq);
        try {
            withinDocSearcher.search(mq.getQuery(), mc);
            QueryMatch newMatches = mc.getMatches();
            if (newMatches != null) {
                this.matches.add(newMatches);
            }
            queryCount++;
        }
        catch (Exception e) {
            // The trailing throwable is picked up by SLF4J and logged with its
            // stack trace, so nothing is printed to stderr directly.
            logger.error("Error while running query {} against document {}: {}",
                    mq.getId(), this.doc.getId(), e.toString(), e);
        }
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
        idField = context.reader().getSortedDocValues(Monitor.FIELDS.id);
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean acceptsDocsOutOfOrder() {
        return true;
    }

    /**
     * Packages the accumulated matches into a response.
     *
     * @param matchStats statistics to attach to the response
     * @return a response carrying the document id, the matches collected so
     *         far (by reference), and the count of completed queries
     */
    public MatchResponse getMatchResponse(MatchStats matchStats) {
        return new MatchResponse(this.doc.getId(), this.matches, this.queryCount, matchStats);
    }

}
Loading

0 comments on commit 69c00f5

Please sign in to comment.