Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jlinn committed Jan 18, 2015
0 parents commit 880bb2d
Show file tree
Hide file tree
Showing 15 changed files with 683 additions and 0 deletions.
76 changes: 76 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Created by https://www.gitignore.io

### Elasticsearch ###
/data

### Java ###
*.class

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
*.jar
*.war
*.ear

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*


### Maven ###
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties


### Intellij ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm

*.iml

## Directory-based project format:
.idea/
# if you remove the above rule, at least ignore the following:

# User-specific stuff:
# .idea/workspace.xml
# .idea/tasks.xml
# .idea/dictionaries

# Sensitive or high-churn files:
# .idea/dataSources.ids
# .idea/dataSources.xml
# .idea/sqlDataSources.xml
# .idea/dynamic.xml
# .idea/uiDesigner.xml

# Gradle:
# .idea/gradle.xml
# .idea/libraries

# Mongo Explorer plugin:
# .idea/mongoSettings.xml

## File-based project format:
*.ipr
*.iws

## Plugin-specific files:

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
5 changes: 5 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
language: java

jdk:
- oraclejdk7
- oraclejdk8
56 changes: 56 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
Elasticsearch URL Token Filter
==============================

This plugin enables URL token filtering by URL part.

[![Build Status](https://secure.travis-ci.org/jlinn/elasticsearch-analysis-url.png?branch=master)](http://travis-ci.org/jlinn/elasticsearch-analysis-url)

## Compatibility

| Elasticsearch Version | Plugin Version |
|-----------------------|----------------|
| 1.4.2 | 1.0.0 |

## Installation
```bash
bin/plugin --install analysis-url --url https://github.com/jlinn/elasticsearch-analysis-url/releases/download/v1.0.0/elasticsearch-analysis-url-1.0.0.zip
```

## Usage
This filter has a single option, `part`, which selects the part of the URL to return. It defaults to `whole`, which returns the entire URL; in that case, the filter only serves to validate incoming URLs. The other possible values are:
`protocol`, `host`, `port`, `path`, `query`, and `ref`.

Set up your index like so:
```json
{
"settings": {
"analysis": {
"filter": {
"url_host": {
"type": "url",
"part": "host"
}
},
"analyzer": {
"url_host": {
"filter": ["url_host"],
"tokenizer": "whitespace"
}
}
}
},
"mappings": {
"example_type": {
"properties": {
"url": {
"type": "multi_field",
"fields": {
"url": {"type": "string"},
"host": {"type": "string", "analyzer": "url_host"}
}
}
}
}
}
}
```
164 changes: 164 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-analysis-url</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<description>Elasticsearch URL token filter plugin</description>

<parent>
<groupId>org.sonatype.oss</groupId>
<artifactId>oss-parent</artifactId>
<version>9</version>
</parent>

<properties>
<!-- Source encoding used by the Maven plugins. The property name must be spelled exactly
     "project.build.sourceEncoding"; a misspelled name is silently ignored and the build
     falls back to the platform default encoding. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Versions referenced by the dependency declarations in this POM. -->
<elasticsearch.version>1.4.2</elasticsearch.version>
<lucene.version>4.10.2</lucene.version>
<hamcrest.version>1.3</hamcrest.version>
<!-- Test-run settings; they are not referenced elsewhere in this POM and are presumably
     consumed by the Elasticsearch/Lucene test infrastructure (TODO confirm). -->
<tests.output>onerror</tests.output>
<tests.shuffle>true</tests.shuffle>
<es.config>elasticsearch.yml</es.config>
<es.logger.level>INFO</es.logger.level>
</properties>

<repositories>
<!-- Sonatype OSS releases repository. Accessed over HTTPS so artifacts cannot be tampered
     with in transit; the plain-HTTP endpoint is no longer served. -->
<repository>
<id>sonatype</id>
<url>https://oss.sonatype.org/content/repositories/releases</url>
</repository>
</repositories>

<dependencies>

<!-- All dependencies below are test-scoped except the elasticsearch artifact,
     which is declared with compile scope. -->

<!-- Lucene test framework; version comes from the lucene.version property. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-test-framework</artifactId>
<version>${lucene.version}</version>
<scope>test</scope>
</dependency>

<!-- Elasticsearch itself: the only compile-scope dependency of the plugin. -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>${elasticsearch.version}</version>
<scope>compile</scope>
</dependency>

<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-all</artifactId>
<version>2.3.9</version>
<scope>test</scope>
</dependency>

<!-- The test-jar of the same elasticsearch version, available to tests only. -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>${elasticsearch.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-all</artifactId>
<version>${hamcrest.version}</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>

<!-- Logging backend for test runs; marked optional. -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
<scope>test</scope>
<optional>true</optional>
</dependency>
</dependencies>

<build>
<testResources>
<!-- Non-Java files that live next to the test sources (json, yml, txt, properties)
     are treated as test resources, with Maven property filtering enabled. -->
<testResource>
<directory>${basedir}/src/test/java</directory>
<includes>
<include>**/*.json</include>
<include>**/*.yml</include>
<include>**/*.txt</include>
<include>**/*.properties</include>
</includes>
<filtering>true</filtering>
</testResource>
<!-- Everything under src/test/resources is included; no filtering is configured here. -->
<testResource>
<directory>${basedir}/src/test/resources</directory>
<includes>
<include>**/*.*</include>
</includes>
</testResource>
</testResources>

<plugins>
<!-- Compile for Java 7 source and bytecode level. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.2</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>

<!-- Declared with a version only: no configuration or executions are bound in this POM. -->
<plugin>
<groupId>com.carrotsearch.randomizedtesting</groupId>
<artifactId>junit4-maven-plugin</artifactId>
<version>2.1.11</version>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.18.1</version>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.4</version>
</plugin>

<!-- Runs the assembly "single" goal during the package phase, using the descriptor at
     src/main/assemblies/plugin.xml. Output is written to the releases directory under the
     build directory, and the assembly id is not appended to the file name. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.5.3</version>
<configuration>
<appendAssemblyId>false</appendAssemblyId>
<outputDirectory>${project.build.directory}/releases/</outputDirectory>
<descriptors>
<descriptor>${basedir}/src/main/assemblies/plugin.xml</descriptor>
</descriptors>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
26 changes: 26 additions & 0 deletions src/main/assemblies/plugin.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?xml version="1.0"?>
<assembly>
    <!-- Packages the plugin as a zip with all jars at the archive root. -->
    <id>plugin</id>
    <formats>
        <format>zip</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>
    <dependencySets>
        <!-- A single dependency set: the project artifact plus its dependencies, minus
             Elasticsearch (and, through transitive filtering, whatever it pulls in).
             A second set with an empty <includes> element would match every artifact and
             put the excluded Elasticsearch jars back into the zip, so none is declared. -->
        <dependencySet>
            <outputDirectory>/</outputDirectory>
            <useProjectArtifact>true</useProjectArtifact>
            <useTransitiveFiltering>true</useTransitiveFiltering>
            <excludes>
                <exclude>org.elasticsearch:elasticsearch</exclude>
            </excludes>
        </dependencySet>
    </dependencySets>
</assembly>
26 changes: 26 additions & 0 deletions src/main/java/org/elasticsearch/index/analysis/URLPart.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.elasticsearch.index.analysis;

import org.elasticsearch.ElasticsearchIllegalArgumentException;

/**
 * Identifies a single component of a URL, or the URL as a whole ({@link #WHOLE}).
 *
 * Joe Linn
 * 1/17/2015
 */
public enum URLPart {
    PROTOCOL,
    HOST,
    PATH,
    REF,
    QUERY,
    PORT,
    WHOLE;

    /**
     * Looks up the constant whose name equals the given string, ignoring case.
     *
     * @param part name of the URL part, e.g. {@code "host"} or {@code "HOST"}
     * @return the matching constant
     * @throws ElasticsearchIllegalArgumentException if no constant has that name
     *         (this includes a {@code null} argument)
     */
    public static URLPart fromString(String part) {
        final URLPart[] known = values();
        URLPart match = null;
        // Stop scanning as soon as the first (and only possible) match is found.
        for (int i = 0; i < known.length && match == null; i++) {
            if (known[i].name().equalsIgnoreCase(part)) {
                match = known[i];
            }
        }
        if (match == null) {
            throw new ElasticsearchIllegalArgumentException(String.format("Unrecognized URL part: %s", part));
        }
        return match;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package org.elasticsearch.index.analysis;

import org.elasticsearch.index.analysis.url.URLTokenFilter;

/**
 * Analysis binder processor that contributes the URL token filter.
 *
 * Joe Linn
 * 1/17/2015
 */
public class URLTokenAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor {
/**
 * Passes {@code URLTokenFilterFactory} to the supplied bindings under the name
 * {@code URLTokenFilter.NAME}. Presumably this is what makes the filter type
 * available to index analysis settings; confirm against the Elasticsearch
 * {@code AnalysisModule} API.
 *
 * @param tokenFiltersBindings bindings object on which {@code processTokenFilter} is invoked
 */
@Override
public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
tokenFiltersBindings.processTokenFilter(URLTokenFilter.NAME, URLTokenFilterFactory.class);
}
}
Loading

0 comments on commit 880bb2d

Please sign in to comment.