Skip to content

Commit

Permalink
Merge pull request #2 from jnyryan/fix-qalt
Browse files Browse the repository at this point in the history
Fix qalt
  • Loading branch information
jnyryan authored Feb 4, 2019
2 parents 122a4a5 + 5ecc423 commit 3ff476a
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 56 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,6 @@
hs_err_pid*

.DS_Store
.classpath
.project
.settings/
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ The classpath to your component folder
```

Register the custom component
``` xml
<searchComponent name="unicodeQuoteComponent" class="com.jnyryan.solr.components.FoldUnicodeQuotes"/>
```
``` xml
<searchComponent name="unicodeQuoteComponent" class="com.jnyryan.solrUnicodeQuoteComponent.FoldUnicodeQuotes"/>
```

In the request handler add the component as a first-component
``` xml
Expand Down
10 changes: 5 additions & 5 deletions build.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<project name="solr-unicode-quote-component" basedir=".">

<property name="src.dir" value="src"/>
<property name="version" value="1.0.2"/>
<property name="src.dir" value="src"/>
<property name="test.dir" value="test"/>
<property name="dist.dir" value="dist"/>
<property name="build.dir" value="build"/>
<property name="classes.dir" value="${build.dir}/classes"/>
<property name="jar.dir" value="${dist.dir}/jar"/>
<property name="main1-class" value="com.jnyryan.solrUnicodeQuoteComponent.FoldUnicodeQuotemddds"/>
<property name="main-class" value="com.jnyryan.solrUnicodeQuoteComponent.SolrUnicodeQuoteComponent"/>
<property name="lib.dir" value="lib"/>
<property name="solr.version" value="7.5"/>
Expand Down Expand Up @@ -41,9 +41,9 @@
<attribute name="Specification-Title" value="Apache Solr Search Server: solr-unicode-quote-component" />
<attribute name="Specification-Version" value="${solr.version}" />
<attribute name="Specification-Vendor" value="The Apache Software Foundation" />
<attribute name="Implementation-Title" value="org.apache.solr" />
<attribute name="Implementation-Version" value="1.0.0" />
<attribute name="Implementation-Vendor" value="GLG" />
<attribute name="Implementation-Title" value="com.jnyryan.solrUnicodeQuoteComponent" />
<attribute name="Implementation-Version" value="${version}" />
<attribute name="Implementation-Vendor" value="jnyryan" />
<attribute name="Main-Class" value="${main-class}"/>
<attribute name="Java-Target-Version" value="${java.target}"/>

Expand Down
97 changes: 50 additions & 47 deletions src/com/jnyryan/solrUnicodeQuoteComponent/FoldUnicodeQuotes.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,63 +18,66 @@

import java.io.IOException;
import java.util.regex.Pattern;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.request.SolrQueryRequest;

public class FoldUnicodeQuotes extends SearchComponent {

@Override
public void prepare(ResponseBuilder rb) throws IOException {
updateSolrRequest(rb.req);
}
@Override
public void prepare(ResponseBuilder rb) throws IOException {
updateSolrRequest(rb, CommonParams.Q);
updateSolrRequest(rb, DisMaxParams.ALTQ);
}

@Override
public void process(ResponseBuilder rb) throws IOException {
}
@Override
public void process(ResponseBuilder rb) throws IOException {}

@Override
public String getDescription() {
return "Solr Unicode Quote Seqarch Component";
}
@Override
public String getDescription() {
return "Solr Unicode Quote Seqarch Component";
}

/**
* Grab the Q and Q.ALT parameters and remove UNICODE quotes from them.
*
* @param req the request to the SOLR handler
*/
public void updateSolrRequest(SolrQueryRequest req) {
SolrParams params = req.getParams();
String newQuery = replaceUnicodeDoubleQuotes(params.get(CommonParams.Q));
ModifiableSolrParams newParams = new ModifiableSolrParams(params);
newParams.remove(CommonParams.Q);
newParams.add(CommonParams.Q, newQuery);
req.setParams(newParams);
}
/**
* Grab the Q and Q.ALT parameters and remove UNICODE quotes from them.
*
* @param req the request to the SOLR handler
*/
public void updateSolrRequest(ResponseBuilder rb, String queryParam) {
SolrParams params = rb.req.getParams();
String target = params.get(queryParam);
if(target != null && !target.isEmpty()) {
String newQuery = replaceUnicodeDoubleQuotes(target);
ModifiableSolrParams newParams = new ModifiableSolrParams(params);
newParams.remove(queryParam);
newParams.add(queryParam, newQuery);
rb.req.setParams(newParams);
}
}

/**
* Replace UNICODE double quotes with basic Latin standard quote mark (") - &#34; &quot;
* Replaces:
* “ left double quotation mark (\u201C)
* ” right double quotation mark (\u201D)
* 〝 left curly quote (\u301D)
* 〞right curly quote (\u301E)
* „ index quote german scandanavian (\u201E)
* « left-pointing double angle quotation mark (\AB)
* » right-pointing double angle quotation mark (\BB)
* ‟ double high-reversed-9 quotation mark (\u201F)
* ❝ heavy double turned comma quotation mark ornament (\u275D)
* ❞ heavy double comma quotation mark ornament (\u275E)
* ⹂ double low-reversed-9 quotation mark - (\u2E42)
* "fullwidth quotation mark - (\uFF02)
*
* @param s The query string
* @return Returns {@code s} with UNICODE double quotes replaced as standard double quote
*/
public String replaceUnicodeDoubleQuotes(String s) {
return s.replaceAll("[“”〝〞„«»‟❝❞⹂"]","\"");
}
/**
* Replace UNICODE double quotes with basic Latin standard quote mark (") - &#34; &quot;
* Replaces:
* “ left double quotation mark (\u201C)
* ” right double quotation mark (\u201D)
* 〝 left curly quote (\u301D)
* 〞right curly quote (\u301E)
* „ index quote german scandanavian (\u201E)
* « left-pointing double angle quotation mark (\AB)
* » right-pointing double angle quotation mark (\BB)
* ‟ double high-reversed-9 quotation mark (\u201F)
* ❝ heavy double turned comma quotation mark ornament (\u275D)
* ❞ heavy double comma quotation mark ornament (\u275E)
* ⹂ double low-reversed-9 quotation mark - (\u2E42)
* "fullwidth quotation mark - (\uFF02)
*
* @param s The query string
* @return Returns {@code s} with UNICODE double quotes replaced as standard double quote
*/
public String replaceUnicodeDoubleQuotes(String s) {
return s.replaceAll("[“”〝〞„«»‟❝❞⹂"]","\"");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ public void curlyQuotes() {
@Test
public void theRestOfTheQuotes() {
FoldUnicodeQuotes fuq = new FoldUnicodeQuotes();

assertEquals("we are the \"music makers\" ",fuq.replaceUnicodeDoubleQuotes("we are the „music makers„ "));
assertEquals("we are the \"music makers\" ",fuq.replaceUnicodeDoubleQuotes("we are the «music makers» "));
assertEquals("we are the \"music makers\" ",fuq.replaceUnicodeDoubleQuotes("we are the ‟music makers‟ "));
Expand Down

0 comments on commit 3ff476a

Please sign in to comment.