Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@
<version>3.8</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
<version>64.2</version>
</dependency>

</dependencies>
<build>
<plugins>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,17 @@ public static Function<String, String> trim() {
return (str) -> StringUtils.trim(str);
}

/**
 * Provides the transliteration step as a reusable function.
 * The work is delegated to {@code Utils.transliterateName}, which is backed by
 * com.ibm.icu.text.Transliterator.
 *
 * @return a function that maps an input string to its transliterated form
 */
public static Function<String, String> transliterateName() {
    return Utils::transliterateName;
}

/**
* Uses Apache commons StringUtils lowerCase method
*
Expand Down Expand Up @@ -76,12 +87,12 @@ public static Function<String, String> addressPreprocessing() {
}

/**
 * Applies the "transliterateName", "removeTrailingNumber", "removeSpecialChars" and
 * "nameNormalization" functions, in that order.
 *
 * @return the function to perform namePreprocessing
 */
public static Function<String, String> namePreprocessing() {
    // transliterateName runs first, so every later step operates on its output.
    return (str) -> transliterateName()
            .andThen(removeTrailingNumber())
            .andThen(removeSpecialChars())
            .andThen(nameNormalization())
            .apply(str);
}

/**
Expand Down
19 changes: 19 additions & 0 deletions src/main/java/com/intuit/fuzzymatcher/util/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.ibm.icu.text.Transliterator;


public class Utils {

Expand Down Expand Up @@ -53,4 +55,21 @@ public static String getNormalizedString(String str, Map<String, String> dict) {
/**
 * Reports whether the given string contains at least one digit.
 * Despite the name, the string does not have to consist solely of digits:
 * the whole input is matched against a pattern that allows any characters
 * (other than line terminators) around a single digit.
 *
 * @param str the string to inspect
 * @return {@code true} if the pattern matches the entire string, {@code false} otherwise
 */
public static boolean isNumeric(String str) {
    final String containsDigit = ".*\\d.*";
    return java.util.regex.Pattern.matches(containsDigit, str);
}

/**
 * Holds the single shared transliterator. It is created the first time
 * {@code transliterateName} needs it rather than on every call, because building a
 * transliterator from its rule string is far more costly than applying it.
 *
 * NOTE(review): sharing one instance assumes {@code transliterate} may be called from
 * several threads at once; ICU describes transliterators as stateless - confirm for the
 * ICU4J version in use.
 */
private static final class NameTransliteratorHolder {
    // Any language to English without accents: decompose, drop nonspacing marks, recompose.
    private static final Transliterator INSTANCE =
            Transliterator.getInstance("Any-Eng; nfd; [:nonspacing mark:] remove; nfc");
}

/**
 * Transliterates the given text from any language to English without accents.
 *
 * @param text the text to be transliterated; may be {@code null} or empty
 * @return the transliterated text in English without accents, or {@code text} itself
 *         when it is {@code null} or empty
 *
 * @see <a href="https://github.com/crteezy/java-translator-transliterator-api/blob/master/src/main/java/org/github/crteezy/Main.java">reference implementation</a>
 */
public static String transliterateName(String text) {
    // Nothing to transliterate; also avoids a NullPointerException on missing values.
    if (text == null || text.isEmpty()) {
        return text;
    }
    return NameTransliteratorHolder.INSTANCE.transliterate(text);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,38 @@ public void itShouldApplyMatchByDocId() {
Assert.assertEquals(2, result.size());
}

/**
 * Matches documents by id when some of the names are written in non-Latin scripts
 * and checks how many documents end up with at least one match.
 */
@Test
public void itShouldApplyMatchByDocIdWithNonEnglishValues() {
    // Each row is {document id, name, address}.
    String[][] input = {
            {"1", "Steven Wilson", "45th Avenue 5th st."},
            {"2", "John Doe", "546 freeman ave"},
            {"3", "Stephen Wilkson", "45th Ave 5th Street"},
            {"4", "Hagar Usama", "Nasr city - 8th neighborhood"},
            {"5", "هاجر أُسامة", "Nasr city, neighborhod 8"},
            {"6", "Asala Wasel", ""},
            {"7", "아살라 와셀", ""},
            {"8", "ستيفين ويلسون", "45 Ave St 5"}
    };

    // Build one document per row with a NAME element and an ADDRESS element.
    List<Document> documentList = Arrays.stream(input)
            .map(contact -> new Document.Builder(contact[0])
                    .addElement(new Element.Builder<String>().setValue(contact[1]).setType(NAME).createElement())
                    .addElement(new Element.Builder<String>().setValue(contact[2]).setType(ADDRESS).createElement())
                    .createDocument())
            .collect(Collectors.toList());

    Map<String, List<Match<Document>>> result = matchService.applyMatchByDocId(documentList);

    // Print every match so the pairing and score can be inspected in the test output.
    for (List<Match<Document>> matches : result.values()) {
        for (Match<Document> match : matches) {
            System.out.println("Data: " + match.getData() + " Matched With: " + match.getMatchedWith() + " Score: " + match.getScore().getResult());
        }
    }

    Assert.assertEquals(7, result.size());
}

@Test
public void itShouldApplyMatchByGroups() {
String[][] input = {
Expand Down