Skip to content

Commit

Permalink
Add fuzzy-matching option for tag names
Browse files Browse the repository at this point in the history
  • Loading branch information
fredex42 committed Jul 14, 2024
1 parent 01b2597 commit bd1e54b
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ import java.nio.charset.StandardCharsets
import java.util.Base64
import scala.util.Try
import io.circe.syntax._
@deprecated("you should be using com.gu.contentapi.porter.graphql")

/**
 * A container of `nodes` together with the `totalCount`, `endCursor` and `hasNextPage`
 * values that accompany them.
 *
 * @tparam T type of the contained nodes; an implicit `io.circe.Decoder[T]` is required
 */
case class Edge[T: io.circe.Decoder](totalCount: Long, endCursor: Option[String], hasNextPage: Boolean, nodes: Seq[T]) {
  /**
   * Applies `mapper` to every node, carrying `totalCount`, `endCursor` and `hasNextPage`
   * over unchanged.
   *
   * @param mapper conversion applied to each node
   * @tparam V type of the converted nodes; an implicit `io.circe.Decoder[V]` is required
   * @return a new Edge holding the converted nodes
   */
  def map[V: io.circe.Decoder](mapper: T => V): Edge[V] = {
    val convertedNodes = nodes.map(mapper)
    Edge[V](totalCount, endCursor, hasNextPage, convertedNodes)
  }
}
@deprecated("you should be using com.gu.contentapi.porter.graphql")

object Edge {
private val logger = LoggerFactory.getLogger(getClass)
private val encoder = Base64.getEncoder
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ object RootQuery {
arguments = TagQueryParameters.AllTagQueryParameters,
resolve = ctx =>
ctx.ctx.repo.marshalledTags(ctx arg TagQueryParameters.QueryString,
ctx arg TagQueryParameters.Fuzziness,
ctx arg TagQueryParameters.tagId,
ctx arg TagQueryParameters.Section,
ctx arg TagQueryParameters.TagType,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,29 @@ object TagQueryParameters {
)
)

/**
 * Enum of the values accepted for fuzzy-matching. Each enum value carries a plain
 * String ("AUTO" / "OFF") as its underlying value.
 *
 * The AUTO description follows the tiers documented for Elasticsearch's `AUTO`
 * fuzziness setting (default thresholds 3 and 6).
 */
val FuzzinessOptions = EnumType(
  "FuzzinessOptions",
  Some("Valid options for making a fuzzy-match query"),
  List(
    EnumValue("AUTO",
      value="AUTO",
      description=Some("Generates an edit distance based on the length of the term. If the term is >5 chars, then 2 edits allowed; if 3-5 chars, then 1 edit allowed; if <3 chars, then no edits allowed")
    ),
    EnumValue("OFF",
      value="OFF",
      description=Some("Disable fuzzy-matching")
    )
  )
)

val tagId = Argument("tagId", OptionInputType(StringType), description = "Retrieve this specific tag")
val Section = Argument("section", OptionInputType(StringType), description = "Only return tags from this section")
val TagType = Argument("type", OptionInputType(TagTypes), description = "Type of the tag to return")
val QueryString = Argument("q", OptionInputType(StringType), description = "Generic Lucene query string for finding tags")
val QueryString = Argument("q", OptionInputType(StringType), description = "Search for tags that match this public-facing name")
val Fuzziness = Argument("fuzzy", OptionInputType(FuzzinessOptions), description = "Perform a fuzzy-matching query (default). Set to `OFF` to disable fuzzy-matching.")
val Category = Argument("category", OptionInputType(StringType), description = "A category to match against tags")
val Reference = Argument("reference", OptionInputType(StringType), description = "A reference to match against tags")
val AllTagQueryParameters = QueryString :: tagId :: Section :: TagType :: Category ::
val AllTagQueryParameters = QueryString :: tagId :: Section :: TagType :: Fuzziness :: Category ::
Reference :: Cursor :: OrderBy :: Limit :: Nil

val NonPaginatedTagQueryParameters = Section :: TagType :: Nil
Expand Down
1 change: 1 addition & 0 deletions src/main/scala/datastore/DocumentRepo.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ trait DocumentRepo {
limit: Option[Int], cursor: Option[String]): Future[Edge[Content]]

def marshalledTags(maybeQuery:Option[String],
maybeFuzziness:Option[String],
maybeTagId:Option[String],
maybeSection: Option[String],
tagType:Option[String],
Expand Down
32 changes: 21 additions & 11 deletions src/main/scala/datastore/ElasticsearchRepo.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import com.sksamuel.elastic4s.ElasticDsl._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import com.sksamuel.elastic4s.requests.searches.SearchResponse
import com.sksamuel.elastic4s.requests.searches.queries.{ExistsQuery, NestedQuery, Query, RangeQuery}
import com.sksamuel.elastic4s.requests.searches.queries.{DisMaxQuery, ExistsQuery, Fuzzy, FuzzyQuery, NestedQuery, Query, RangeQuery}
import com.sksamuel.elastic4s.requests.searches.queries.compound.BoolQuery
import com.sksamuel.elastic4s.requests.searches.queries.matches.{FieldWithOptionalBoost, MatchAllQuery, MatchQuery, MultiMatchQuery}
import com.sksamuel.elastic4s.requests.searches.sort.{FieldSort, ScoreSort, Sort, SortOrder}
Expand Down Expand Up @@ -185,8 +185,19 @@ class ElasticsearchRepo(endpoint:ElasticNodeEndpoint, val defaultPageSize:Int=20

private def tagQueryParams(maybeTagId:Option[String], maybeSection:Option[String],
tagType:Option[String], maybeCategory:Option[String],
maybeReferences: Option[String]):Seq[Query] = {
maybeReferences: Option[String], queryString:Option[String], fuzziness:Option[String]):Seq[Query] = {
Seq(
queryString.map(qs=>{
if(fuzziness.getOrElse("AUTO") != "OFF") {
//Why DisMax here? Because we want to include exact-matches as well, if they are relevant. E.g. FuzzyQuery on "politics" returns no results!
DisMaxQuery(Seq(
FuzzyQuery("webTitle", qs, fuzziness),
MatchQuery("webTitle", qs)
))
} else {
MatchQuery("webTitle", qs)
}
}),
maybeTagId.map(MatchQuery("id", _)),
maybeSection.map(MatchQuery("sectionId", _)),
tagType.map({
Expand All @@ -204,18 +215,16 @@ class ElasticsearchRepo(endpoint:ElasticNodeEndpoint, val defaultPageSize:Int=20
private def buildTagQuery(maybeTagId:Option[String],
maybeSection:Option[String],
tagType:Option[String], maybeQuery:Option[String],
maybeFuzziness:Option[String],
maybeCategory:Option[String], maybeReferences:Option[String]) = {
val baseSearch = search("tag")
val searchWithQuery = maybeQuery match {
case Some(q)=>baseSearch.query(q)
case None=>baseSearch
}
val params = tagQueryParams(maybeTagId, maybeSection, tagType, maybeCategory, maybeReferences)

val params = tagQueryParams(maybeTagId, maybeSection, tagType, maybeCategory, maybeReferences, maybeQuery, maybeFuzziness)

if(params.isEmpty) {
searchWithQuery
baseSearch
} else {
searchWithQuery.query(BoolQuery(must=params))
baseSearch.query(BoolQuery(must=params))
}
}

Expand All @@ -230,6 +239,7 @@ class ElasticsearchRepo(endpoint:ElasticNodeEndpoint, val defaultPageSize:Int=20
//FIXME: tagsForList / marshalledTags could be DRY'd out a bit

override def marshalledTags(maybeQuery:Option[String],
maybeFuzziness:Option[String],
maybeTagId:Option[String],
maybeSection: Option[String],
tagType:Option[String],
Expand All @@ -249,7 +259,7 @@ class ElasticsearchRepo(endpoint:ElasticNodeEndpoint, val defaultPageSize:Int=20
Edge.decodeCursor(cursor) match {
case Right(maybeCursor)=>
client.execute {
buildTagQuery(maybeTagId, maybeSection, tagType, maybeQuery, maybeCategory, maybeReferences)
buildTagQuery(maybeTagId, maybeSection, tagType, maybeQuery, maybeFuzziness, maybeCategory, maybeReferences)
.sortBy(sortParam)
.limit(pageSize)
.searchAfter(maybeCursor)
Expand Down Expand Up @@ -281,7 +291,7 @@ class ElasticsearchRepo(endpoint:ElasticNodeEndpoint, val defaultPageSize:Int=20
val tagIdMatches = tagIdList.map(MatchQuery("id", _))

val response = client.execute {
val restrictions = tagQueryParams(None, maybeSection, tagType, maybeCategory, maybeReferences)
val restrictions = tagQueryParams(None, maybeSection, tagType, maybeCategory, maybeReferences, None, None)

if(restrictions.nonEmpty) {
search("tag").query(
Expand Down

0 comments on commit bd1e54b

Please sign in to comment.