Skip to content

Commit

Permalink
Refs #116 - filter by language based on OCR text in Solr.
Browse files (browse the repository at this point in the history)
  • Loading branch information
johnscancella committed Apr 9, 2018
1 parent 252fd1c commit 07d105f
Showing 1 changed file with 12 additions and 10 deletions.
22 changes: 12 additions & 10 deletions core/index.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -407,15 +407,14 @@ def page_search(d):
q.append('+date:[%i TO %i]' % (d1, d2))

ocrs = ['ocr_%s' % l for l in settings.SOLR_LANGUAGES]

lang = d.get('language', None)

lang_full = models.Language.objects.get(code=str(lang)) if lang else None
if lang_full:
q.append('+language:%s' % lang_full)

ortext = d.get('ortext', None)
andtext = d.get('andtext', None)
phrasetext = d.get('phrasetext', None)
proxtext = d.get('proxtext', None)
ocr_lang = 'ocr_' + lang if lang else 'ocr'
if d.get('ortext', None):

if ortext:
q.append('+((' + query_join(solr_escape(d['ortext']).split(' '), "ocr"))
if lang:
q.append(' AND ' + query_join(solr_escape(d['ortext']).split(' '), ocr_lang))
Expand All @@ -425,7 +424,7 @@ def page_search(d):
for ocr in ocrs:
q.append('OR ' + query_join(solr_escape(d['ortext']).split(' '), ocr))
q.append(')')
if d.get('andtext', None):
if andtext:
q.append('+((' + query_join(solr_escape(d['andtext']).split(' '), "ocr", and_clause=True))
if lang:
q.append('AND ' + query_join(solr_escape(d['andtext']).split(' '), ocr_lang, and_clause=True))
Expand All @@ -435,7 +434,7 @@ def page_search(d):
for ocr in ocrs:
q.append('OR ' + query_join(solr_escape(d['andtext']).split(' '), ocr, and_clause=True))
q.append(')')
if d.get('phrasetext', None):
if phrasetext:
phrase = solr_escape(d['phrasetext'])
q.append('+((' + 'ocr' + ':"%s"^10000' % (phrase))
if lang:
Expand All @@ -447,7 +446,7 @@ def page_search(d):
q.append('OR ' + ocr + ':"%s"' % (phrase))
q.append(')')

if d.get('proxtext', None):
if proxtext:
distance = d.get('proxdistance', PROX_DISTANCE_DEFAULT)
prox = solr_escape(d['proxtext'])
q.append('+((' + 'ocr' + ':("%s"~%s)^10000' % (prox, distance))
Expand All @@ -462,6 +461,9 @@ def page_search(d):
if d.get('sequence', None):
q.append('+sequence:"%s"' % d['sequence'])

if not ortext and not andtext and not phrasetext and not proxtext:
q.append('+%s:*' % ocr_lang)

solr_query = ' '.join(q)
LOGGER.debug("Solr query is [%s]", solr_query)
return solr_query
Expand Down

0 comments on commit 07d105f

Please sign in to comment.