Skip to content

Commit

Permalink
Filter candidate annotations by dates
Browse files Browse the repository at this point in the history
This should limit the number of Annotation.id values that need to be compared
with AnnotationSlim.pubid, making the query faster.

The default values here avoid having to sync the deploy with the
h-periodic task.

The default `since` value picks up the first annotations in the DB, and the
default `until` value provides enough annotations (~2M) to work on for a few days.
  • Loading branch information
marcospri committed Nov 15, 2023
1 parent 1e8b814 commit ffa6d80
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
9 changes: 7 additions & 2 deletions h/tasks/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


@celery.task
def fill_annotation_slim(batch_size=1000):
def fill_annotation_slim(batch_size=1000, since="2012-01-01", until="2017-12-31"):
"""Task to fill the new AnnotationSlim table in batches."""
# pylint:disable=no-member

Expand All @@ -15,7 +15,12 @@ def fill_annotation_slim(batch_size=1000):
annotations = (
celery.request.db.query(Annotation)
.outerjoin(AnnotationSlim)
.where(AnnotationSlim.pubid.is_(None), Annotation.deleted.is_(False))
.where(
AnnotationSlim.pubid.is_(None),
Annotation.deleted.is_(False),
Annotation.created >= since,
Annotation.created <= until,
)
.limit(batch_size)
)

Expand Down
9 changes: 7 additions & 2 deletions tests/unit/h/tasks/annotations_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime, timedelta
from unittest.mock import call

import pytest
Expand All @@ -13,10 +14,14 @@ class TestFillPKAndUserId:
USERNAME_2 = "USERNAME_2"

def test_it(self, factories, annotation_write_service):
annos = factories.Annotation.create_batch(10)
now = datetime.now()
annos = factories.Annotation.create_batch(10, created=now)
factories.Annotation.create_batch(10, deleted=True)
factories.Annotation.create_batch(10, created=now + timedelta(days=10))

fill_annotation_slim(batch_size=10)
fill_annotation_slim(
batch_size=10, since=now - timedelta(days=1), until=now + timedelta(days=1)
)

annotation_write_service.upsert_annotation_slim.assert_has_calls(
[call(anno) for anno in annos], any_order=True
Expand Down

0 comments on commit ffa6d80

Please sign in to comment.