Skip to content

Commit

Permalink
Merge pull request #6 from industrydive/ENGA-933-custom-field-decorator
Browse files Browse the repository at this point in the history
Resolves ENGA-923 "Modify django-bigquery-exporter base to include decorator for custom_field"
  • Loading branch information
DevDaveFrame authored Nov 29, 2023
2 parents 3b6411f + b6d8a01 commit 6f4cc9c
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 69 deletions.
39 changes: 28 additions & 11 deletions bigquery_exporter/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@
import logging


def custom_field(method):
    """
    Decorator to mark a method as a custom field for a BigQueryExporter subclass.

    Sets ``is_custom_field = True`` on the method and returns the same method
    object, so the decorated name still refers to the original function.

    Args:
        method: The function to mark. It must declare exactly two positional
            arguments: self and the Django model instance.

    Raises:
        AssertionError: If the method does not declare exactly two positional arguments.
    """
    # Raise explicitly instead of using an ``assert`` statement so the check still
    # runs when Python is started with -O (which strips assert statements). The
    # exception type stays AssertionError so existing callers see no difference.
    if method.__code__.co_argcount != 2:
        raise AssertionError(
            'Custom field methods must have exactly two arguments: self and the Django model instance'
        )
    method.is_custom_field = True
    return method

def batch_qs(qs, batch_size=1000):
"""
Returns a (start, end, total, queryset) tuple for each batch in the given
Expand Down Expand Up @@ -41,9 +51,12 @@ def __init__(self):
except GoogleAPICallError as e:
logging.error(f'Error while creating BigQuery client: {e}')

for field in self.custom_fields:
if not hasattr(self, field):
raise ValueError(f'Custom field {field} is not defined')
for field in self.fields:
# check that all fields are valid (either a model field or a custom field method)
if not hasattr(self.model, field) and not hasattr(self, field):
raise Exception(
f'Invalid field {field} for model {self.model}. Must be a model field or a custom field method.'
)

def define_queryset(self):
"""
Expand All @@ -66,7 +79,8 @@ def export(self, pull_date=None, *args, **kwargs):
Export data to BigQuery.
Args:
pull_date (datetime.datetime, optional): The datetime used to populate the pull_date field. If not provided, the current date and time will be used.
pull_date (datetime.datetime, optional): The datetime used to populate the pull_date field.
If not provided, the current date and time will be used.
Raises:
Exception: If an error occurs while exporting the data.
Expand Down Expand Up @@ -97,13 +111,16 @@ def _push_to_bigquery(self, data):

def _process_queryset(self, queryset, pull_time):
processed_queryset = []
for obj in queryset:
processed_dict = {}
processed_dict['pull_date'] = pull_time.strftime('%Y-%m-%d %H:%M:%S')
for model_instance in queryset:
processed_dict = {'pull_date': pull_time.strftime('%Y-%m-%d %H:%M:%S')}
for field in self.fields:
processed_dict[field] = getattr(obj, field)
for field in self.custom_fields:
if hasattr(self, field):
processed_dict[field] = getattr(self, field)(obj)
# if the field appears in the exporter class, check if it's a custom field method
exporter_field = getattr(self, field, None)
if callable(exporter_field) and getattr(exporter_field, 'is_custom_field', False):
# If the field is a custom field method, call the method with the model instance
processed_dict[field] = exporter_field(model_instance)
else:
# Regular field
processed_dict[field] = getattr(model_instance, field)
processed_queryset.append(processed_dict)
return processed_queryset
7 changes: 5 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = django-bigquery-exporter
version = 0.1.1
version = 0.1.2
description = A Django plugin for exporting CMS data to Google BigQuery.
long_description = file: README.rst
url = https://www.industrydive.com/
Expand Down Expand Up @@ -28,4 +28,7 @@ packages = find:
python_requires = >=3.8
install_requires =
Django >= 3.2.5
google-cloud-bigquery >= 2.30.1
google-cloud-bigquery >= 2.30.1

[flake8]
max-line-length = 120
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@
],
name='django-bigquery-exporter',
packages=['bigquery_exporter'],
version='0.1.1',
version='0.1.2',
)
117 changes: 62 additions & 55 deletions tests/test_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
import datetime
from google.api_core.exceptions import GoogleAPICallError
from bigquery_exporter.base import batch_qs, BigQueryExporter
from bigquery_exporter.base import batch_qs, custom_field, BigQueryExporter


class TestBatchQS:
Expand Down Expand Up @@ -31,72 +32,78 @@ def test_batch_qs_large_batch_size(self, qs_factory):
assert batches[0] == (0, 5, 5, [1, 2, 3, 4, 5])


class TestBigQueryExporter:
def test_export_calls_define_queryset(self, mocker, mock_client, mock_model):
class TestExporter(BigQueryExporter):
model = mock_model
table_name = 'test'

exporter = TestExporter()
exporter.client = mock_client
exporter.define_queryset = mocker.MagicMock()
exporter.export()
exporter.define_queryset.assert_called_once()

def test_export_calls_process_queryset(self, mocker, mock_client, mock_model, qs_factory):
@pytest.fixture
def test_exporter_factory(mocker, mock_client, mock_model, qs_factory):
def create_test_exporter(num_querysets=5, table='test_table'):
class TestExporter(BigQueryExporter):
model = mock_model
table_name = 'test'
table_name = table

exporter = TestExporter()
exporter.client = mock_client
exporter.model = mock_model
exporter.define_queryset = mocker.MagicMock()
exporter.define_queryset.return_value = qs_factory(5)
exporter.define_queryset.return_value = qs_factory(num_querysets)
exporter._process_queryset = mocker.MagicMock()
exporter.export()
exporter._process_queryset.assert_called()
exporter._push_to_bigquery = mocker.MagicMock()
return exporter

def test_export_calls_push_to_bigquery(self, mocker, mock_client, mock_model, qs_factory):
class TestExporter(BigQueryExporter):
model = mock_model
table_name = 'test'
return create_test_exporter

exporter = TestExporter()
exporter.client = mock_client
exporter.model = mock_model
exporter.define_queryset = mocker.MagicMock()
exporter.define_queryset.return_value = qs_factory(5)
exporter._push_to_bigquery = mocker.MagicMock()
exporter.export()
exporter._push_to_bigquery.assert_called()

def test_export_logs_error_on_google_api_call_error(self, mocker, mock_client, mock_model, caplog, qs_factory):
class TestExporter(BigQueryExporter):
model = mock_model
table_name = 'test'
class TestBigQueryExporter:
@pytest.fixture
def test_exporter(self, test_exporter_factory):
return test_exporter_factory()

exporter = TestExporter()
exporter.client = mock_client
exporter.model = mock_model
exporter.define_queryset = mocker.MagicMock()
exporter.define_queryset.return_value = qs_factory(5)
exporter._push_to_bigquery = mocker.MagicMock()
exporter._push_to_bigquery.side_effect = GoogleAPICallError('Error')
exporter.export()
def test_export_calls_define_queryset(self, test_exporter):
test_exporter.export()
test_exporter.define_queryset.assert_called_once()

def test_export_calls_process_queryset(self, test_exporter):
test_exporter.export()
test_exporter._process_queryset.assert_called()

def test_export_calls_push_to_bigquery(self, test_exporter):
test_exporter.export()
test_exporter._push_to_bigquery.assert_called()

def test_export_logs_error_on_google_api_call_error(self, test_exporter, caplog):
test_exporter._push_to_bigquery.side_effect = GoogleAPICallError('Error')
test_exporter.export()
assert 'Error while exporting' in caplog.text

def test_export_logs_error_on_exception(self, mocker, mock_client, mock_model, caplog, qs_factory):
class TestExporter(BigQueryExporter):
model = mock_model
table_name = 'test'
def test_export_logs_error_on_exception(self, test_exporter, caplog):
test_exporter._push_to_bigquery.side_effect = Exception('Error')
test_exporter.export()
assert 'Error while exporting' in caplog.text

exporter = TestExporter()
exporter.client = mock_client
exporter.model = mock_model
exporter.define_queryset = mocker.MagicMock()
exporter.define_queryset.return_value = qs_factory(5)
exporter._process_queryset = mocker.MagicMock()
exporter._process_queryset.side_effect = Exception('Error')
exporter.export()
assert 'Error while exporting' in caplog.text
def test_custom_field_decorator_sets_custom_attribute_on_callable(self):
    """custom_field flags the function it is applied to via ``is_custom_field``."""
    def sample_field(self, obj):
        return None

    marked = custom_field(sample_field)

    assert marked.is_custom_field

def test_custom_field_succeeds_during_processing(mocker, mock_client, mock_model):
    """A method decorated with custom_field supplies the value for its field name."""
    mock_model.field_value = 1

    class TestBigQueryExporter(BigQueryExporter):
        model = mock_model
        table_name = 'test_table'
        fields = ['field_value', 'custom_field']

        @custom_field
        def custom_field(self, obj):
            return obj.field_value * 2

    # NOTE(review): presumably the mock_client fixture patches bigquery.Client
    # before the exporter is instantiated; confirm against the fixture definition.
    instances = [mock_model]
    rows = TestBigQueryExporter()._process_queryset(
        instances,
        datetime.datetime(2023, 1, 1, 0, 0, 0),
    )

    assert len(rows) == len(instances)
    for row in rows:
        # Plain field is copied from the model; custom field is the doubled value
        assert row['field_value'] == 1
        assert row['custom_field'] == 2

0 comments on commit 6f4cc9c

Please sign in to comment.