Skip to content
This repository has been archived by the owner on Sep 23, 2024. It is now read-only.

Commit

Permalink
Merge pull request #9 from transferwise/support-binary-cols
Browse files Browse the repository at this point in the history
1.1.1: Handle trailing zeros added by BINARY type column
  • Loading branch information
Samira-El authored Jan 7, 2020
2 parents bf45009 + 25f2ff6 commit 418596f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import setup

setup(name='pipelinewise-tap-mysql',
version='1.1.0',
version='1.1.1',
description='Singer.io tap for extracting data from MySQL - PipelineWise compatible',
author='Stitch',
url='https://github.com/transferwise/pipelinewise-tap-mysql',
Expand Down
18 changes: 16 additions & 2 deletions tap_mysql/sync_strategies/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,22 @@ def row_to_singer_record(catalog_entry, version, row, columns, time_extracted):
row_to_persist += (timedelta_from_epoch.isoformat() + '+00:00',)

elif isinstance(elem, bytes):
# encode bytes as hex
row_to_persist += (codecs.encode(elem, 'hex'),)
# Remove all trailing '\x00' bytes from the value.
# Trailing zeroes are caused by how BINARY type columns pad the inserted values;
# this doesn't happen when using VARBINARY.
#
# ref: https://dev.mysql.com/doc/refman/8.0/en/binary-varbinary.html
#
# Leaving the trailing zeroes makes FT & Incremental behave differently from Binlog, and we end up with
# inconsistent state records.
#
# Example: for a binary value b'pk0'
# - Binlog would read it as b'pk0' then in HEX it would be b'706b30'
# - FT & Inc would read it as b'pk0\x00\x00\x00\x00....\x00' then in HEX it would be b'706b300000...0'
stripped_utf8_elem = elem.decode().rstrip('\x00').encode()

# encode the stripped elem to hex
row_to_persist += (codecs.encode(stripped_utf8_elem, 'hex'),)

elif 'boolean' in property_type or property_type == 'boolean':
if elem is None:
Expand Down

0 comments on commit 418596f

Please sign in to comment.