diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 456e176..6f68c4a 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: os: - - macos-11 + - macos-latest - ubuntu-latest python-version: - "3.8" diff --git a/ais_tools/ais.py b/ais_tools/ais.py index 7cd5853..6058b5f 100644 --- a/ais_tools/ais.py +++ b/ais_tools/ais.py @@ -1,5 +1,6 @@ from ais_tools.transcode import DecodeError from ais_tools.transcode import ASCII8toAIS6 +from ais_tools import ais_1_2_3 from ais_tools import ais5 from ais_tools import ais8 from ais_tools import ais9 @@ -19,6 +20,9 @@ } decode_fn = { + 1: ais_1_2_3.ais_1_2_3_decode, + 2: ais_1_2_3.ais_1_2_3_decode, + 3: ais_1_2_3.ais_1_2_3_decode, 5: ais5.ais5_decode, 8: ais8.ais8_decode, 9: ais9.ais9_decode, diff --git a/ais_tools/ais25.py b/ais_tools/ais25.py index 992e6ce..ed87e03 100644 --- a/ais_tools/ais25.py +++ b/ais_tools/ais25.py @@ -22,7 +22,7 @@ def ais25_decode(body, pad): # assume that the pad value was wrong and just ignore the extra bits at the end new_len = (len(text_bits) // 6) * 6 text_bits = text_bits[:new_len] - message['text'] = ''.join(text_bits.iterdecode(ASCII8toASCII6_decode_tree)) + message['text'] = ''.join(text_bits.decode(ASCII8toASCII6_decode_tree)) return message diff --git a/ais_tools/ais_1_2_3.py b/ais_tools/ais_1_2_3.py new file mode 100644 index 0000000..d50d8e2 --- /dev/null +++ b/ais_tools/ais_1_2_3.py @@ -0,0 +1,6 @@ +import ais as libais +from ais import DecodeError + + +def ais_1_2_3_decode(body, pad): + return libais.decode(body[:28], 0) diff --git a/ais_tools/core/methods.c b/ais_tools/core/methods.c index 873def8..9730a5b 100644 --- a/ais_tools/core/methods.c +++ b/ais_tools/core/methods.c @@ -8,44 +8,70 @@ PyObject * method_compute_checksum(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { + PyObject *py_str; + PyObject *result; const char *str; if (nargs != 1) return PyErr_Format(PyExc_TypeError, "compute_checksum expects 1 argument"); - str = PyUnicode_AsUTF8(PyObject_Str(args[0])); - return PyLong_FromLong(checksum(str)); + py_str = PyObject_Str(args[0]); + + str = PyUnicode_AsUTF8(py_str); + + result = PyLong_FromLong(checksum(str)); + + Py_DECREF(py_str); + + return result; } PyObject * method_compute_checksum_str(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { + PyObject *py_str; + PyObject *result; const char *str; char c_str[3]; if (nargs != 1) return PyErr_Format(PyExc_TypeError, "checksum_str expects 1 argument"); - str = PyUnicode_AsUTF8(PyObject_Str(args[0])); + py_str = PyObject_Str(args[0]); + + str = PyUnicode_AsUTF8(py_str); + checksum_str(c_str, str, ARRAY_LENGTH(c_str)); - return PyUnicode_FromString(c_str); + + result = PyUnicode_FromString(c_str); + + Py_DECREF(py_str); + + return result; } PyObject * method_is_checksum_valid(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { + PyObject *py_str; const char *str; char buffer[MAX_SENTENCE_LENGTH]; + size_t len; if (nargs != 1) return PyErr_Format(PyExc_TypeError, "checksum_str expects 1 argument"); - str = PyUnicode_AsUTF8(PyObject_Str(args[0])); + py_str = PyObject_Str(args[0]); - if (safe_strcpy(buffer, str, ARRAY_LENGTH(buffer)) >= ARRAY_LENGTH(buffer)) - return PyErr_Format(PyExc_ValueError, "String too long"); + str = PyUnicode_AsUTF8(py_str); - if (is_checksum_valid(buffer)) + len = safe_strcpy(buffer, str, ARRAY_LENGTH(buffer)); + + Py_DECREF(py_str); + + if (len >= ARRAY_LENGTH(buffer)) + return PyErr_Format(PyExc_ValueError, "String too long"); + else if (is_checksum_valid(buffer)) Py_RETURN_TRUE; else Py_RETURN_FALSE; diff --git a/ais_tools/core/strcpy.c b/ais_tools/core/strcpy.c index a1645b1..df1f9c4 100644 --- a/ais_tools/core/strcpy.c +++ b/ais_tools/core/strcpy.c @@ -1,4 +1,3 @@ -#include #include /* diff --git a/ais_tools/normalize.py b/ais_tools/normalize.py index 139acfc..7768075 100644 --- a/ais_tools/normalize.py +++ b/ais_tools/normalize.py @@ -1,6 +1,6 @@ from typing import Optional, Any from datetime import datetime -import hashlib +import xxhash import re from enum import Enum @@ -36,7 +36,7 @@ def in_range(value, valid_range): fields = { 'year': (1, 9999), 'month': (1, 12), - 'day': (1,31), + 'day': (1, 31), 'hour': (0, 23), 'minute': (0, 59), 'second': (0, 59) @@ -44,7 +44,10 @@ def in_range(value, valid_range): values = [(f, message.get(f), valid_range) for f, valid_range in fields.items()] if all(in_range(value, valid_range) for _, value, valid_range in values): values = {f: value for f, value, _ in values} - return datetime(**values).isoformat(timespec='seconds') + 'Z' + try: + return datetime(**values).isoformat(timespec='seconds') + 'Z' + except ValueError: + return None def coord_type(val: float, _min: float, _max: float, unavailable: float) -> POSITION_TYPE: @@ -168,16 +171,14 @@ def normalize_dedup_key(message: dict) -> Optional[str]: if 'nmea' not in message or 'tagblock_timestamp' not in message: return None - nmea = ''.join(re.findall(REGEX_NMEA, message['nmea'])) + nmea = ''.join(REGEX_NMEA.findall(message['nmea'])) if not nmea: return None # no nmea found in message timestamp = int(message['tagblock_timestamp'] / 60) key = f'{nmea}_{timestamp}'.encode('utf-8') - h = hashlib.sha1() - h.update(key) - return h.hexdigest()[:16] + return xxhash.xxh3_64_hexdigest(key) def map_field(message: dict, source_field: str = None) -> Optional[Any]: diff --git a/ais_tools/transcode.py b/ais_tools/transcode.py index 04926b5..43ed333 100644 --- a/ais_tools/transcode.py +++ b/ais_tools/transcode.py @@ -30,7 +30,7 @@ def bits_to_nmea(bits): pad = 0 else: bits = bits + (pad * bitarray('0')) - return ''.join(bits.iterdecode(ASCII8toAIS6_decode_tree)), pad + return ''.join(bits.decode(ASCII8toAIS6_decode_tree)), pad def nmea_to_bits(body, pad): @@ -73,7 +73,7 @@ def to_nmea(self): bits = self.bits else: bits = self.bits + (pad * bitarray('0')) - return ''.join(bits.iterdecode(ASCII8toAIS6_decode_tree)), pad + return ''.join(bits.decode(ASCII8toAIS6_decode_tree)), pad def pack(self, struct, message): self.pack_into(struct, self.offset, message) @@ -225,4 +225,4 @@ def decode(self, value): bits = bitarray() bits.frombytes(value) bits = bits[:self.nbits] - return ''.join(bits.iterdecode(ASCII8toASCII6_decode_tree)) + return ''.join(bits.decode(ASCII8toASCII6_decode_tree)) diff --git a/pyproject.toml b/pyproject.toml index a4d1014..870911a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,9 +17,10 @@ dependencies = [ "Click", "gpxpy", "requests", - "bitarray", + "bitarray>=3.0.0", "cbitstruct", "udatetime", + "xxhash" ] [project.urls] diff --git a/sample/type1-bad-bitcount.nmea b/sample/type1-bad-bitcount.nmea new file mode 100644 index 0000000..1e8287b --- /dev/null +++ b/sample/type1-bad-bitcount.nmea @@ -0,0 +1,13 @@ +\s:rMT4097,t:ais-listener,c:1697012399*7E\!AIVDM,1,1,,A,13UuUj0P00QgDCLEiGAcOOwl2<0Igw;,0*28 +\s:rMT1200,t:ais-listener,c:1697012048*78\!AIVDM,1,1,,A,33lP4T5P00QJoU6Fd12D`Jal<,0*7F +\s:rMT1200,t:ais-listener,c:1697011998*7F\!AIVDM,1,1,,A,13R3iq00001LhHDFVNd@RC@R0<0FPF=,0*6C +\s:rMT6030,t:ais-listener,c:1697011876*78\!AIVDM,1,1,,B,15Ca7j002HQer4PClUD<1qbH089PRn4,0*07 +\s:rMT6168,t:ais-listener,c:1697014351*7F\!AIVDM,1,1,,B,16eg3D1P00RT5kHCfSje>Ovr:`AhPc=,0*57 +\s:rMT6030,t:ais-listener,c:1697014102*77\!AIVDM,1,1,,A,13R3=u3P00QfIwhCt;09V?v`0530Li7,0*61 +\s:rMT1200,t:ais-listener,c:1697011340*70\!AIVDM,1,1,,B,13VOUT002W1KM1hFbSbcrITb0<1Muw>,0*03 +\s:rMT6030,t:ais-listener,c:1717190215*76\!AIVDM,1,1,,A,144d5;0uiHQjSVjC6w:iL1Ab26a05d4,0*72 +\s:rMT6030,t:ais-listener,c:1717190199*71\!AIVDM,1,1,,A,13lHkf001C1o?`HC3=Oc@8w408Cos:<,0*3D +\s:rMT6030,t:ais-listener,c:1717190200*72\!AIVDM,1,1,,B,19@9=p0P1<1mUP`C7ttkBOw800RqWe=,0*3C +\s:rMT1200,t:ais-listener,c:1717190111*77\!AIVDM,1,1,,B,34i:=V50001K=nbFbsWeH42D2000j0<,0*5F +\s:rMT1200,t:ais-listener,c:1717189944*77\!AIVDM,1,1,,B,33TVdn1P01QK>R`Fc78v4?vh2DlbI03,0*20 + diff --git a/tests/test_aivdm.py b/tests/test_aivdm.py index 4de0da2..c746988 100644 --- a/tests/test_aivdm.py +++ b/tests/test_aivdm.py @@ -19,7 +19,9 @@ ('\\s:66,c:1663246931*35\\!AIVDM,1,1,,,9001?BP=h:qJ9vb;:f7EN1h240Rb,0*3F', {'alt_sensor': 0, 'assigned_mode': False}), ('\\c:1712130370,s:dynamic,t:spire*55\\!AIVDM,1,1,,A,403wboivQ1WfE`4gnt5MJT?024rp,0*24', - {'year': 2024, 'month': 4, 'day': 3, 'hour': 7, 'minute': 46, 'second': 21}) + {'year': 2024, 'month': 4, 'day': 3, 'hour': 7, 'minute': 46, 'second': 21}), + ('\\s:rMT4097,t:ais-listener,c:1697012399*7E\\!AIVDM,1,1,,A,13UuUj0P00QgDCLEiGAcOOwl2<0Igw;,0*28', + {'id': 1, 'mmsi': 241133000}), ]) def test_decode(nmea, expected): decoder = AIVDM() @@ -31,7 +33,8 @@ def test_decode(nmea, expected): @pytest.mark.parametrize("nmea,error", [ ('!AIVDM,2,1,1,B,@,0*57', 'Expected 2 message parts to decode but found 1'), ('!', 'No valid AIVDM found in'), - ('!AIVDM,1,1,,A,B99999,0*5D', 'AISTOOLS ERR: Not enough bits to decode. Need at least 149 bits, got only 36') + ('!AIVDM,1,1,,A,B99999,0*5D', 'AISTOOLS ERR: Not enough bits to decode. Need at least 149 bits, got only 36'), + ('!AIVDM,1,1,,A,1000,0*28', 'AISTOOLS ERR: Ais1_2_3: AIS_ERR_BAD_BIT_COUNT'), ]) def test_decode_fail(nmea, error): decoder = AIVDM() diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 27fa913..2938c7c 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -229,12 +229,12 @@ def test_nmea_regex(value, expected): @pytest.mark.parametrize("message,expected", [ ({'tagblock_timestamp': 1707443048}, None), - ({'nmea': '!AIVDM,2,2,2,A,@,0*57', 'tagblock_timestamp': 1707443048}, '745f4bde2318c974'), - ({'nmea': '!BSVDM,2,2,2,A,@,0*57', 'tagblock_timestamp': 1707443048}, 'a6926b3f62eeb7d7'), - ({'nmea': '!BSVDM,2,2,2,B,@,0*57', 'tagblock_timestamp': 1707443048}, 'd3972916d1a17048'), + ({'nmea': '!AIVDM,2,2,2,A,@,0*57', 'tagblock_timestamp': 1707443048}, '784dcbf153c04531'), + ({'nmea': '!BSVDM,2,2,2,A,@,0*57', 'tagblock_timestamp': 1707443048}, 'da0adb5959aec091'), + ({'nmea': '!BSVDM,2,2,2,B,@,0*57', 'tagblock_timestamp': 1707443048}, '70bbc54a4bfc5daa'), ({'nmea': 'invalid', 'tagblock_timestamp': 1707443048}, None), ({"nmea": "!AIVDM,1,1,,A,H69@rrS3S?SR3G2D000000000000,0*2e", - "tagblock_timestamp": 1712156268}, '06f1f1b00815aa10'), + "tagblock_timestamp": 1712156268}, 'e804595091021cd8'), ({'nmea': '!AIVDM', 'tagblock_timestamp': 0}, None), ]) def test_normalize_dedup_key(message, expected): @@ -287,9 +287,11 @@ def test_filter_message(message, expected): ({'dim_c': 1, 'dim_d': 1}, {'width': 2}), ({'type_and_cargo': 30}, {'shiptype': 'Fishing'}), ({'nmea': '!AIVDM,2,2,2,A,@,0*57', 'tagblock_timestamp': 1707443048}, - {'timestamp': '2024-02-09T01:44:08Z', 'dedup_key': '745f4bde2318c974'}), + {'timestamp': '2024-02-09T01:44:08Z', 'dedup_key': '784dcbf153c04531'}), ({'year': 2024, 'month': 4, 'day': 3, 'hour': 2, 'minute': 1, 'second': 0}, - {'tx_timestamp': '2024-04-03T02:01:00Z'}) + {'tx_timestamp': '2024-04-03T02:01:00Z'}), + ({'year': 2024, 'month': 2, 'day': 31, 'hour': 0, 'minute': 0, 'second': 0}, + {}), ]) def test_normalize_message(message, expected): assert normalize_message(message, DEFAULT_FIELD_TRANSFORMS) == expected diff --git a/utils/mem-test.py b/utils/mem-test.py new file mode 100644 index 0000000..743d93a --- /dev/null +++ b/utils/mem-test.py @@ -0,0 +1,21 @@ + +from memory_profiler import profile + +from ais_tools.core import checksum +from ais_tools.core import checksum_str +from ais_tools.core import is_checksum_valid + +def run_checksum(n): + for i in range(n): + str = ''.join(['a','b']) + c = checksum(str) + c = checksum_str(str) + c = is_checksum_valid(str) + + +@profile +def run_test(): + run_checksum(1000000) + + +run_test() \ No newline at end of file diff --git a/utils/perf-test.py b/utils/perf-test.py index d707bcc..c7f7d0b 100644 --- a/utils/perf-test.py +++ b/utils/perf-test.py @@ -3,6 +3,7 @@ import pstats from pstats import SortKey from ais_tools.aivdm import AIVDM, AisToolsDecoder +from ais_tools.normalize import normalize_dedup_key # from ais_tools.aivdm import LibaisDecoder @@ -60,6 +61,14 @@ def full_decode(n): msg.add_parser_version() +def test_normalize_dedup_key(n): + msg = { + 'tagblock_timestamp': 123456789, + 'nmea': "\\!AIVDM,2,1,1,B,5:U7dET2B4iE17KOS:0@Di0PTqE>22222222220l1@F65ut8?=lhCU3l,0*71" + } + for i in range (n): + _ = normalize_dedup_key(msg) + def run_perf_test(func): cProfile.run(func, 'perf-test.stats') @@ -68,7 +77,7 @@ def run_perf_test(func): def main(): - run_perf_test('decode(10000)') + run_perf_test('test_normalize_dedup_key(1000000)') # run_perf_test('full_decode(100000)') # checksum_compare()