Skip to content

Commit

Permalink
Merge pull request #66 from GlobalFishingWatch/dev
Browse files Browse the repository at this point in the history
Merge dev to master for release 0.1.7
  • Loading branch information
pwoods25443 authored Nov 20, 2024
2 parents 5e7064c + a0c46a7 commit d678121
Show file tree
Hide file tree
Showing 14 changed files with 116 additions and 31 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
strategy:
matrix:
os:
- macos-11
- macos-latest
- ubuntu-latest
python-version:
- "3.8"
Expand Down
4 changes: 4 additions & 0 deletions ais_tools/ais.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from ais_tools.transcode import DecodeError
from ais_tools.transcode import ASCII8toAIS6
from ais_tools import ais_1_2_3
from ais_tools import ais5
from ais_tools import ais8
from ais_tools import ais9
Expand All @@ -19,6 +20,9 @@
}

decode_fn = {
1: ais_1_2_3.ais_1_2_3_decode,
2: ais_1_2_3.ais_1_2_3_decode,
3: ais_1_2_3.ais_1_2_3_decode,
5: ais5.ais5_decode,
8: ais8.ais8_decode,
9: ais9.ais9_decode,
Expand Down
2 changes: 1 addition & 1 deletion ais_tools/ais25.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def ais25_decode(body, pad):
# assume that the pad value was wrong and just ignore the extra bits at the end
new_len = (len(text_bits) // 6) * 6
text_bits = text_bits[:new_len]
message['text'] = ''.join(text_bits.iterdecode(ASCII8toASCII6_decode_tree))
message['text'] = ''.join(text_bits.decode(ASCII8toASCII6_decode_tree))

return message

Expand Down
6 changes: 6 additions & 0 deletions ais_tools/ais_1_2_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import ais as libais
from ais import DecodeError


def ais_1_2_3_decode(body, pad):
    """Decode the body of an AIS type 1, 2 or 3 message with libais.

    Only the first 28 characters of ``body`` are handed to libais, and the
    pad is always passed as 0. Any trailing characters beyond the 28th and
    the caller-supplied ``pad`` value are therefore ignored.

    NOTE(review): 28 six-bit characters is 168 bits, presumably the fixed
    length libais expects for these message types -- confirm.

    :param body: NMEA payload string of the message.
    :param pad: pad bit count reported with the payload (unused here).
    :return: whatever ``libais.decode`` returns for the truncated body.
    """
    truncated_body = body[:28]
    return libais.decode(truncated_body, 0)
42 changes: 34 additions & 8 deletions ais_tools/core/methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,44 +8,70 @@
/*
 * compute_checksum(obj) -> int
 *
 * Converts the single positional argument to its str() form, passes the
 * UTF-8 text to checksum() and returns the result as a Python int.
 *
 * Returns NULL with a Python exception set when the argument count is not
 * exactly 1, when str() of the argument fails, or when the string cannot be
 * encoded as UTF-8.
 */
PyObject *
method_compute_checksum(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
    PyObject *py_str;
    PyObject *result;
    const char *str;

    if (nargs != 1)
        return PyErr_Format(PyExc_TypeError, "compute_checksum expects 1 argument");

    /* PyObject_Str returns a new reference that must be released below,
       or NULL (exception already set) if the conversion fails. */
    py_str = PyObject_Str(args[0]);
    if (py_str == NULL)
        return NULL;

    /* The UTF-8 buffer belongs to py_str, so it is only used while the
       reference is still held. */
    str = PyUnicode_AsUTF8(py_str);
    if (str == NULL) {
        Py_DECREF(py_str);
        return NULL;
    }

    result = PyLong_FromLong(checksum(str));

    Py_DECREF(py_str);

    return result;
}

/*
 * checksum_str(obj) -> str
 *
 * Converts the single positional argument to its str() form, lets
 * checksum_str() write the checksum text for it into a 3-byte local buffer,
 * and returns that buffer as a Python str.
 *
 * Returns NULL with a Python exception set when the argument count is not
 * exactly 1, when str() of the argument fails, or when the string cannot be
 * encoded as UTF-8.
 */
PyObject *
method_compute_checksum_str(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
    PyObject *py_str;
    PyObject *result;
    const char *str;
    char c_str[3];

    if (nargs != 1)
        return PyErr_Format(PyExc_TypeError, "checksum_str expects 1 argument");

    /* PyObject_Str returns a new reference that must be released below,
       or NULL (exception already set) if the conversion fails. */
    py_str = PyObject_Str(args[0]);
    if (py_str == NULL)
        return NULL;

    /* The UTF-8 buffer belongs to py_str, so it is only used while the
       reference is still held. */
    str = PyUnicode_AsUTF8(py_str);
    if (str == NULL) {
        Py_DECREF(py_str);
        return NULL;
    }

    /* c_str is filled before py_str is released; after this call the
       source buffer is no longer needed. */
    checksum_str(c_str, str, ARRAY_LENGTH(c_str));

    result = PyUnicode_FromString(c_str);

    Py_DECREF(py_str);

    return result;
}

PyObject *
method_is_checksum_valid(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *py_str;
const char *str;
char buffer[MAX_SENTENCE_LENGTH];
size_t len;

if (nargs != 1)
return PyErr_Format(PyExc_TypeError, "checksum_str expects 1 argument");

str = PyUnicode_AsUTF8(PyObject_Str(args[0]));
py_str = PyObject_Str(args[0]);

if (safe_strcpy(buffer, str, ARRAY_LENGTH(buffer)) >= ARRAY_LENGTH(buffer))
return PyErr_Format(PyExc_ValueError, "String too long");
str = PyUnicode_AsUTF8(py_str);

if (is_checksum_valid(buffer))
len = safe_strcpy(buffer, str, ARRAY_LENGTH(buffer));

Py_DECREF(py_str);

if (len >= ARRAY_LENGTH(buffer))
return PyErr_Format(PyExc_ValueError, "String too long");
else if (is_checksum_valid(buffer))
Py_RETURN_TRUE;
else
Py_RETURN_FALSE;
Expand Down
1 change: 0 additions & 1 deletion ais_tools/core/strcpy.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include <sys/cdefs.h>
#include <sys/types.h>

/*
Expand Down
15 changes: 8 additions & 7 deletions ais_tools/normalize.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Optional, Any
from datetime import datetime
import hashlib
import xxhash
import re
from enum import Enum

Expand Down Expand Up @@ -36,15 +36,18 @@ def in_range(value, valid_range):
fields = {
'year': (1, 9999),
'month': (1, 12),
'day': (1,31),
'day': (1, 31),
'hour': (0, 23),
'minute': (0, 59),
'second': (0, 59)
}
values = [(f, message.get(f), valid_range) for f, valid_range in fields.items()]
if all(in_range(value, valid_range) for _, value, valid_range in values):
values = {f: value for f, value, _ in values}
return datetime(**values).isoformat(timespec='seconds') + 'Z'
try:
return datetime(**values).isoformat(timespec='seconds') + 'Z'
except ValueError:
return None


def coord_type(val: float, _min: float, _max: float, unavailable: float) -> POSITION_TYPE:
Expand Down Expand Up @@ -168,16 +171,14 @@ def normalize_dedup_key(message: dict) -> Optional[str]:
if 'nmea' not in message or 'tagblock_timestamp' not in message:
return None

nmea = ''.join(re.findall(REGEX_NMEA, message['nmea']))
nmea = ''.join(REGEX_NMEA.findall(message['nmea']))
if not nmea:
return None # no nmea found in message

timestamp = int(message['tagblock_timestamp'] / 60)

key = f'{nmea}_{timestamp}'.encode('utf-8')
h = hashlib.sha1()
h.update(key)
return h.hexdigest()[:16]
return xxhash.xxh3_64_hexdigest(key)


def map_field(message: dict, source_field: str = None) -> Optional[Any]:
Expand Down
6 changes: 3 additions & 3 deletions ais_tools/transcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def bits_to_nmea(bits):
pad = 0
else:
bits = bits + (pad * bitarray('0'))
return ''.join(bits.iterdecode(ASCII8toAIS6_decode_tree)), pad
return ''.join(bits.decode(ASCII8toAIS6_decode_tree)), pad


def nmea_to_bits(body, pad):
Expand Down Expand Up @@ -73,7 +73,7 @@ def to_nmea(self):
bits = self.bits
else:
bits = self.bits + (pad * bitarray('0'))
return ''.join(bits.iterdecode(ASCII8toAIS6_decode_tree)), pad
return ''.join(bits.decode(ASCII8toAIS6_decode_tree)), pad

def pack(self, struct, message):
self.pack_into(struct, self.offset, message)
Expand Down Expand Up @@ -225,4 +225,4 @@ def decode(self, value):
bits = bitarray()
bits.frombytes(value)
bits = bits[:self.nbits]
return ''.join(bits.iterdecode(ASCII8toASCII6_decode_tree))
return ''.join(bits.decode(ASCII8toASCII6_decode_tree))
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ dependencies = [
"Click",
"gpxpy",
"requests",
"bitarray",
"bitarray>=3.0.0",
"cbitstruct",
"udatetime",
"xxhash"
]

[project.urls]
Expand Down
13 changes: 13 additions & 0 deletions sample/type1-bad-bitcount.nmea
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
\s:rMT4097,t:ais-listener,c:1697012399*7E\!AIVDM,1,1,,A,13UuUj0P00QgDCLEiGAcOOwl2<0Igw;,0*28
\s:rMT1200,t:ais-listener,c:1697012048*78\!AIVDM,1,1,,A,33lP4T5P00QJoU6Fd1<v4?v>2D`Jal<,0*7F
\s:rMT1200,t:ais-listener,c:1697011998*7F\!AIVDM,1,1,,A,13R3iq00001LhHDFVNd@RC@R0<0FPF=,0*6C
\s:rMT6030,t:ais-listener,c:1697011876*78\!AIVDM,1,1,,B,15Ca7j002HQer4PClUD<1qbH089PRn4,0*07
\s:rMT6168,t:ais-listener,c:1697014351*7F\!AIVDM,1,1,,B,16eg3D1P00RT5kHCfSje>Ovr:`AhPc=,0*57
\s:rMT6030,t:ais-listener,c:1697014102*77\!AIVDM,1,1,,A,13R3=u3P00QfIwhCt;09V?v`0530Li7,0*61
\s:rMT1200,t:ais-listener,c:1697011340*70\!AIVDM,1,1,,B,13VOUT002W1KM1hFbSbcrITb0<1Muw>,0*03
\s:rMT6030,t:ais-listener,c:1717190215*76\!AIVDM,1,1,,A,144d5;0uiHQjSVjC6w:iL1Ab26a05d4,0*72
\s:rMT6030,t:ais-listener,c:1717190199*71\!AIVDM,1,1,,A,13lHkf001C1o?`HC3=Oc@8w408Cos:<,0*3D
\s:rMT6030,t:ais-listener,c:1717190200*72\!AIVDM,1,1,,B,19@9=p0P1<1mUP`C7ttkBOw800RqWe=,0*3C
\s:rMT1200,t:ais-listener,c:1717190111*77\!AIVDM,1,1,,B,34i:=V50001K=nbFbsWeH42D2000j0<,0*5F
\s:rMT1200,t:ais-listener,c:1717189944*77\!AIVDM,1,1,,B,33TVdn1P01QK>R`Fc78v4?vh2DlbI03,0*20

7 changes: 5 additions & 2 deletions tests/test_aivdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
('\\s:66,c:1663246931*35\\!AIVDM,1,1,,,9001?BP=h:qJ9vb;:f7EN1h240Rb,0*3F',
{'alt_sensor': 0, 'assigned_mode': False}),
('\\c:1712130370,s:dynamic,t:spire*55\\!AIVDM,1,1,,A,403wboivQ1WfE`4gnt5MJT?024rp,0*24',
{'year': 2024, 'month': 4, 'day': 3, 'hour': 7, 'minute': 46, 'second': 21})
{'year': 2024, 'month': 4, 'day': 3, 'hour': 7, 'minute': 46, 'second': 21}),
('\\s:rMT4097,t:ais-listener,c:1697012399*7E\\!AIVDM,1,1,,A,13UuUj0P00QgDCLEiGAcOOwl2<0Igw;,0*28',
{'id': 1, 'mmsi': 241133000}),
])
def test_decode(nmea, expected):
decoder = AIVDM()
Expand All @@ -31,7 +33,8 @@ def test_decode(nmea, expected):
@pytest.mark.parametrize("nmea,error", [
('!AIVDM,2,1,1,B,@,0*57', 'Expected 2 message parts to decode but found 1'),
('!', 'No valid AIVDM found in'),
('!AIVDM,1,1,,A,B99999,0*5D', 'AISTOOLS ERR: Not enough bits to decode. Need at least 149 bits, got only 36')
('!AIVDM,1,1,,A,B99999,0*5D', 'AISTOOLS ERR: Not enough bits to decode. Need at least 149 bits, got only 36'),
('!AIVDM,1,1,,A,1000,0*28', 'AISTOOLS ERR: Ais1_2_3: AIS_ERR_BAD_BIT_COUNT'),
])
def test_decode_fail(nmea, error):
decoder = AIVDM()
Expand Down
14 changes: 8 additions & 6 deletions tests/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,12 +229,12 @@ def test_nmea_regex(value, expected):

@pytest.mark.parametrize("message,expected", [
({'tagblock_timestamp': 1707443048}, None),
({'nmea': '!AIVDM,2,2,2,A,@,0*57', 'tagblock_timestamp': 1707443048}, '745f4bde2318c974'),
({'nmea': '!BSVDM,2,2,2,A,@,0*57', 'tagblock_timestamp': 1707443048}, 'a6926b3f62eeb7d7'),
({'nmea': '!BSVDM,2,2,2,B,@,0*57', 'tagblock_timestamp': 1707443048}, 'd3972916d1a17048'),
({'nmea': '!AIVDM,2,2,2,A,@,0*57', 'tagblock_timestamp': 1707443048}, '784dcbf153c04531'),
({'nmea': '!BSVDM,2,2,2,A,@,0*57', 'tagblock_timestamp': 1707443048}, 'da0adb5959aec091'),
({'nmea': '!BSVDM,2,2,2,B,@,0*57', 'tagblock_timestamp': 1707443048}, '70bbc54a4bfc5daa'),
({'nmea': 'invalid', 'tagblock_timestamp': 1707443048}, None),
({"nmea": "!AIVDM,1,1,,A,H69@rrS3S?SR3G2D000000000000,0*2e",
"tagblock_timestamp": 1712156268}, '06f1f1b00815aa10'),
"tagblock_timestamp": 1712156268}, 'e804595091021cd8'),
({'nmea': '!AIVDM', 'tagblock_timestamp': 0}, None),
])
def test_normalize_dedup_key(message, expected):
Expand Down Expand Up @@ -287,9 +287,11 @@ def test_filter_message(message, expected):
({'dim_c': 1, 'dim_d': 1}, {'width': 2}),
({'type_and_cargo': 30}, {'shiptype': 'Fishing'}),
({'nmea': '!AIVDM,2,2,2,A,@,0*57', 'tagblock_timestamp': 1707443048},
{'timestamp': '2024-02-09T01:44:08Z', 'dedup_key': '745f4bde2318c974'}),
{'timestamp': '2024-02-09T01:44:08Z', 'dedup_key': '784dcbf153c04531'}),
({'year': 2024, 'month': 4, 'day': 3, 'hour': 2, 'minute': 1, 'second': 0},
{'tx_timestamp': '2024-04-03T02:01:00Z'})
{'tx_timestamp': '2024-04-03T02:01:00Z'}),
({'year': 2024, 'month': 2, 'day': 31, 'hour': 0, 'minute': 0, 'second': 0},
{}),
])
def test_normalize_message(message, expected):
assert normalize_message(message, DEFAULT_FIELD_TRANSFORMS) == expected
Expand Down
21 changes: 21 additions & 0 deletions utils/mem-test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@

from memory_profiler import profile

from ais_tools.core import checksum
from ais_tools.core import checksum_str
from ais_tools.core import is_checksum_valid

def run_checksum(n):
    """Call checksum, checksum_str and is_checksum_valid ``n`` times each.

    The return values are discarded; the function exists only to exercise
    the three ``ais_tools.core`` calls repeatedly.

    :param n: number of loop iterations.
    """
    for _ in range(n):
        # The input is rebuilt with join on every iteration rather than
        # reusing one literal.
        # NOTE(review): presumably so each call receives a freshly
        # allocated str object -- confirm.
        text = ''.join(['a', 'b'])
        result = checksum(text)
        result = checksum_str(text)
        result = is_checksum_valid(text)


@profile
def run_test():
    """Run one million iterations of run_checksum under ``@profile``."""
    iterations = 1000000
    run_checksum(iterations)


# Runs the profiled test at module level, i.e. whenever this file is executed.
run_test()
11 changes: 10 additions & 1 deletion utils/perf-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pstats
from pstats import SortKey
from ais_tools.aivdm import AIVDM, AisToolsDecoder
from ais_tools.normalize import normalize_dedup_key
# from ais_tools.aivdm import LibaisDecoder


Expand Down Expand Up @@ -60,6 +61,14 @@ def full_decode(n):
msg.add_parser_version()


def test_normalize_dedup_key(n):
    """Call normalize_dedup_key ``n`` times on one fixed sample message.

    The same dict is reused for every call and the returned key is
    discarded; this is a workload for the profiler, not an assertion.

    :param n: number of calls to make.
    """
    sample_message = {
        'tagblock_timestamp': 123456789,
        'nmea': "\\!AIVDM,2,1,1,B,5:U7dET2B4iE17KOS:0@Di0PTqE>22222222220l1@F65ut8?=lhCU3l,0*71",
    }
    for _ in range(n):
        normalize_dedup_key(sample_message)

def run_perf_test(func):
cProfile.run(func, 'perf-test.stats')

Expand All @@ -68,7 +77,7 @@ def run_perf_test(func):


def main():
run_perf_test('decode(10000)')
run_perf_test('test_normalize_dedup_key(1000000)')
# run_perf_test('full_decode(100000)')
# checksum_compare()

Expand Down

0 comments on commit d678121

Please sign in to comment.