Skip to content

Commit dd81035

Browse files
committed
Marshalling: Use orjson to improve JSON serialization performance
https://github.com/ijl/orjson
1 parent a2aae9b commit dd81035

File tree

4 files changed

+52
-34
lines changed

4 files changed

+52
-34
lines changed

CHANGES.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,17 @@ Changes for crate
55
Unreleased
66
==========
77

8+
- Switched JSON encoder to use the `orjson`_ library, to improve JSON
9+
marshalling performance. Thanks, @widmogrod.
10+
orjson is fast and, in some respects, more correct than
11+
Python's stdlib ``json`` module. Unlike the stdlib variant, orjson
12+
will serialize to ``bytes`` instead of ``str``. Please also note it
13+
will not deserialize to dataclasses, UUIDs, decimals, etc., or support
14+
``object_hook``. Within ``crate-python``, it is applied with an encoder
15+
function that adds type support for Python's ``Decimal`` type and
16+
freezegun's ``FakeDatetime`` type.
17+
18+
.. _orjson: https://github.com/ijl/orjson
819

920
2024/11/23 1.0.1
1021
================

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def read(path):
5454
packages=find_namespace_packages("src"),
5555
package_dir={"": "src"},
5656
install_requires=[
57+
"orjson<4",
5758
"urllib3",
5859
"verlib2",
5960
],

src/crate/client/http.py

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,21 @@
2020
# software solely pursuant to the terms of the relevant commercial agreement.
2121

2222

23-
import calendar
2423
import heapq
2524
import io
26-
import json
2725
import logging
2826
import os
2927
import re
3028
import socket
3129
import ssl
3230
import threading
31+
import typing as t
3332
from base64 import b64encode
34-
from datetime import date, datetime, timezone
3533
from decimal import Decimal
3634
from time import time
3735
from urllib.parse import urlparse
38-
from uuid import UUID
3936

37+
import orjson
4038
import urllib3
4139
from urllib3 import connection_from_url
4240
from urllib3.connection import HTTPConnection
@@ -86,25 +84,33 @@ def super_len(o):
8684
return None
8785

8886

89-
class CrateJsonEncoder(json.JSONEncoder):
90-
epoch_aware = datetime(1970, 1, 1, tzinfo=timezone.utc)
91-
epoch_naive = datetime(1970, 1, 1)
92-
93-
def default(self, o):
94-
if isinstance(o, (Decimal, UUID)):
95-
return str(o)
96-
if isinstance(o, datetime):
97-
if o.tzinfo is not None:
98-
delta = o - self.epoch_aware
99-
else:
100-
delta = o - self.epoch_naive
101-
return int(
102-
delta.microseconds / 1000.0
103-
+ (delta.seconds + delta.days * 24 * 3600) * 1000.0
104-
)
105-
if isinstance(o, date):
106-
return calendar.timegm(o.timetuple()) * 1000
107-
return json.JSONEncoder.default(self, o)
87+
def cratedb_json_encoder(obj: t.Any) -> str:
88+
"""
89+
Encoder function for orjson, with additional type support.
90+
91+
- Python's `Decimal` type.
92+
- freezegun's `FakeDatetime` type.
93+
94+
https://github.com/ijl/orjson#default
95+
"""
96+
if isinstance(obj, Decimal):
97+
return str(obj)
98+
elif hasattr(obj, "isoformat"):
99+
return obj.isoformat()
100+
raise TypeError
101+
102+
103+
def json_dumps(obj: t.Any) -> bytes:
104+
"""
105+
Serialize to JSON format, using `orjson`, with additional type support.
106+
107+
https://github.com/ijl/orjson
108+
"""
109+
return orjson.dumps(
110+
obj,
111+
default=cratedb_json_encoder,
112+
option=orjson.OPT_SERIALIZE_NUMPY,
113+
)
108114

109115

110116
class Server:
@@ -180,7 +186,7 @@ def close(self):
180186

181187
def _json_from_response(response):
182188
try:
183-
return json.loads(response.data.decode("utf-8"))
189+
return orjson.loads(response.data)
184190
except ValueError as ex:
185191
raise ProgrammingError(
186192
"Invalid server response of content-type '{}':\n{}".format(
@@ -223,7 +229,7 @@ def _raise_for_status_real(response):
223229
if response.status == 503:
224230
raise ConnectionError(message)
225231
if response.headers.get("content-type", "").startswith("application/json"):
226-
data = json.loads(response.data.decode("utf-8"))
232+
data = orjson.loads(response.data)
227233
error = data.get("error", {})
228234
error_trace = data.get("error_trace", None)
229235
if "results" in data:
@@ -323,7 +329,7 @@ def _update_pool_kwargs_for_ssl_minimum_version(server, kwargs):
323329
kwargs["ssl_minimum_version"] = ssl.TLSVersion.MINIMUM_SUPPORTED
324330

325331

326-
def _create_sql_payload(stmt, args, bulk_args):
332+
def _create_sql_payload(stmt, args, bulk_args) -> bytes:
327333
if not isinstance(stmt, str):
328334
raise ValueError("stmt is not a string")
329335
if args and bulk_args:
@@ -334,7 +340,7 @@ def _create_sql_payload(stmt, args, bulk_args):
334340
data["args"] = args
335341
if bulk_args:
336342
data["bulk_args"] = bulk_args
337-
return json.dumps(data, cls=CrateJsonEncoder)
343+
return json_dumps(data)
338344

339345

340346
def _get_socket_opts(

tests/client/test_http.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@
4949
)
5050
from crate.client.http import (
5151
Client,
52-
CrateJsonEncoder,
5352
_get_socket_opts,
5453
_remove_certs_for_non_https,
54+
json_dumps,
5555
)
5656

5757
REQUEST = "crate.client.http.Server.request"
@@ -318,7 +318,7 @@ def test_datetime_is_converted_to_ts(self, request):
318318
# convert string to dict
319319
# because the order of the keys isn't deterministic
320320
data = json.loads(request.call_args[1]["data"])
321-
self.assertEqual(data["args"], [1425108700000])
321+
self.assertEqual(data["args"], ["2015-02-28T07:31:40"])
322322
client.close()
323323

324324
@patch(REQUEST, autospec=True)
@@ -329,7 +329,7 @@ def test_date_is_converted_to_ts(self, request):
329329
day = dt.date(2016, 4, 21)
330330
client.sql("insert into users (dt) values (?)", (day,))
331331
data = json.loads(request.call_args[1]["data"])
332-
self.assertEqual(data["args"], [1461196800000])
332+
self.assertEqual(data["args"], ["2016-04-21"])
333333
client.close()
334334

335335
def test_socket_options_contain_keepalive(self):
@@ -724,10 +724,10 @@ def test_username(self):
724724
class TestCrateJsonEncoder(TestCase):
725725
def test_naive_datetime(self):
726726
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123")
727-
result = json.dumps(data, cls=CrateJsonEncoder)
728-
self.assertEqual(result, "1687771440123")
727+
result = json_dumps(data)
728+
self.assertEqual(result, b'"2023-06-26T09:24:00.123000"')
729729

730730
def test_aware_datetime(self):
731731
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00")
732-
result = json.dumps(data, cls=CrateJsonEncoder)
733-
self.assertEqual(result, "1687764240123")
732+
result = json_dumps(data)
733+
self.assertEqual(result, b'"2023-06-26T09:24:00.123000+02:00"')

0 commit comments

Comments
 (0)