Skip to content

Commit f1dd326

Browse files
committed
segfault fix, perf improvement
Fixed segfault in byteswap with incorrect usage. Improved byteswap performance with string formats. Compile with O3 when available. Fixed NDEBUG define.
1 parent 06d1bb9 commit f1dd326

File tree

4 files changed

+55
-47
lines changed

4 files changed

+55
-47
lines changed

README.md

+19-19
Original file line numberDiff line numberDiff line change
@@ -41,25 +41,25 @@ The script available in `tests/test_perf.py` measures performance comparing to t
4141

4242
Here are the results "on my machine" (Ubuntu in VirtualBox on a laptop):
4343
```
44-
byteswap list of int | x 8.204 ( 9.208us -> 1.122us)
45-
byteswap str | x 6.433 ( 9.689us -> 1.506us)
46-
calcsize | x149.423 ( 61.967us -> 0.415us)
47-
compiled pack | x 43.227 ( 34.758us -> 0.804us)
48-
compiled pack_dict | x 26.490 ( 34.951us -> 1.319us)
49-
compiled pack_into | x 32.017 ( 39.522us -> 1.234us)
50-
compiled pack_into_dict | x 26.817 ( 38.984us -> 1.454us)
51-
compiled unpack | x 34.454 ( 31.814us -> 0.923us)
52-
compiled unpack_dict | x 23.534 ( 34.071us -> 1.448us)
53-
compiled unpack_from | x 27.170 ( 31.884us -> 1.174us)
54-
compiled unpack_from_dict | x 22.600 ( 33.927us -> 1.501us)
55-
pack | x 78.314 ( 105.593us -> 1.348us)
56-
pack_dict | x 52.916 ( 106.748us -> 2.017us)
57-
pack_into | x 82.233 ( 119.950us -> 1.459us)
58-
pack_into_dict | x 45.214 ( 111.338us -> 2.462us)
59-
unpack | x 82.712 ( 93.686us -> 1.133us)
60-
unpack_dict | x 41.064 ( 91.473us -> 2.228us)
61-
unpack_from | x 81.678 ( 95.729us -> 1.172us)
62-
unpack_from_dict | x 40.379 ( 90.430us -> 2.240us)
44+
byteswap list of int | x 8.779 ( 8.638us -> 0.984us)
45+
byteswap str | x 17.466 ( 9.158us -> 0.524us)
46+
calcsize | x139.330 ( 61.060us -> 0.438us)
47+
compiled pack | x 47.389 ( 35.968us -> 0.759us)
48+
compiled pack_dict | x 27.184 ( 34.588us -> 1.272us)
49+
compiled pack_into | x 32.037 ( 38.650us -> 1.206us)
50+
compiled pack_into_dict | x 27.343 ( 37.718us -> 1.379us)
51+
compiled unpack | x 33.928 ( 31.278us -> 0.922us)
52+
compiled unpack_dict | x 21.627 ( 31.597us -> 1.461us)
53+
compiled unpack_from | x 30.622 ( 29.977us -> 0.979us)
54+
compiled unpack_from_dict | x 20.479 ( 30.936us -> 1.511us)
55+
pack | x 77.003 ( 103.030us -> 1.338us)
56+
pack_dict | x 53.254 ( 103.255us -> 1.939us)
57+
pack_into | x 82.829 ( 119.373us -> 1.441us)
58+
pack_into_dict | x 52.173 ( 108.135us -> 2.073us)
59+
unpack | x 78.459 ( 91.896us -> 1.171us)
60+
unpack_dict | x 40.287 ( 89.300us -> 2.217us)
61+
unpack_from | x 77.027 ( 91.202us -> 1.184us)
62+
unpack_from_dict | x 39.467 ( 88.043us -> 2.231us)
6363
```
6464

6565
*Disclaimer:* these results can and will vary widely depending on the number and types of elements you pack/unpack. This script is provided as-is, and I will gladly accept an improved script providing more reliable results.

cbitstruct/_cbitstruct.c

+30-26
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ static void c_pack(
321321
int nbytes = (desc->bits + 7) / 8;
322322
int padding = nbytes * 8 - desc->bits;
323323
#if PY_LITTLE_ENDIAN
324-
assert(nbytes <= sizeof(data));
324+
assert(nbytes <= (int)sizeof(data));
325325
c_byteswitch((uint8_t*)&data, nbytes);
326326
#endif
327327
data >>= padding;
@@ -410,7 +410,7 @@ static void c_unpack(
410410
int padding = nbytes * 8 - desc->bits;
411411
data <<= padding;
412412
#if PY_LITTLE_ENDIAN
413-
assert(nbytes <= sizeof(data));
413+
assert(nbytes <= (int)sizeof(data));
414414
c_byteswitch((uint8_t*)&data, nbytes);
415415
#endif
416416
}
@@ -456,7 +456,7 @@ static bool python_to_parsed_elements(
456456
Py_ssize_t data_size,
457457
CompiledFormat fmt)
458458
{
459-
assert(data_size >= fmt.ndescs);
459+
assert(data_size >= fmt.ndescs - fmt.npadding);
460460

461461
int n = 0;
462462
for (int i = 0; i < fmt.ndescs; ++i) {
@@ -676,8 +676,6 @@ static PyObject* CompiledFormat_pack_raw(
676676
PyObject** data,
677677
Py_ssize_t n_data)
678678
{
679-
assert(PyTuple_Check(args));
680-
681679
ParsedElement elements_stack[SMALL_FORMAT_OPTIMIZATION];
682680
ParsedElement* elements = elements_stack;
683681
bool use_stack = compiled_fmt.ndescs <= SMALL_FORMAT_OPTIMIZATION;
@@ -1170,8 +1168,6 @@ CompiledFormatDict_pack_into_impl(PyCompiledFormatDictObject *self,
11701168
/*[clinic end generated code: output=ee246de261e9c699 input=290a9a4a3e3ed942]*/
11711169
// clang-format on
11721170
{
1173-
assert(PyTuple_Check(args));
1174-
11751171
PyObject* return_value = NULL;
11761172

11771173
Py_ssize_t nnames = PySequence_Fast_GET_SIZE(self->names);
@@ -1461,8 +1457,6 @@ pack_into_dict_impl(PyObject *module, const char *fmt, PyObject *names,
14611457
/*[clinic end generated code: output=619b415fc187011b input=e72dec46484ec66f]*/
14621458
// clang-format on
14631459
{
1464-
assert(PyTuple_Check(args));
1465-
14661460
PyObject* return_value = NULL;
14671461
PyCompiledFormatDictObject self;
14681462
memset(&self, 0, sizeof(self));
@@ -1692,28 +1686,38 @@ byteswap_impl(PyObject *module, PyObject *fmt, Py_buffer *data,
16921686
goto exit;
16931687
}
16941688

1695-
int sum = 0;
1696-
for (int i = 0; i < length; ++i) {
1697-
PyObject* item = PySequence_GetItem(fmt, i);
1698-
if (!item) {
1689+
long sum = 0;
1690+
if (PyUnicode_Check(fmt)) {
1691+
const char* cfmt = PyUnicode_AsUTF8(fmt);
1692+
if (!cfmt) {
16991693
goto exit;
17001694
}
17011695

1702-
long len = -1;
1703-
if (PyUnicode_Check(item)) {
1704-
PyObject* pylong = PyLong_FromUnicodeObject(item, 10);
1705-
len = PyLong_AsLong(pylong);
1706-
Py_DECREF(pylong);
1707-
}
1708-
else {
1709-
len = PyLong_AsLong(item);
1696+
for (int i = 0; i < length; ++i) {
1697+
int len = cfmt[i] - '0';
1698+
if (len < 0 || len > 9) {
1699+
PyErr_SetString(
1700+
PyExc_ValueError, "bad value in byteswap format");
1701+
goto exit;
1702+
}
1703+
sum += len;
1704+
count_iter[i] = len;
17101705
}
1706+
}
1707+
else {
1708+
for (int i = 0; i < length; ++i) {
1709+
PyObject* item = PySequence_GetItem(fmt, i);
1710+
if (!item) {
1711+
goto exit;
1712+
}
17111713

1712-
sum += len;
1713-
count_iter[i] = len;
1714-
Py_DECREF(item);
1715-
if (len < 0 || PyErr_Occurred()) {
1716-
goto exit;
1714+
long len = PyLong_AsLong(item);
1715+
sum += len;
1716+
count_iter[i] = len;
1717+
Py_DECREF(item);
1718+
if (len == -1 && PyErr_Occurred()) {
1719+
goto exit;
1720+
}
17171721
}
17181722
}
17191723

cbitstruct/tests/test_cornercase.py

+1
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ def test_byteswap_bad_args(self):
117117
self.assertRaises(TypeError, cbitstruct.byteswap, None, b"\xff")
118118
self.assertRaises(TypeError, cbitstruct.byteswap, "23")
119119
self.assertRaises(TypeError, cbitstruct.byteswap)
120+
self.assertRaises(ValueError, cbitstruct.byteswap, "\x02\x02", b"z")
120121

121122
def test_calcsize_bad_args(self):
122123
self.assertRaises(TypeError, cbitstruct.calcsize, "g32")

setup.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,19 @@
66

77
extra_compile_args = []
88
extra_link_args = []
9+
undef_macros = []
910

1011

1112
if sys.platform == "win32":
1213
extra_compile_args += []
1314
else:
14-
extra_compile_args += ["-std=c11", "-Wall", "-Werror"]
15+
extra_compile_args += ["-std=c11", "-Wall", "-Werror", "-O3"]
1516

1617

1718
if os.environ.get("COVERAGE"):
1819
extra_compile_args += ["-g", "-O0", "-fprofile-arcs", "-ftest-coverage"]
1920
extra_link_args += ["-fprofile-arcs"]
21+
undef_macros += ["NDEBUG"]
2022

2123

2224
with open("README.md", "r") as fh:
@@ -25,7 +27,7 @@
2527

2628
setup(
2729
name="cbitstruct",
28-
version="1.0.2",
30+
version="1.0.3",
2931
author="Quentin CHATEAU",
3032
author_email="[email protected]",
3133
license="GPLv3",
@@ -57,6 +59,7 @@
5759
extra_link_args=extra_link_args,
5860
sources=["cbitstruct/_cbitstruct.c"],
5961
include_dirs=["cbitstruct/"],
62+
undef_macros=undef_macros,
6063
)
6164
],
6265
packages=["cbitstruct", "cbitstruct.tests"],

0 commit comments

Comments
 (0)