From 7e1e281af322d7764445aa66970413965d50e533 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 27 Nov 2024 10:40:59 -0500 Subject: [PATCH] fix: ensure that bytes is actually bytes (fixes: #1059) --- pdfminer/pdfdevice.py | 4 ++-- samples/contrib/issue-1059-cmap-decode.pdf | Bin 0 -> 3785 bytes tests/test_tools_pdf2txt.py | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 samples/contrib/issue-1059-cmap-decode.pdf diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py index 2374601c..184922f9 100644 --- a/pdfminer/pdfdevice.py +++ b/pdfminer/pdfdevice.py @@ -167,7 +167,7 @@ def render_string_horizontal( if isinstance(obj, (int, float)): x -= obj * dxscale needcharspace = True - else: + elif isinstance(obj, bytes): for cid in font.decode(obj): if needcharspace: x += charspace @@ -207,7 +207,7 @@ def render_string_vertical( if isinstance(obj, (int, float)): y -= obj * dxscale needcharspace = True - else: + elif isinstance(obj, bytes): for cid in font.decode(obj): if needcharspace: y += charspace diff --git a/samples/contrib/issue-1059-cmap-decode.pdf b/samples/contrib/issue-1059-cmap-decode.pdf new file mode 100644 index 0000000000000000000000000000000000000000..452973df63a8c203031ad9106e24c70172eba900 GIT binary patch literal 3785 zcmb7H*>c-96n*Dc;KdU;9gzTcW;~j)Eh$l3*N!AN4<0|z5+t#eNQH!E;{I3r0r|4t zi;c9L#pc))4|or7a5s`CH^XtCxruY~_1~}mISwO~R1beSXJ_6`zUB?_VPNXbANSn5 z%tgMf)`Nk=%Ox5)cvD+nR)_lhN_>2UC#<&=V#q9IJJa#>gVi|WfA z1(O)YlFYyNuK2RZpN-z+H88chDQ~Jnz2FU~9#^HnA4V8XGabO;O)HTLPH3yBBPi5@ zdXptgS%}9|yT8cXI*0x8c0AVOcm0UQr0>Tm!HX>N?=3gcHgLm^rH02SjX)rrkOvp~ zq)~!HiMf{|QY*kkhA7kbV2_N#N%0tN8crJdhL?{YGnl!YMPOGOUYnBmV5;hUFG{f@*O9hr1edl z`oL)|FhwCBkinFgMYZJ3K40)UFV~!$Q8)(VYz)VMKnfq-&lKxnwb z{0T-Qg?xoz3cMFcG{$)Wg$hXm4I+gkfd;Wcl0bt*AxWS?s*ohmAd~8}4%CV6v`$zY zYD|6UbLkq~a#3;{mVLFy-N}Eihx&ryiLs)E+D z|G0Z@kUd4T&ZF0{R5O;+#r|Nz>ih1o~I8I4{>*>;*GO%|8j2sqSyM z-&O#~oE;v7Y<$W)m|?zZA<}7|2-Y)nn0s$jLNY-f+uig(ST@q2^HjHGff+}z=sGKQ zIuVqv?TzOLv8n3bWOUo``qaCGf)qtn4xx|rhVOjpQ}~N$<|mZ(Y5IoJH>ciJwfwOG z)ba8F1<|`I7IoEBEAfupKmiIeuLwK}PQ7^{w!C*yRe!V7uCz|8*R6u3K5Ki$XXmry zCKzo>`sxDOH(Bf)=ErUnlO%BclmHAu7rL2(TLUxdrYQlaj1Am4pWZs9gOgHHubqpz zBU^ju#+lE^d?g=Pup)Fm#Y|(IXufmqyo>Aix3haP|J51IosZwwmql*S>I2u}3`U=Z zMi5iR2EIn0Vz&BJPx-k%KMq~i(U&k6o;OyXf<}-wHt-etH1nyx@!;qBUJ)EdQ#Vx6 zf!#yb?1wCYbbyB)VumKm|`e8 zc!@|Adn-~Wl@)2Q6{+nFevL>?eJV11fk>SItw@zwtw@cHC}VH%D?}RVQ<2dNM6yn$ zvLL~hVk$wyjJZ?>8Z6EA#_?(B8q*!1xZ8s?G}g-2pp6s5G9u3Ifg)?J zj4j->eRlQs2MyDD0UW7jlpZnH#v{sC8i5xjR$fIS>NGyq$ajO*wV6hii3CeT0Paw; zXy@n^G%`HKd^ZjyQ|(@1Bx@`?hmELeBwNSwL6hAmlL8IX248ONyMFxWeB(9jOU+l{ zZx%TCTB;5bM-e2$ir6ve`|=hk+p!oP#dvha7=yR2qcJ~%7meew^!a=Nz7QVS1>raB z;^$-0bFTaxIhv1!@T@)_i;~WKj;Qlu%WM4ZoE86Y@QWuu8{{hyyiQgXd|bji0K9F% U*GXOr>AxThGw0-FbUk+d2iZM|8UO$Q literal 0 HcmV?d00001 diff --git a/tests/test_tools_pdf2txt.py b/tests/test_tools_pdf2txt.py index e80a5e69..e1ae7a01 100644 --- a/tests/test_tools_pdf2txt.py +++ b/tests/test_tools_pdf2txt.py @@ -68,6 +68,12 @@ def test_contrib_issue_350(self): """ run("contrib/issue-00352-asw-oct96-p41.pdf") + def test_contrib_issue_1059_textseq(self): + """Ensure that CMaps are robust to non-strings in text + sequences + (https://github.com/pdfminer/pdfminer.six/issues/1059).""" + run("contrib/issue-1059-cmap-decode.pdf") + def test_scancode_patchelf(self): """Regression test for https://github.com/euske/pdfminer/issues/96""" run("scancode/patchelf.pdf")