Skip to content

pp_reverse - chunk-at-a-time string reversal (part 2/msvc fix) #23374

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: blead
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions perl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1452,18 +1452,26 @@ Use C<L</UINTMAX_C>> to get the largest type available on the platform.
=cut
*/
#ifndef UINT16_C
# if INTSIZE >= 2
# define UINT16_C(x) ((U16_TYPE)x##U)
# ifdef _MSC_VER
# define UINT16_C(x) ((U16TYPE)x##ui16)
# else
# define UINT16_C(x) ((U16_TYPE)x##UL)
# if INTSIZE >= 2
# define UINT16_C(x) ((U16TYPE)x##U)
# else
# define UINT16_C(x) ((U16TYPE)x##UL)
# endif
# endif
#endif

#ifndef UINT32_C
# if INTSIZE >= 4
# define UINT32_C(x) ((U32_TYPE)x##U)
# ifdef _MSC_VER
# define UINT32_C(x) ((U32TYPE)x##ui32)
# else
# define UINT32_C(x) ((U32_TYPE)x##UL)
# if INTSIZE >= 4
# define UINT32_C(x) ((U32TYPE)x##U)
# else
# define UINT32_C(x) ((U32TYPE)x##UL)
# endif
# endif
#endif

Expand Down
190 changes: 168 additions & 22 deletions pp.c
Original file line number Diff line number Diff line change
Expand Up @@ -6529,6 +6529,19 @@ PP(pp_unshift)
return NORMAL;
}

/* Helper names used by the chunked string reversal in pp_reverse below.
 * All four names are #undef'd again after pp_reverse.
 *
 * MSVC: the byte swaps map to the _byteswap_* routines, which the pragma
 * asks the compiler to treat as intrinsics. S_memcpy is only declared
 * here; on this path it is a function, not a macro, and its definition
 * appears later in the file.
 *
 * Elsewhere: the byte swaps forward to _swab_16_/_swab_32_/_swab_64_
 * (defined outside this section) and S_memcpy expands to plain memcpy.
 */
#ifdef _MSC_VER
# pragma intrinsic(_byteswap_ushort, _byteswap_ulong, _byteswap_uint64)
# define S_bswap16(_x) _byteswap_ushort(_x)
# define S_bswap32(_x) _byteswap_ulong(_x)
# define S_bswap64(_x) _byteswap_uint64(_x)
PERL_STATIC_FORCE_INLINE void *
S_memcpy(void *dest, const void *src,size_t count);
#else
# define S_bswap16(_x) _swab_16_(_x)
# define S_bswap32(_x) _swab_32_(_x)
# define S_bswap64(_x) _swab_64_(_x)
# define S_memcpy(_d,_s,_n) memcpy((_d),(_s),(_n))
#endif

PP_wrapped(pp_reverse, 0, 1)
{
Expand All @@ -6555,15 +6568,17 @@ PP_wrapped(pp_reverse, 0, 1)
SV *begin, *end;

if (can_preserve) {
if (!av_exists(av, i)) {
if (av_exists(av, j)) {
bool exi = av_exists(av, i);
bool exj = av_exists(av, j);
if (!exi) {
if (exj) {
SV *sv = av_delete(av, j, 0);
begin = *av_fetch(av, i, TRUE);
sv_setsv_mg(begin, sv);
}
continue;
}
else if (!av_exists(av, j)) {
else if (!exj) {
SV *sv = av_delete(av, i, 0);
end = *av_fetch(av, j, TRUE);
sv_setsv_mg(end, sv);
Expand Down Expand Up @@ -6644,18 +6659,19 @@ PP_wrapped(pp_reverse, 0, 1)
* in a single pass, rather than 2-3 passes. */

const char * src = SvPV_const(src_sv, len);
U8* dd;

/* Prepare the TARG. */
if (SvTYPE(TARG) < SVt_PV) {
if (SvTHINKFIRST(TARG))
SV_CHECK_THINKFIRST_COW_DROP(TARG); /* Drops any buffer or RV */
if (SvTYPE(TARG) < SVt_PV)
SvUPGRADE(TARG, SvTYPE(src_sv)); /* No buffer allocation here */
} else if(SvTHINKFIRST(TARG)) {
SV_CHECK_THINKFIRST_COW_DROP(TARG); /* Drops any buffer */
}
SvSETMAGIC(TARG);
SvGROW(TARG, len + 1);
else /* can't have SMG if < PVMG, SvROK/SvAMAGIC doesn't apply */
SvSETMAGIC(TARG);
dd = (U8*)SvGROW(TARG, len + 1);
SvCUR_set(TARG, len);
SvPOK_only(TARG);
*SvEND(TARG) = '\0';
dd[len] = '\0';
if (SvTAINTED(src_sv))
SvTAINT(TARG);

Expand All @@ -6664,9 +6680,9 @@ PP_wrapped(pp_reverse, 0, 1)
SvUTF8_on(TARG);

const U8* s = (const U8*)src;
U8* dd = (U8*)(SvPVX(TARG) + len);
const U8* send = (const U8*)(s + len);
int bytes = 0;
dd = dd + len;
while (s < send) {
bytes = UTF8SKIP(s);
if (bytes == 1) {
Expand All @@ -6679,10 +6695,69 @@ PP_wrapped(pp_reverse, 0, 1)
}
}
} else {
char * outp= SvPVX(TARG);
const char *p = src + len;
while (p != src)
*outp++ = *--p;
STRLEN i = 0;
STRLEN j = len;
U32 u32_1, u32_2;
U16 u16_1, u16_2;
char * outp = NUM2PTR(char*,dd);
/* Take a chunk of bytes from the front and from the
* back, reverse the bytes in each and swap the
* chunks over. This should have generally good
* performance but also is likely to be optimised
* into bswap instructions by the compiler.
*/
#ifdef HAS_QUAD
U64 u64_1, u64_2;
while (j - i >= 16) {
S_memcpy(&u64_1, src + j - 8, 8);
S_memcpy(&u64_2, src + i, 8);
u64_1 = S_bswap64(u64_1);
u64_2 = S_bswap64(u64_2);
S_memcpy(outp + j - 8, &u64_2, 8);
S_memcpy(outp + i, &u64_1, 8);
i += 8;
j -= 8;
}

if (j - i >= 8) {
S_memcpy(&u32_1, src + j - 4, 4);
S_memcpy(&u32_2, src + i, 4);
u32_1 = S_bswap32(u32_1);
u32_2 = S_bswap32(u32_2);
S_memcpy(outp + j - 4, &u32_2, 4);
S_memcpy(outp + i, &u32_1, 4);
i += 4;
j -= 4;
}
#else
while (j - i >= 8) {
S_memcpy(&u32_1, src + j - 4, 4);
S_memcpy(&u32_2, src + i, 4);
u32_1 = S_bswap32(u32_1);
u32_2 = S_bswap32(u32_2);
S_memcpy(outp + j - 4, &u32_2, 4);
S_memcpy(outp + i, &u32_1, 4);
i += 4;
j -= 4;
}
#endif
if (j - i >= 4) {
S_memcpy(&u16_1, src + j - 2, 2);
S_memcpy(&u16_2, src + i, 2);
u16_1 = S_bswap16(u16_1);
u16_2 = S_bswap16(u16_2);
S_memcpy(outp + j - 2, &u16_2, 2);
S_memcpy(outp + i, &u16_1, 2);
i += 2;
j -= 2;
}

/* Swap any remaining bytes one by one. */
while (i < j) {
outp[i] = src[j - 1];
outp[j - 1] = src[i];
i++; j--;
}
}
RETURN;
}
Expand All @@ -6695,9 +6770,10 @@ PP_wrapped(pp_reverse, 0, 1)

if (len > 1) {
/* The traditional way, operate on the current byte buffer */
char *down;
if (DO_UTF8(TARG)) { /* first reverse each character */
U8* s = (U8*)SvPVX(TARG);
char *down;
assert(SvPVX(TARG) == up);
U8* s = (U8*)up;
const U8* send = (U8*)(s + len);
while (s < send) {
if (UTF8_IS_INVARIANT(*s)) {
Expand All @@ -6720,18 +6796,76 @@ PP_wrapped(pp_reverse, 0, 1)
}
up = SvPVX(TARG);
}
down = SvPVX(TARG) + len - 1;
while (down > up) {
const char tmp = *up;
*up++ = *down;
*down-- = tmp;
STRLEN i = 0;
STRLEN j = len;
U32 u32_1, u32_2;
U16 u16_1, u16_2;
/* Reverse the buffer in place, in chunks where possible */
#ifdef HAS_QUAD
U64 u64_1, u64_2;
while (j - i >= 16) {
S_memcpy(&u64_1, up + j - 8, 8);
S_memcpy(&u64_2, up + i, 8);
u64_1 = S_bswap64(u64_1);
u64_2 = S_bswap64(u64_2);
S_memcpy(up + j - 8, &u64_2, 8);
S_memcpy(up + i, &u64_1, 8);
i += 8;
j -= 8;
}

if (j - i >= 8) {
S_memcpy(&u32_1, up + j - 4, 4);
S_memcpy(&u32_2, up + i, 4);
u32_1 = S_bswap32(u32_1);
u32_2 = S_bswap32(u32_2);
S_memcpy(up + j - 4, &u32_2, 4);
S_memcpy(up + i, &u32_1, 4);
i += 4;
j -= 4;
}
#else
while (j - i >= 8) {
S_memcpy(&u32_1, up + j - 4, 4);
S_memcpy(&u32_2, up + i, 4);
u32_1 = S_bswap32(u32_1);
u32_2 = S_bswap32(u32_2);
S_memcpy(up + j - 4, &u32_2, 4);
S_memcpy(up + i, &u32_1, 4);
i += 4;
j -= 4;
}
#endif
if (j - i >= 4) {
S_memcpy(&u16_1, up + j - 2, 2);
S_memcpy(&u16_2, up + i, 2);
u16_1 = S_bswap16(u16_1);
u16_2 = S_bswap16(u16_2);
S_memcpy(up + j - 2, &u16_2, 2);
S_memcpy(up + i, &u16_1, 2);
i += 2;
j -= 2;
}

/* Finally, swap any remaining bytes one-by-one. */
while (i < j) {
unsigned char tmp = up[i];
up[i] = up[j - 1];
up[j - 1] = tmp;
i++;
j--;
}
}
(void)SvPOK_only_UTF8(TARG);
}
RETURN;
}

/* Retire the helper names that were set up for pp_reverse so they do not
 * leak into the rest of the file. On the _MSC_VER path S_memcpy is a
 * function rather than a macro, so its #undef has no effect there. */
#undef S_memcpy
#undef S_bswap16
#undef S_bswap32
#undef S_bswap64

PP_wrapped(pp_split,
( (PL_op->op_private & OPpSPLIT_ASSIGN)
&& (PL_op->op_flags & OPf_STACKED))
Expand Down Expand Up @@ -8068,6 +8202,18 @@ PP(pp_is_tainted)
return NORMAL;
}

#ifdef _MSC_VER
/* this pragma can't be push/pop-ed vs whatever the cmd line to cl.exe was */
# pragma intrinsic(memcpy)

/* Forward a copy request to memcpy under a function name.
 *
 *   dest  - destination buffer, at least count bytes
 *   src   - source buffer, at least count bytes
 *   count - number of bytes to copy
 *
 * Returns whatever memcpy returns (the destination pointer).
 *
 * NOTE(review): the earlier prototype in this file is declared
 * PERL_STATIC_FORCE_INLINE while this definition carries no storage
 * class -- presumably it relies on the prior declaration's linkage;
 * confirm.
 */
void *
S_memcpy(void *dest, const void *src, size_t count)
{
    void *const copied_to = memcpy(dest, src, count);

    return copied_to;
}

#endif

/*
* ex: set ts=8 sts=4 sw=4 et:
*/
Loading