Browse Source

Prune and clean up string charlen algorithms

pull/422/head
Sami Vaarala 9 years ago
parent
commit
bc7a732002
  1. 483
      src/duk_unicode_support.c

483
src/duk_unicode_support.c

@ -257,101 +257,21 @@ DUK_INTERNAL duk_ucodepoint_t duk_unicode_decode_xutf8_checked(duk_hthread *thr,
/* Compute (extended) utf-8 length without codepoint encoding validation, /* Compute (extended) utf-8 length without codepoint encoding validation,
* used for string interning. * used for string interning.
* *
* NOTE: This algorithm is performance critical (more so than string hashing * NOTE: This algorithm is performance critical, more so than string hashing
* in some cases): it is needed when interning a string and it needs to scan * in some cases. It is needed when interning a string and needs to scan
* every byte of the string with no skipping. Having an ASCII fast path * every byte of the string with no skipping. Having an ASCII fast path
* would be useful (if possible in the algorithm). Several variants are * is useful if possible in the algorithm. The current algorithms were
* left below, commented out; the active algorithm was chosen on x64 based * chosen from several variants, based on x64 gcc -O2 testing. See:
* on gcc -O2 testing. * https://github.com/svaarala/duktape/pull/422
*/ */
const duk_uint8_t duk__ncont_incr[256] = { #if defined(DUK_USE_PREFER_SIZE)
/* 10xxxxxx = continuation chars (0x80...0xbf), above /* Small variant; roughly 150 bytes smaller than the fast variant. */
* and below that initial bytes. DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length(const duk_uint8_t *data, duk_size_t blen) {
*/
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* Lookup table indexed by byte value: the entry is the amount by which a
 * character count is incremented when that byte is seen.  Summing the
 * entries over a buffer yields the (extended) UTF-8 character length.
 */
const duk_uint8_t duk__nchar_incr[256] = {
/* Each row covers 16 byte values.  Bytes 0x00...0x7f and 0xc0...0xff are
 * initial bytes and map to 1; bytes 0x80...0xbf (bit pattern 10xxxxxx)
 * are continuation bytes and map to 0.
 */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
/* Character length of an (extended) UTF-8 buffer, computed by counting every
 * byte that is not a continuation byte.  The encoding is not validated.
 *
 * data: start of the byte buffer
 * blen: buffer length in bytes
 *
 * Returns the number of non-continuation bytes in the buffer.
 */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_simple1(const duk_uint8_t *data, duk_size_t blen) {
	duk_size_t nchars = 0;
	duk_size_t idx;

	for (idx = 0; idx < blen; idx++) {
		duk_uint8_t byte = data[idx];

		/* Continuation bytes are exactly those of the form 10xxxxxx
		 * (0x80...0xbf); any other byte begins a new character.
		 */
		if (DUK_LIKELY((byte & 0xc0) != 0x80)) {
			nchars++;
		}
	}

	return nchars;
}
/* Character length of an (extended) UTF-8 buffer, computed as the byte
 * length minus the number of continuation bytes.  The encoding is not
 * validated.
 *
 * data: start of the byte buffer
 * blen: buffer length in bytes
 *
 * Returns blen minus the count of bytes in the range 0x80...0xbf.
 */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_simple2(const duk_uint8_t *data, duk_size_t blen) {
	const duk_uint8_t *cursor;
	const duk_uint8_t *limit = data + blen;
	duk_size_t n_continuation = 0;

	for (cursor = data; cursor != limit; cursor++) {
		/* 10xxxxxx marks a continuation byte; these are expected
		 * to be rare compared to initial bytes.
		 */
		if (DUK_UNLIKELY((*cursor & 0xc0) == 0x80)) {
			n_continuation++;
		}
	}

	return blen - n_continuation;
}
DUK_LOCAL duk_size_t duk__unicode_utf8clen_simple3(const duk_uint8_t *data, duk_size_t blen) {
const duk_uint8_t *p; const duk_uint8_t *p;
const duk_uint8_t *p_end; const duk_uint8_t *p_end;
duk_size_t ncont; duk_size_t ncont;
duk_size_t clen;
p = data; p = data;
p_end = data + blen; p_end = data + blen;
@ -359,156 +279,53 @@ DUK_LOCAL duk_size_t duk__unicode_utf8clen_simple3(const duk_uint8_t *data, duk_
while (p != p_end) { while (p != p_end) {
duk_uint8_t x; duk_uint8_t x;
x = *p++; x = *p++;
ncont += ((x & 0xc0) == 0x80) ? 1 : 0; if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) {
} ncont++;
return blen - ncont;
}
DUK_LOCAL duk_size_t duk__unicode_utf8clen_simple4(const duk_uint8_t *data, duk_size_t blen) {
const duk_uint8_t *p;
const duk_uint8_t *p_end;
duk_size_t ncont;
p = data;
p_end = data + blen;
ncont = 0;
while (p != p_end) {
/* Bit trick:
* 10xxxxxx ^ 01000000 = 11xxxxxx (and other bit patterns are 10xxxxxx or less)
* + 01000000 = 1 00xxxxxx (and other bit patterns won't overflow to 9 bits)
* >>> 8 = 1
*/
duk_small_uint_t x;
x = *p++;
ncont += ((x ^ 0x40) + 0x40) >> 8;
} }
return blen - ncont;
}
DUK_LOCAL duk_size_t duk__unicode_utf8clen_lookup1(const duk_uint8_t *data, duk_size_t blen) {
const duk_uint8_t *p;
const duk_uint8_t *p_end;
duk_size_t clen;
p = data;
p_end = data + blen;
clen = 0;
while (p != p_end) {
duk_uint8_t x;
x = *p++;
clen += duk__nchar_incr[x];
} }
DUK_ASSERT(ncont <= blen);
clen = blen - ncont;
DUK_ASSERT(clen <= blen);
return clen; return clen;
} }
#else /* DUK_USE_PREFER_SIZE */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_lookup2(const duk_uint8_t *data, duk_size_t blen) { /* This seems like a good overall approach. Fast path for ASCII in 4 byte
* blocks.
*/
DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length(const duk_uint8_t *data, duk_size_t blen) {
const duk_uint8_t *p; const duk_uint8_t *p;
const duk_uint8_t *p_end; const duk_uint8_t *p_end;
const duk_uint32_t *p32_end;
const duk_uint32_t *p32;
duk_size_t ncont; duk_size_t ncont;
duk_size_t clen;
ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */
p = data; p = data;
p_end = data + blen; p_end = data + blen;
ncont = 0;
while (p != p_end) {
duk_uint8_t x;
x = *p++;
ncont += duk__ncont_incr[x];
}
return blen - ncont;
}
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll1(const duk_uint8_t *data, duk_size_t blen) {
const duk_uint8_t *p = data;
const duk_uint8_t *p_end;
duk_size_t ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */
const duk_uint32_t *p32;
if (blen < 16) { if (blen < 16) {
goto skip_fastpath; goto skip_fastpath;
} }
/* Align 'p' to 4. */
while (((duk_small_uint_t) (duk_uintptr_t) (void *) p) & 0x03) {
duk_uint8_t x;
x = *p++;
if (x < 0x80 || x >= 0xc0) {
;
} else {
ncont++;
}
}
/* Full, aligned 4-byte reads. */
p_end = data + blen;
p_end = p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03));
p32 = (const duk_uint32_t *) p;
while (p32 != (const duk_uint32_t *) p_end) {
duk_uint32_t x;
x = *p32++;
if ((x & 0x80808080UL) == 0) {
; /* ASCII fast path */
} else {
if ((x & 0xc0000000UL) == 0x80000000UL) {
ncont++;
}
if ((x & 0x00c00000UL) == 0x00800000UL) {
ncont++;
}
if ((x & 0x0000c000UL) == 0x00008000UL) {
ncont++;
}
if ((x & 0x000000c0UL) == 0x00000080UL) {
ncont++;
}
}
}
p = (const duk_uint8_t *) p32;
/* Fall through to handle the rest. */
skip_fastpath:
p_end = data + blen;
while (p != p_end) {
duk_uint8_t x;
x = *p++;
if (x < 0x80 || x >= 0xc0) {
;
} else {
ncont++;
}
}
DUK_ASSERT(ncont <= blen);
return blen - ncont;
}
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll2(const duk_uint8_t *data, duk_size_t blen) {
const duk_uint8_t *p = data;
const duk_uint8_t *p_end;
duk_size_t ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */
const duk_uint32_t *p32;
if (blen < 16) { /* Align 'p' to 4; the input data may have arbitrary alignment.
goto skip_fastpath; * End of string check not needed because blen >= 16.
} */
/* Align 'p' to 4. */ while (((duk_small_uint_t) (duk_uintptr_t) (const void *) p) & 0x03) {
while (((duk_small_uint_t) (duk_uintptr_t) (void *) p) & 0x03) {
duk_uint8_t x; duk_uint8_t x;
x = *p++; x = *p++;
if (x < 0x80 || x >= 0xc0) { if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) {
;
} else {
ncont++; ncont++;
} }
} }
/* Full, aligned 4-byte reads. */ /* Full, aligned 4-byte reads. */
p_end = data + blen; p32_end = (const duk_uint32_t *) (p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03)));
p_end = p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03));
p32 = (const duk_uint32_t *) p; p32 = (const duk_uint32_t *) p;
while (p32 != (const duk_uint32_t *) p_end) { while (p32 != (const duk_uint32_t *) p32_end) {
duk_uint32_t x; duk_uint32_t x;
x = *p32++; x = *p32++;
if ((x & 0x80808080UL) == 0) { if (DUK_LIKELY((x & 0x80808080UL) == 0)) {
; /* ASCII fast path */ ; /* ASCII fast path */
} else { } else {
/* Flip highest bit of each byte which changes /* Flip highest bit of each byte which changes
@ -516,260 +333,38 @@ DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll2(const duk_uint8_t *data, duk_
* allows an easy bit mask test. * allows an easy bit mask test.
*/ */
x ^= 0x80808080UL; x ^= 0x80808080UL;
if (!(x & 0xc0000000UL)) { if (DUK_UNLIKELY(!(x & 0xc0000000UL))) {
ncont++; ncont++;
} }
if (!(x & 0x00c00000UL)) { if (DUK_UNLIKELY(!(x & 0x00c00000UL))) {
ncont++; ncont++;
} }
if (!(x & 0x0000c000UL)) { if (DUK_UNLIKELY(!(x & 0x0000c000UL))) {
ncont++; ncont++;
} }
if (!(x & 0x000000c0UL)) { if (DUK_UNLIKELY(!(x & 0x000000c0UL))) {
ncont++; ncont++;
} }
} }
} }
p = (const duk_uint8_t *) p32; p = (const duk_uint8_t *) p32;
/* Fall through to handle the rest. */ /* Fall through to handle the rest. */
skip_fastpath:
p_end = data + blen;
while (p != p_end) {
duk_uint8_t x;
x = *p++;
if (x < 0x80 || x >= 0xc0) {
;
} else {
ncont++;
}
}
DUK_ASSERT(ncont <= blen);
return blen - ncont;
}
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll3(const duk_uint8_t *data, duk_size_t blen) {
const duk_uint8_t *p = data;
const duk_uint8_t *p_end;
duk_size_t ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */
const duk_uint32_t *p32;
if (blen < 16) {
goto skip_fastpath;
}
/* Align 'p' to 4. */
while (((duk_small_uint_t) (duk_uintptr_t) (void *) p) & 0x03) {
duk_uint8_t x;
x = *p++;
if (x < 0x80 || x >= 0xc0) {
;
} else {
ncont++;
}
}
/* Full, aligned 4-byte reads. */
p_end = data + blen;
p_end = p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03));
p32 = (const duk_uint32_t *) p;
while (p32 != (const duk_uint32_t *) p_end) {
/* Bit tricks to work 4 bytes at a time, similar to the bit trick below.
*
* 10xxxxxx 10xxxxxx 10xxxxxxx 10xxxxxx
* ^ 0x40404040 11xxxxxx 11xxxxxx 11xxxxxxx 11xxxxxx
* >> 6 00000011 00000011 000000011 00000011
* + 0x01010101 00000100 00000100 000000100 00000100
* ^ ^ ^ ^
* `--------+---------+--------+---- carry if cont byte [+]
*/
duk_uint32_t x;
x = *p32++;
x = ((x ^ 0x40404040UL) >> 6) + 0x01010101UL;
x &= 0x04040404UL;
x = (x & 0xffffUL) + (x >> 16); /* two step sum of carries */
x = (x & 0xffUL) + (x >> 8);
ncont += x >> 2;
}
/* Fall through to handle the rest. */
skip_fastpath: skip_fastpath:
p_end = data + blen;
while (p != p_end) { while (p != p_end) {
duk_uint8_t x; duk_uint8_t x;
x = *p++; x = *p++;
if (x < 0x80 || x >= 0xc0) { if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) {
;
} else {
ncont++;
}
}
DUK_ASSERT(ncont <= blen);
return blen - ncont;
}
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll4(const duk_uint8_t *data, duk_size_t blen) {
const duk_uint8_t *p = data;
const duk_uint8_t *p_end;
duk_size_t ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */
p_end = data + (blen & ((duk_size_t) (~0x03)));
while (p < p_end) {
duk_uint8_t x;
x = *p++;
if (x < 0x80 || x >= 0xc0) {
;
} else {
ncont++;
}
x = *p++;
if (x < 0x80 || x >= 0xc0) {
;
} else {
ncont++;
}
x = *p++;
if (x < 0x80 || x >= 0xc0) {
;
} else {
ncont++;
}
x = *p++;
if (x < 0x80 || x >= 0xc0) {
;
} else {
ncont++;
}
}
p_end = data + blen;
while (p < p_end) {
duk_uint8_t x;
x = *p++;
if (x < 0x80 || x >= 0xc0) {
;
} else {
ncont++; ncont++;
} }
} }
DUK_ASSERT(ncont <= blen); DUK_ASSERT(ncont <= blen);
return blen - ncont; clen = blen - ncont;
}
/* Character length of an (extended) UTF-8 buffer using the continuation
 * byte lookup table, with the main loop manually unrolled to process four
 * bytes per iteration.  The encoding is not validated.
 *
 * data: start of the byte buffer
 * blen: buffer length in bytes
 *
 * Returns blen minus the number of continuation bytes found.
 */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll5(const duk_uint8_t *data, duk_size_t blen) {
	const duk_uint8_t *cursor = data;
	duk_size_t blocks_left = blen >> 2;   /* number of full 4-byte blocks */
	duk_size_t tail_left = blen & 0x03;   /* 0-3 bytes after the last full block */
	duk_size_t ncont = 0;                 /* continuation bytes seen so far */

	/* Unrolled part: four table lookups per iteration. */
	while (blocks_left > 0) {
		ncont += duk__ncont_incr[cursor[0]];
		ncont += duk__ncont_incr[cursor[1]];
		ncont += duk__ncont_incr[cursor[2]];
		ncont += duk__ncont_incr[cursor[3]];
		cursor += 4;
		blocks_left--;
	}

	/* Remaining bytes, one at a time. */
	while (tail_left > 0) {
		ncont += duk__ncont_incr[*cursor];
		cursor++;
		tail_left--;
	}

	DUK_ASSERT(ncont <= blen);
	return blen - ncont;
}
/* Character length of an (extended) UTF-8 buffer using a branchless bit
 * trick, with the main loop manually unrolled to four bytes per iteration.
 * The encoding is not validated.
 *
 * Bit trick, for a byte value x in [0,255]:
 *   x ^ 0x40    maps 10xxxxxx to 11xxxxxx (other patterns stay <= 0xbf)
 *   ... + 0x40  carries into bit 8 only for the 11xxxxxx case
 *   ... >> 8    yields 1 for a continuation byte, 0 otherwise
 *
 * data: start of the byte buffer
 * blen: buffer length in bytes
 *
 * Returns blen minus the number of continuation bytes found.
 */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll6(const duk_uint8_t *data, duk_size_t blen) {
	const duk_uint8_t *cursor = data;
	const duk_uint8_t *blocks_end = data + ((blen >> 2) << 2);  /* end of last full 4-byte block */
	const duk_uint8_t *data_end = data + blen;
	duk_size_t ncont = 0;  /* continuation bytes seen so far */
	duk_small_uint_t b;

	/* Unrolled part: four bytes per iteration. */
	for (; cursor != blocks_end; cursor += 4) {
		b = cursor[0];
		ncont += ((b ^ 0x40) + 0x40) >> 8;
		b = cursor[1];
		ncont += ((b ^ 0x40) + 0x40) >> 8;
		b = cursor[2];
		ncont += ((b ^ 0x40) + 0x40) >> 8;
		b = cursor[3];
		ncont += ((b ^ 0x40) + 0x40) >> 8;
	}

	/* Remaining 0-3 bytes. */
	for (; cursor != data_end; cursor++) {
		b = *cursor;
		ncont += ((b ^ 0x40) + 0x40) >> 8;
	}

	DUK_ASSERT(ncont <= blen);
	return blen - ncont;
}
/* Character length of an (extended) UTF-8 buffer using a branchless bit
 * trick, unrolled to four bytes per iteration with the final shift
 * postponed to once per 4-byte block.  The encoding is not validated.
 *
 * Bit trick, for a byte value x in [0,255]:
 *   x ^ 0x40    maps 10xxxxxx to 11xxxxxx (other patterns stay <= 0xbf)
 *   ... + 0x40  carries into bit 8 only for the 11xxxxxx case
 *   ... >> 8    yields 1 for a continuation byte, 0 otherwise
 *
 * data: start of the byte buffer
 * blen: buffer length in bytes
 *
 * Returns blen minus the number of continuation bytes found.
 */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll7(const duk_uint8_t *data, duk_size_t blen) {
	const duk_uint8_t *p = data;
	const duk_uint8_t *p_end;
	duk_size_t ncont = 0;  /* number of continuation (non-initial) bytes in [0x80,0xbf] */

	p_end = data + (blen & ((duk_size_t) (~0x03)));
	while (p != p_end) {
		/* Same bit trick as in the tail loop below, but the shift is
		 * postponed and done once per block.  Each per-byte term is
		 * masked down to its carry bit (bit 8) before being summed:
		 * without the mask the low bits of non-continuation bytes
		 * (0x40...0xc0 each) would add up and overflow into bit 8,
		 * producing false continuation counts.  After masking, 'tmp'
		 * is 0x100 times the number of continuation bytes in the
		 * block (at most 0x400).
		 */
		duk_small_uint_t x;
		duk_small_uint_t tmp = 0;

		x = *p++;
		tmp += ((x ^ 0x40) + 0x40) & 0x100;
		x = *p++;
		tmp += ((x ^ 0x40) + 0x40) & 0x100;
		x = *p++;
		tmp += ((x ^ 0x40) + 0x40) & 0x100;
		x = *p++;
		tmp += ((x ^ 0x40) + 0x40) & 0x100;

		/* Apply the postponed shift: 0...4 continuation bytes. */
		ncont += tmp >> 8;
	}

	/* Remaining 0-3 bytes, one at a time. */
	p_end = data + blen;
	while (p != p_end) {
		duk_small_uint_t x;

		x = *p++;
		ncont += ((x ^ 0x40) + 0x40) >> 8;
	}

	DUK_ASSERT(ncont <= blen);
	return blen - ncont;
}
DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length(const duk_uint8_t *data, duk_size_t blen) {
duk_size_t clen;
#if 0
clen = duk__unicode_utf8clen_simple1(data, blen);
clen = duk__unicode_utf8clen_simple2(data, blen);
clen = duk__unicode_utf8clen_simple3(data, blen);
clen = duk__unicode_utf8clen_simple4(data, blen);
clen = duk__unicode_utf8clen_lookup1(data, blen);
clen = duk__unicode_utf8clen_lookup2(data, blen);
clen = duk__unicode_utf8clen_unroll1(data, blen);
clen = duk__unicode_utf8clen_unroll2(data, blen);
clen = duk__unicode_utf8clen_unroll3(data, blen);
clen = duk__unicode_utf8clen_unroll4(data, blen);
clen = duk__unicode_utf8clen_unroll5(data, blen);
clen = duk__unicode_utf8clen_unroll6(data, blen);
clen = duk__unicode_utf8clen_unroll7(data, blen);
#endif
clen = duk__unicode_utf8clen_unroll1(data, blen);
DUK_ASSERT(clen <= blen); DUK_ASSERT(clen <= blen);
return clen; return clen;
} }
#endif /* DUK_USE_PREFER_SIZE */
/* /*
* Unicode range matcher * Unicode range matcher

Loading…
Cancel
Save