|
@ -257,101 +257,21 @@ DUK_INTERNAL duk_ucodepoint_t duk_unicode_decode_xutf8_checked(duk_hthread *thr, |
|
|
/* Compute (extended) utf-8 length without codepoint encoding validation,
|
|
|
/* Compute (extended) utf-8 length without codepoint encoding validation,
|
|
|
* used for string interning. |
|
|
* used for string interning. |
|
|
* |
|
|
* |
|
|
* NOTE: This algorithm is performance critical (more so than string hashing |
|
|
* NOTE: This algorithm is performance critical, more so than string hashing |
|
|
* in some cases): it is needed when interning a string and it needs to scan |
|
|
* in some cases. It is needed when interning a string and needs to scan |
|
|
* every byte of the string with no skipping. Having an ASCII fast path |
|
|
* every byte of the string with no skipping. Having an ASCII fast path |
|
|
* would be useful (if possible in the algorithm). Several variants are |
|
|
* is useful if possible in the algorithm. The current algorithms were |
|
|
* left below, commented out; the active algorithm was chosen on x64 based |
|
|
* chosen from several variants, based on x64 gcc -O2 testing. See: |
|
|
* on gcc -O2 testing. |
|
|
* https://github.com/svaarala/duktape/pull/422
|
|
|
*/ |
|
|
*/ |
|
|
|
|
|
|
|
|
const duk_uint8_t duk__ncont_incr[256] = { |
|
|
#if defined(DUK_USE_PREFER_SIZE) |
|
|
/* 10xxxxxx = continuation chars (0x80...0xbf), above
|
|
|
/* Small variant; roughly 150 bytes smaller than the fast variant. */ |
|
|
* and below that initial bytes. |
|
|
DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length(const duk_uint8_t *data, duk_size_t blen) { |
|
|
*/ |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
/* Per-byte codepoint increment table, indexed by byte value: 1 for bytes
 * that start a character, 0 for continuation bytes (10xxxxxx, i.e.
 * 0x80...0xbf).  Bytes below and above the continuation range are treated
 * as initial bytes.
 */
const duk_uint8_t duk__nchar_incr[256] = {
	/* 0x00...0x7f: initial bytes */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

	/* 0x80...0xbf: continuation bytes */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

	/* 0xc0...0xff: initial bytes */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
|
|
|
|
|
|
|
|
|
|
|
/* Count characters in 'blen' bytes at 'data' by counting every byte that
 * is not a continuation byte.  Continuation bytes are 10xxxxxx
 * (0x80...0xbf); anything below or above that range is treated as an
 * initial byte.  No encoding validation is performed.
 */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_simple1(const duk_uint8_t *data, duk_size_t blen) {
	duk_size_t n_initial = 0;
	duk_size_t idx;

	for (idx = 0; idx < blen; idx++) {
		duk_uint8_t byte = data[idx];

		if (DUK_LIKELY(byte < 0x80 || byte >= 0xc0)) {
			n_initial++;
		}
	}

	return n_initial;
}
|
|
|
|
|
|
|
|
|
|
|
/* Count characters in 'blen' bytes at 'data' as the byte length minus the
 * number of bytes in the range 0x80...0xbf.  No encoding validation is
 * performed.
 */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_simple2(const duk_uint8_t *data, duk_size_t blen) {
	duk_size_t n_cont = 0;
	duk_size_t idx;

	for (idx = 0; idx < blen; idx++) {
		duk_uint8_t byte = data[idx];

		if (DUK_UNLIKELY(byte >= 0x80 && byte <= 0xbf)) {
			n_cont++;
		}
	}

	return blen - n_cont;
}
|
|
|
|
|
|
|
|
|
|
|
DUK_LOCAL duk_size_t duk__unicode_utf8clen_simple3(const duk_uint8_t *data, duk_size_t blen) { |
|
|
|
|
|
const duk_uint8_t *p; |
|
|
const duk_uint8_t *p; |
|
|
const duk_uint8_t *p_end; |
|
|
const duk_uint8_t *p_end; |
|
|
duk_size_t ncont; |
|
|
duk_size_t ncont; |
|
|
|
|
|
duk_size_t clen; |
|
|
|
|
|
|
|
|
p = data; |
|
|
p = data; |
|
|
p_end = data + blen; |
|
|
p_end = data + blen; |
|
@ -359,156 +279,53 @@ DUK_LOCAL duk_size_t duk__unicode_utf8clen_simple3(const duk_uint8_t *data, duk_ |
|
|
while (p != p_end) { |
|
|
while (p != p_end) { |
|
|
duk_uint8_t x; |
|
|
duk_uint8_t x; |
|
|
x = *p++; |
|
|
x = *p++; |
|
|
ncont += ((x & 0xc0) == 0x80) ? 1 : 0; |
|
|
if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) { |
|
|
} |
|
|
ncont++; |
|
|
|
|
|
|
|
|
return blen - ncont; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DUK_LOCAL duk_size_t duk__unicode_utf8clen_simple4(const duk_uint8_t *data, duk_size_t blen) { |
|
|
|
|
|
const duk_uint8_t *p; |
|
|
|
|
|
const duk_uint8_t *p_end; |
|
|
|
|
|
duk_size_t ncont; |
|
|
|
|
|
|
|
|
|
|
|
p = data; |
|
|
|
|
|
p_end = data + blen; |
|
|
|
|
|
ncont = 0; |
|
|
|
|
|
while (p != p_end) { |
|
|
|
|
|
/* Bit trick:
|
|
|
|
|
|
* 10xxxxxx ^ 01000000 = 11xxxxxx (and other bit patterns are 10xxxxxx or less) |
|
|
|
|
|
* + 01000000 = 1 00xxxxxx (and other bit patterns won't overflow to 9 bits) |
|
|
|
|
|
* >>> 8 = 1 |
|
|
|
|
|
*/ |
|
|
|
|
|
duk_small_uint_t x; |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
ncont += ((x ^ 0x40) + 0x40) >> 8; |
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
return blen - ncont; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DUK_LOCAL duk_size_t duk__unicode_utf8clen_lookup1(const duk_uint8_t *data, duk_size_t blen) { |
|
|
|
|
|
const duk_uint8_t *p; |
|
|
|
|
|
const duk_uint8_t *p_end; |
|
|
|
|
|
duk_size_t clen; |
|
|
|
|
|
|
|
|
|
|
|
p = data; |
|
|
|
|
|
p_end = data + blen; |
|
|
|
|
|
clen = 0; |
|
|
|
|
|
while (p != p_end) { |
|
|
|
|
|
duk_uint8_t x; |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
clen += duk__nchar_incr[x]; |
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DUK_ASSERT(ncont <= blen); |
|
|
|
|
|
clen = blen - ncont; |
|
|
|
|
|
DUK_ASSERT(clen <= blen); |
|
|
return clen; |
|
|
return clen; |
|
|
} |
|
|
} |
|
|
|
|
|
#else /* DUK_USE_PREFER_SIZE */ |
|
|
DUK_LOCAL duk_size_t duk__unicode_utf8clen_lookup2(const duk_uint8_t *data, duk_size_t blen) { |
|
|
/* This seems like a good overall approach. Fast path for ASCII in 4 byte
|
|
|
|
|
|
* blocks. |
|
|
|
|
|
*/ |
|
|
|
|
|
DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length(const duk_uint8_t *data, duk_size_t blen) { |
|
|
const duk_uint8_t *p; |
|
|
const duk_uint8_t *p; |
|
|
const duk_uint8_t *p_end; |
|
|
const duk_uint8_t *p_end; |
|
|
|
|
|
const duk_uint32_t *p32_end; |
|
|
|
|
|
const duk_uint32_t *p32; |
|
|
duk_size_t ncont; |
|
|
duk_size_t ncont; |
|
|
|
|
|
duk_size_t clen; |
|
|
|
|
|
|
|
|
|
|
|
ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */ |
|
|
p = data; |
|
|
p = data; |
|
|
p_end = data + blen; |
|
|
p_end = data + blen; |
|
|
ncont = 0; |
|
|
|
|
|
while (p != p_end) { |
|
|
|
|
|
duk_uint8_t x; |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
ncont += duk__ncont_incr[x]; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return blen - ncont; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll1(const duk_uint8_t *data, duk_size_t blen) { |
|
|
|
|
|
const duk_uint8_t *p = data; |
|
|
|
|
|
const duk_uint8_t *p_end; |
|
|
|
|
|
duk_size_t ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */ |
|
|
|
|
|
const duk_uint32_t *p32; |
|
|
|
|
|
|
|
|
|
|
|
if (blen < 16) { |
|
|
if (blen < 16) { |
|
|
goto skip_fastpath; |
|
|
goto skip_fastpath; |
|
|
} |
|
|
} |
|
|
/* Align 'p' to 4. */ |
|
|
|
|
|
while (((duk_small_uint_t) (duk_uintptr_t) (void *) p) & 0x03) { |
|
|
|
|
|
duk_uint8_t x; |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
|
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
/* Full, aligned 4-byte reads. */ |
|
|
|
|
|
p_end = data + blen; |
|
|
|
|
|
p_end = p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03)); |
|
|
|
|
|
p32 = (const duk_uint32_t *) p; |
|
|
|
|
|
while (p32 != (const duk_uint32_t *) p_end) { |
|
|
|
|
|
duk_uint32_t x; |
|
|
|
|
|
x = *p32++; |
|
|
|
|
|
if ((x & 0x80808080UL) == 0) { |
|
|
|
|
|
; /* ASCII fast path */ |
|
|
|
|
|
} else { |
|
|
|
|
|
if ((x & 0xc0000000UL) == 0x80000000UL) { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
if ((x & 0x00c00000UL) == 0x00800000UL) { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
if ((x & 0x0000c000UL) == 0x00008000UL) { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
if ((x & 0x000000c0UL) == 0x00000080UL) { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
p = (const duk_uint8_t *) p32; |
|
|
|
|
|
/* Fall through to handle the rest. */ |
|
|
|
|
|
skip_fastpath: |
|
|
|
|
|
p_end = data + blen; |
|
|
|
|
|
while (p != p_end) { |
|
|
|
|
|
duk_uint8_t x; |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
|
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DUK_ASSERT(ncont <= blen); |
|
|
|
|
|
return blen - ncont; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll2(const duk_uint8_t *data, duk_size_t blen) { |
|
|
|
|
|
const duk_uint8_t *p = data; |
|
|
|
|
|
const duk_uint8_t *p_end; |
|
|
|
|
|
duk_size_t ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */ |
|
|
|
|
|
const duk_uint32_t *p32; |
|
|
|
|
|
|
|
|
|
|
|
if (blen < 16) { |
|
|
/* Align 'p' to 4; the input data may have arbitrary alignment.
|
|
|
goto skip_fastpath; |
|
|
* End of string check not needed because blen >= 16. |
|
|
} |
|
|
*/ |
|
|
/* Align 'p' to 4. */ |
|
|
while (((duk_small_uint_t) (duk_uintptr_t) (const void *) p) & 0x03) { |
|
|
while (((duk_small_uint_t) (duk_uintptr_t) (void *) p) & 0x03) { |
|
|
|
|
|
duk_uint8_t x; |
|
|
duk_uint8_t x; |
|
|
x = *p++; |
|
|
x = *p++; |
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) { |
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
ncont++; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
/* Full, aligned 4-byte reads. */ |
|
|
/* Full, aligned 4-byte reads. */ |
|
|
p_end = data + blen; |
|
|
p32_end = (const duk_uint32_t *) (p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03))); |
|
|
p_end = p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03)); |
|
|
|
|
|
p32 = (const duk_uint32_t *) p; |
|
|
p32 = (const duk_uint32_t *) p; |
|
|
while (p32 != (const duk_uint32_t *) p_end) { |
|
|
while (p32 != (const duk_uint32_t *) p32_end) { |
|
|
duk_uint32_t x; |
|
|
duk_uint32_t x; |
|
|
x = *p32++; |
|
|
x = *p32++; |
|
|
if ((x & 0x80808080UL) == 0) { |
|
|
if (DUK_LIKELY((x & 0x80808080UL) == 0)) { |
|
|
; /* ASCII fast path */ |
|
|
; /* ASCII fast path */ |
|
|
} else { |
|
|
} else { |
|
|
/* Flip highest bit of each byte which changes
|
|
|
/* Flip highest bit of each byte which changes
|
|
@ -516,260 +333,38 @@ DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll2(const duk_uint8_t *data, duk_ |
|
|
* allows an easy bit mask test. |
|
|
* allows an easy bit mask test. |
|
|
*/ |
|
|
*/ |
|
|
x ^= 0x80808080UL; |
|
|
x ^= 0x80808080UL; |
|
|
if (!(x & 0xc0000000UL)) { |
|
|
if (DUK_UNLIKELY(!(x & 0xc0000000UL))) { |
|
|
ncont++; |
|
|
ncont++; |
|
|
} |
|
|
} |
|
|
if (!(x & 0x00c00000UL)) { |
|
|
if (DUK_UNLIKELY(!(x & 0x00c00000UL))) { |
|
|
ncont++; |
|
|
ncont++; |
|
|
} |
|
|
} |
|
|
if (!(x & 0x0000c000UL)) { |
|
|
if (DUK_UNLIKELY(!(x & 0x0000c000UL))) { |
|
|
ncont++; |
|
|
ncont++; |
|
|
} |
|
|
} |
|
|
if (!(x & 0x000000c0UL)) { |
|
|
if (DUK_UNLIKELY(!(x & 0x000000c0UL))) { |
|
|
ncont++; |
|
|
ncont++; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
p = (const duk_uint8_t *) p32; |
|
|
p = (const duk_uint8_t *) p32; |
|
|
/* Fall through to handle the rest. */ |
|
|
/* Fall through to handle the rest. */ |
|
|
skip_fastpath: |
|
|
|
|
|
p_end = data + blen; |
|
|
|
|
|
while (p != p_end) { |
|
|
|
|
|
duk_uint8_t x; |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
|
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DUK_ASSERT(ncont <= blen); |
|
|
|
|
|
return blen - ncont; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll3(const duk_uint8_t *data, duk_size_t blen) { |
|
|
|
|
|
const duk_uint8_t *p = data; |
|
|
|
|
|
const duk_uint8_t *p_end; |
|
|
|
|
|
duk_size_t ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */ |
|
|
|
|
|
const duk_uint32_t *p32; |
|
|
|
|
|
|
|
|
|
|
|
if (blen < 16) { |
|
|
|
|
|
goto skip_fastpath; |
|
|
|
|
|
} |
|
|
|
|
|
/* Align 'p' to 4. */ |
|
|
|
|
|
while (((duk_small_uint_t) (duk_uintptr_t) (void *) p) & 0x03) { |
|
|
|
|
|
duk_uint8_t x; |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
|
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
/* Full, aligned 4-byte reads. */ |
|
|
|
|
|
p_end = data + blen; |
|
|
|
|
|
p_end = p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03)); |
|
|
|
|
|
p32 = (const duk_uint32_t *) p; |
|
|
|
|
|
while (p32 != (const duk_uint32_t *) p_end) { |
|
|
|
|
|
/* Bit tricks to work 4 bytes at a time, similar to the bit trick below.
|
|
|
|
|
|
* |
|
|
|
|
|
* 10xxxxxx 10xxxxxx 10xxxxxxx 10xxxxxx |
|
|
|
|
|
* ^ 0x40404040 11xxxxxx 11xxxxxx 11xxxxxxx 11xxxxxx |
|
|
|
|
|
* >> 6 00000011 00000011 000000011 00000011 |
|
|
|
|
|
* + 0x01010101 00000100 00000100 000000100 00000100 |
|
|
|
|
|
* ^ ^ ^ ^ |
|
|
|
|
|
* `--------+---------+--------+---- carry if cont byte [+] |
|
|
|
|
|
*/ |
|
|
|
|
|
duk_uint32_t x; |
|
|
|
|
|
x = *p32++; |
|
|
|
|
|
x = ((x ^ 0x40404040UL) >> 6) + 0x01010101UL; |
|
|
|
|
|
x &= 0x04040404UL; |
|
|
|
|
|
x = (x & 0xffffUL) + (x >> 16); /* two step sum of carries */ |
|
|
|
|
|
x = (x & 0xffUL) + (x >> 8); |
|
|
|
|
|
ncont += x >> 2; |
|
|
|
|
|
} |
|
|
|
|
|
/* Fall through to handle the rest. */ |
|
|
|
|
|
skip_fastpath: |
|
|
skip_fastpath: |
|
|
p_end = data + blen; |
|
|
|
|
|
while (p != p_end) { |
|
|
while (p != p_end) { |
|
|
duk_uint8_t x; |
|
|
duk_uint8_t x; |
|
|
x = *p++; |
|
|
x = *p++; |
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) { |
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DUK_ASSERT(ncont <= blen); |
|
|
|
|
|
return blen - ncont; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll4(const duk_uint8_t *data, duk_size_t blen) { |
|
|
|
|
|
const duk_uint8_t *p = data; |
|
|
|
|
|
const duk_uint8_t *p_end; |
|
|
|
|
|
duk_size_t ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */ |
|
|
|
|
|
|
|
|
|
|
|
p_end = data + (blen & ((duk_size_t) (~0x03))); |
|
|
|
|
|
while (p < p_end) { |
|
|
|
|
|
duk_uint8_t x; |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
|
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
|
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
|
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
|
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
p_end = data + blen; |
|
|
|
|
|
while (p < p_end) { |
|
|
|
|
|
duk_uint8_t x; |
|
|
|
|
|
x = *p++; |
|
|
|
|
|
if (x < 0x80 || x >= 0xc0) { |
|
|
|
|
|
; |
|
|
|
|
|
} else { |
|
|
|
|
|
ncont++; |
|
|
ncont++; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
DUK_ASSERT(ncont <= blen); |
|
|
DUK_ASSERT(ncont <= blen); |
|
|
return blen - ncont; |
|
|
clen = blen - ncont; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* Count characters as byte length minus the sum of duk__ncont_incr[]
 * lookups over every byte.  The main loop is manually unrolled to process
 * four bytes per iteration; the remaining 0-3 bytes are handled one at a
 * time.  No encoding validation is performed.
 */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll5(const duk_uint8_t *data, duk_size_t blen) {
	const duk_uint8_t *cursor = data;
	duk_size_t n_blocks = blen >> 2;   /* full 4-byte blocks */
	duk_size_t n_tail = blen & 0x03;   /* leftover bytes after the blocks */
	duk_size_t ncont = 0;              /* accumulated table increments */

	for (; n_blocks > 0; n_blocks--) {
		ncont += duk__ncont_incr[cursor[0]];
		ncont += duk__ncont_incr[cursor[1]];
		ncont += duk__ncont_incr[cursor[2]];
		ncont += duk__ncont_incr[cursor[3]];
		cursor += 4;
	}

	for (; n_tail > 0; n_tail--) {
		ncont += duk__ncont_incr[*cursor++];
	}

	DUK_ASSERT(ncont <= blen);
	return blen - ncont;
}
|
|
|
|
|
|
|
|
|
|
|
/* Count characters as byte length minus the number of continuation bytes,
 * using a branchless per-byte test, unrolled four bytes per iteration.
 *
 * Bit trick: for a continuation byte 10xxxxxx, XOR with 0x40 gives
 * 11xxxxxx, and adding 0x40 then carries into bit 8; every other byte
 * pattern stays below 0x100.  Shifting right by 8 thus yields 1 for a
 * continuation byte and 0 otherwise.  No encoding validation is performed.
 */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll6(const duk_uint8_t *data, duk_size_t blen) {
	const duk_uint8_t *cursor = data;
	const duk_uint8_t *stop = data + (blen & ((duk_size_t) (~0x03)));
	duk_size_t ncont = 0;  /* number of continuation (non-initial) bytes in [0x80,0xbf] */

	/* Full 4-byte blocks. */
	for (; cursor != stop; cursor += 4) {
		duk_small_uint_t b0 = cursor[0];
		duk_small_uint_t b1 = cursor[1];
		duk_small_uint_t b2 = cursor[2];
		duk_small_uint_t b3 = cursor[3];

		ncont += ((b0 ^ 0x40) + 0x40) >> 8;
		ncont += ((b1 ^ 0x40) + 0x40) >> 8;
		ncont += ((b2 ^ 0x40) + 0x40) >> 8;
		ncont += ((b3 ^ 0x40) + 0x40) >> 8;
	}

	/* Leftover 0-3 bytes. */
	for (stop = data + blen; cursor != stop; cursor++) {
		duk_small_uint_t b = *cursor;

		ncont += ((b ^ 0x40) + 0x40) >> 8;
	}

	DUK_ASSERT(ncont <= blen);
	return blen - ncont;
}
|
|
|
|
|
|
|
|
|
|
|
/* Count characters as byte length minus the number of continuation bytes
 * (10xxxxxx, 0x80...0xbf), unrolled four bytes per iteration with the
 * shift of the per-byte bit trick postponed to once per block.
 *
 * Per byte, ((x & 0xc0) ^ 0x40) + 0x40 evaluates to:
 *
 *   00xxxxxx -> 0x080
 *   01xxxxxx -> 0x040
 *   10xxxxxx -> 0x100   (continuation byte)
 *   11xxxxxx -> 0x0c0
 *
 * Only bit 8 identifies a continuation byte, so each term must be masked
 * with 0x100 before it is accumulated; otherwise the lower bits of
 * non-continuation bytes add up (e.g. four ASCII bytes would sum to 0x100
 * or more) and corrupt the count.  The accumulated value is then shifted
 * down once per block to get the number of continuation bytes (0-4).
 *
 * No encoding validation is performed.  Returns the character count,
 * which is at most 'blen'.
 */
DUK_LOCAL duk_size_t duk__unicode_utf8clen_unroll7(const duk_uint8_t *data, duk_size_t blen) {
	const duk_uint8_t *p = data;
	const duk_uint8_t *p_end;
	duk_size_t ncont = 0;  /* number of continuation (non-initial) bytes in [0x80,0xbf] */

	/* Full 4-byte blocks. */
	p_end = data + (blen & ((duk_size_t) (~0x03)));
	while (p != p_end) {
		duk_small_uint_t x;
		duk_small_uint_t tmp = 0;  /* at most 4 * 0x100 */

		x = *p++;
		tmp += (((x & 0xc0) ^ 0x40) + 0x40) & 0x100;
		x = *p++;
		tmp += (((x & 0xc0) ^ 0x40) + 0x40) & 0x100;
		x = *p++;
		tmp += (((x & 0xc0) ^ 0x40) + 0x40) & 0x100;
		x = *p++;
		tmp += (((x & 0xc0) ^ 0x40) + 0x40) & 0x100;

		/* Postponed shift: one shift per block instead of per byte. */
		ncont += tmp >> 8;
	}

	/* Leftover 0-3 bytes: 10xxxxxx ^ 0x40 = 11xxxxxx, + 0x40 carries
	 * into bit 8; other bit patterns don't overflow to 9 bits.
	 */
	p_end = data + blen;
	while (p != p_end) {
		duk_small_uint_t x;

		x = *p++;
		ncont += ((x ^ 0x40) + 0x40) >> 8;
	}

	DUK_ASSERT(ncont <= blen);
	return blen - ncont;
}
|
|
|
|
|
|
|
|
|
|
|
DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length(const duk_uint8_t *data, duk_size_t blen) { |
|
|
|
|
|
duk_size_t clen; |
|
|
|
|
|
|
|
|
|
|
|
#if 0 |
|
|
|
|
|
clen = duk__unicode_utf8clen_simple1(data, blen); |
|
|
|
|
|
clen = duk__unicode_utf8clen_simple2(data, blen); |
|
|
|
|
|
clen = duk__unicode_utf8clen_simple3(data, blen); |
|
|
|
|
|
clen = duk__unicode_utf8clen_simple4(data, blen); |
|
|
|
|
|
|
|
|
|
|
|
clen = duk__unicode_utf8clen_lookup1(data, blen); |
|
|
|
|
|
clen = duk__unicode_utf8clen_lookup2(data, blen); |
|
|
|
|
|
|
|
|
|
|
|
clen = duk__unicode_utf8clen_unroll1(data, blen); |
|
|
|
|
|
clen = duk__unicode_utf8clen_unroll2(data, blen); |
|
|
|
|
|
clen = duk__unicode_utf8clen_unroll3(data, blen); |
|
|
|
|
|
clen = duk__unicode_utf8clen_unroll4(data, blen); |
|
|
|
|
|
clen = duk__unicode_utf8clen_unroll5(data, blen); |
|
|
|
|
|
clen = duk__unicode_utf8clen_unroll6(data, blen); |
|
|
|
|
|
clen = duk__unicode_utf8clen_unroll7(data, blen); |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
clen = duk__unicode_utf8clen_unroll1(data, blen); |
|
|
|
|
|
DUK_ASSERT(clen <= blen); |
|
|
DUK_ASSERT(clen <= blen); |
|
|
return clen; |
|
|
return clen; |
|
|
} |
|
|
} |
|
|
|
|
|
#endif /* DUK_USE_PREFER_SIZE */ |
|
|
|
|
|
|
|
|
/*
|
|
|
/*
|
|
|
* Unicode range matcher |
|
|
* Unicode range matcher |
|
|