From 20ce4e60957b48b7268c8ed814536904c5252392 Mon Sep 17 00:00:00 2001 From: Sami Vaarala Date: Fri, 1 Feb 2013 00:20:12 +0200 Subject: [PATCH] move a lonely function to unicode support file --- src/duk_unicode.h | 1 + src/duk_unicode_support.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/duk_unicode.h b/src/duk_unicode.h index d22954f8..de4c3029 100644 --- a/src/duk_unicode.h +++ b/src/duk_unicode.h @@ -48,6 +48,7 @@ int duk_unicode_get_xutf8_length(duk_u32 x); size_t duk_unicode_encode_xutf8(duk_u32 x, duk_u8 *out); size_t duk_unicode_encode_cesu8(duk_u32 x, duk_u8 *out); duk_u32 duk_unicode_xutf8_get_u32(duk_hthread *thr, duk_u8 **ptr, duk_u8 *ptr_start, duk_u8 *ptr_end); +duk_u32 duk_unicode_unvalidated_utf8_length(duk_u8 *data, duk_u32 blen); int duk_unicode_is_whitespace(int x); int duk_unicode_is_line_terminator(int x); int duk_unicode_is_identifier_start(int x); diff --git a/src/duk_unicode_support.c b/src/duk_unicode_support.c index a448207f..d4b1628c 100644 --- a/src/duk_unicode_support.c +++ b/src/duk_unicode_support.c @@ -215,6 +215,29 @@ duk_u32 duk_unicode_xutf8_get_u32(duk_hthread *thr, duk_u8 **ptr, duk_u8 *ptr_st return 0; /* never here */ } +/* (extended) utf-8 length without codepoint encoding validation, used + * for string interning (should probably be inlined). + */ +duk_u32 duk_unicode_unvalidated_utf8_length(duk_u8 *data, duk_u32 blen) { + duk_u8 *p = data; + duk_u8 *p_end = data + blen; + duk_u32 clen = 0; + + while (p < p_end) { + duk_u8 x = *p++; + if (x < 0x80) { + clen++; + } else if (x >= 0xc0 ) { + /* 10xxxxxx = continuation chars (0x80...0xbf), above that + * initial bytes. + */ + clen++; + } + } + + return clen; +} + /* * Unicode range matcher *