duktape/src-input/duk_api_string.c


								/*

								 *  String manipulation

								 */


								#include "duk_internal.h"


								/* Shared worker behind duk_concat() and duk_join().
								 *
								 * Value stack effect when count_in > 0:
								 *
								 *   [ ... (sep) str1 str2 ... strN ] -> [ ... res ]
								 *
								 * The separator slot is present only when 'is_join' is true.
								 *
								 *   - count_in < 0: DUK_ERROR_RANGE_INVALID_COUNT() is raised.
								 *   - count_in == 0: an empty string is pushed and nothing else on the
								 *     value stack is touched.
								 *   - combined byte length above DUK_HSTRING_MAX_BYTELEN (or wrapping
								 *     duk_size_t): range error with DUK_STR_RESULT_TOO_LONG.
								 */
								DUK_LOCAL void duk__concat_and_join_helper(duk_hthread *thr, duk_idx_t count_in, duk_bool_t is_join) {
									duk_uint_t count;
									duk_uint_t pos;
									duk_size_t out_off;
									duk_size_t total_blen;
									duk_hstring *h_str;
									duk_uint8_t *out;

									DUK_CTX_ASSERT_VALID(thr);

									if (DUK_UNLIKELY(count_in <= 0)) {
										if (count_in == 0) {
											duk_push_hstring_empty(thr);
											return;
										}
										DUK_ASSERT(count_in < 0);
										DUK_ERROR_RANGE_INVALID_COUNT(thr);
										DUK_WO_NORETURN(return;);
									}
									count = (duk_uint_t) count_in;

									/* Pass 1a: account for the (count - 1) separator copies, if joining. */
									total_blen = (duk_size_t) 0;
									if (is_join) {
										duk_size_t sep_blen;
										duk_size_t gap_count;
										duk_size_t max_blen;

										h_str = duk_to_hstring(thr, -((duk_idx_t) count) - 1);
										DUK_ASSERT(h_str != NULL);

										/* Division based overflow test (see doc/code-issues.rst): the
										 * product sep_blen * gap_count is only computed once it is known
										 * to fit within the string length limit.
										 */
										sep_blen = (duk_size_t) duk_hstring_get_bytelen(h_str);
										gap_count = (duk_size_t) (count - 1);
										max_blen = (duk_size_t) DUK_HSTRING_MAX_BYTELEN;
										if (DUK_UNLIKELY(gap_count != 0 && sep_blen > max_blen / gap_count)) {
											/* The separators alone would be too long. */
											goto error_overflow;
										}
										total_blen = (duk_size_t) (sep_blen * gap_count);
									}

									/* Pass 1b: string coerce each part (str1 first, strN last) and add up
									 * the byte lengths, checking both for wrap and for the length limit.
									 */
									pos = count;
									while (pos >= 1) {
										duk_size_t part_blen;
										duk_size_t sum_blen;

										h_str = duk_to_hstring(thr, -((duk_idx_t) pos));
										part_blen = (duk_size_t) duk_hstring_get_bytelen(h_str);
										sum_blen = total_blen + part_blen;

										if (sum_blen < total_blen || /* wrapped */
										    sum_blen > (duk_size_t) DUK_HSTRING_MAX_BYTELEN) {
											goto error_overflow;
										}
										total_blen = sum_blen;
										pos--;
									}

									DUK_DDD(DUK_DDDPRINT("join/concat %lu strings, total length %lu bytes", (unsigned long) count, (unsigned long) total_blen));

									/* The output buffer lives on the value stack so that it stays
									 * reachable if an error (e.g. intern error) is thrown later.
									 */
									out = (duk_uint8_t *) duk_push_fixed_buffer_nozero(thr, total_blen);
									DUK_ASSERT(out != NULL);

									/* [ ... (sep) str1 str2 ... strN buf ] */

									/* Pass 2: copy parts in order, with a separator before every part
									 * except the first one.  All stack indices are shifted by one
									 * compared to pass 1 because of the buffer on the stack top.
									 */
									out_off = 0;
									pos = count;
									while (pos >= 1) {
										const duk_uint8_t *src;
										size_t src_blen;

										if (is_join && pos != count) {
											h_str = duk_require_hstring(thr, -((duk_idx_t) count) - 2);
											src = duk_hstring_get_data_and_bytelen(h_str, &src_blen);
											duk_memcpy(out + out_off, src, src_blen);
											out_off += src_blen;
										}

										h_str = duk_require_hstring(thr, -((duk_idx_t) pos) - 1);
										src = duk_hstring_get_data_and_bytelen(h_str, &src_blen);
										duk_memcpy(out + out_off, src, src_blen);
										out_off += src_blen;

										pos--;
									}

									DUK_ASSERT(out_off == total_blen);

									/* [ ... (sep) str1 str2 ... strN buf ] */

									/* Drop the inputs before interning to keep peak memory use down:
									 * the buffer is moved into the lowest input slot and the remaining
									 * inputs are popped.
									 */
									if (!is_join) {
										duk_replace(thr, -((duk_idx_t) count) - 1); /* lands on str1 */
										duk_pop_n(thr, (duk_idx_t) (count - 1));
									} else {
										duk_replace(thr, -((duk_idx_t) count) - 2); /* lands on sep */
										duk_pop_n(thr, (duk_idx_t) count);
									}

									/* [ ... buf ] */

									/* Each part is valid WTF-8 on its own, but unpaired surrogates may
									 * meet at the join points and must then be combined.  That is not
									 * done while copying; the intern WTF-8 sanitization step invoked
									 * through the buffer-to-string conversion is relied on instead.
									 */
									(void) duk_buffer_to_string(thr, -1); /* Safe if inputs are safe. */

									/* [ ... res ] */
									return;

								error_overflow:
									DUK_ERROR_RANGE(thr, DUK_STR_RESULT_TOO_LONG);
									DUK_WO_NORETURN(return;);
								}


								/* Concatenate the 'count' topmost value stack entries into one string.
								 * All work is delegated to the shared concat/join helper, run without a
								 * separator.
								 */
								DUK_EXTERNAL void duk_concat(duk_hthread *thr, duk_idx_t count) {
									DUK_ASSERT_API_ENTRY(thr);
									duk__concat_and_join_helper(thr, count, 0 /* is_join: no separator */);
								}


								#if defined(DUK_USE_PREFER_SIZE)
								/* Two-value concatenation, size optimized build: simply reuse the
								 * generic duk_concat() with a count of 2.
								 */
								DUK_INTERNAL void duk_concat_2(duk_hthread *thr) {
									DUK_ASSERT_API_ENTRY(thr);
									duk_concat(thr, 2);
								}
								#else /* DUK_USE_PREFER_SIZE */
								/* Two-value concatenation, dedicated implementation:
								 *
								 *   [ ... str1 str2 ] -> [ ... res ]
								 *
								 * The caller is trusted to have at least two values on the stack (only
								 * asserted).  A range error with DUK_STR_RESULT_TOO_LONG is raised if the
								 * combined byte length wraps or exceeds DUK_HSTRING_MAX_BYTELEN.
								 */
								DUK_INTERNAL void duk_concat_2(duk_hthread *thr) {
									duk_hstring *h_left;
									duk_hstring *h_right;
									duk_size_t left_blen;
									duk_size_t right_blen;
									duk_size_t total_blen;
									duk_uint8_t *out;

									DUK_ASSERT_API_ENTRY(thr);
									DUK_ASSERT(duk_get_top(thr) >= 2); /* Trusted caller. */

									/* String coerce left operand first, then right operand. */
									h_left = duk_to_hstring(thr, -2);
									h_right = duk_to_hstring(thr, -1);

									left_blen = (duk_size_t) duk_hstring_get_bytelen(h_left);
									right_blen = (duk_size_t) duk_hstring_get_bytelen(h_right);
									total_blen = left_blen + right_blen;

									if (DUK_UNLIKELY(total_blen < left_blen || /* wrapped */
									                 total_blen > (duk_size_t) DUK_HSTRING_MAX_BYTELEN)) {
										DUK_ERROR_RANGE(thr, DUK_STR_RESULT_TOO_LONG);
										DUK_WO_NORETURN(return;);
									}

									out = (duk_uint8_t *) duk_push_fixed_buffer_nozero(thr, total_blen);
									DUK_ASSERT(out != NULL);

									/* [ ... str1 str2 buf ] */

									duk_memcpy((void *) out, (const void *) duk_hstring_get_data(h_left), (size_t) left_blen);
									duk_memcpy((void *) (out + left_blen), (const void *) duk_hstring_get_data(h_right), (size_t) right_blen);

									/* A surrogate ending str1 and one starting str2 may need to be
									 * combined; this is left to the intern WTF-8 sanitize step.
									 */
									(void) duk_buffer_to_string(thr, -1); /* Safe if inputs are safe. */

									/* Move the converted value over str1 and drop str2. */
									duk_replace(thr, -3);
									duk_pop_known(thr);
								}
								#endif /* DUK_USE_PREFER_SIZE */


								/* Join the 'count' topmost value stack entries into one string, using
								 * the value just below them as the separator.  All work is delegated to
								 * the shared concat/join helper.
								 */
								DUK_EXTERNAL void duk_join(duk_hthread *thr, duk_idx_t count) {
									DUK_ASSERT_API_ENTRY(thr);
									duk__concat_and_join_helper(thr, count, 1 /* is_join: separator present */);
								}


								/* XXX: could map/decode be unified with duk_unicode_support.c code?

								 * Case conversion needs also the character surroundings though.

								 */


								/* Decode the string at 'idx' one codepoint at a time, front to back, and
								 * hand each codepoint to 'callback' together with the opaque 'udata'.
								 * The value stack is not modified here.
								 */
								DUK_EXTERNAL void duk_decode_string(duk_hthread *thr, duk_idx_t idx, duk_decode_char_function callback, void *udata) {
									duk_hstring *h_str;
									const duk_uint8_t *data_begin;
									const duk_uint8_t *data_end;
									const duk_uint8_t *cursor;

									DUK_ASSERT_API_ENTRY(thr);

									h_str = duk_require_hstring(thr, idx); /* Accept symbols. */
									DUK_ASSERT(h_str != NULL);

									data_begin = (const duk_uint8_t *) duk_hstring_get_data(h_str);
									data_end = data_begin + duk_hstring_get_bytelen(h_str);

									/* The decoder advances 'cursor' past each decoded codepoint. */
									cursor = data_begin;
									while (cursor < data_end) {
										duk_codepoint_t cp;

										cp = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &cursor, data_begin, data_end);
										callback(udata, cp);
									}
								}


								/* Replace the string at 'idx' with a new string built by passing every
								 * decoded codepoint through 'callback' (which also receives the opaque
								 * 'udata') and encoding the codepoint it returns.
								 */
								DUK_EXTERNAL void duk_map_string(duk_hthread *thr, duk_idx_t idx, duk_map_char_function callback, void *udata) {
									duk_hstring *h_str;
									duk_size_t src_blen;
									duk_bufwriter_ctx bw_storage;
									duk_bufwriter_ctx *bw;
									const duk_uint8_t *data_begin;
									const duk_uint8_t *data_end;
									const duk_uint8_t *cursor;

									DUK_ASSERT_API_ENTRY(thr);

									/* Normalize first: the output buffer pushed below shifts relative
									 * indices, and 'idx' is needed again for the final replace.
									 */
									idx = duk_normalize_index(thr, idx);

									h_str = duk_require_hstring(thr, idx); /* Accept symbols. */
									DUK_ASSERT(h_str != NULL);

									src_blen = duk_hstring_get_bytelen(h_str);

									/* Input byte length is used as the initial output size estimate. */
									bw = &bw_storage;
									DUK_BW_INIT_PUSHBUF(thr, bw, src_blen);

									data_begin = duk_hstring_get_data(h_str);
									data_end = data_begin + src_blen;

									/* XXX: output could be written in chunks with fewer ensure calls,
									 * but the relative benefit would be small here.
									 */
									cursor = data_begin;
									while (cursor < data_end) {
										duk_codepoint_t cp;

										cp = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &cursor, data_begin, data_end);
										cp = callback(udata, cp);

										/* No attempt is made here to pair the mapped codepoint with a
										 * preceding surrogate; surrogate pairs get WTF-8 converted in
										 * the buffer-to-string conversion below.
										 */
										DUK_BW_WRITE_ENSURE_XUTF8(thr, bw, cp);
									}

									/* Surrogates in join point are paired by string intern WTF-8 sanitize step. */
									DUK_BW_COMPACT(thr, bw);
									(void) duk_buffer_to_string(thr, -1);
									duk_replace(thr, idx);
								}


								/* Replace the string at 'idx' with its substring covering character
								 * offsets [start_offset, end_offset[.  Offsets are clamped rather than
								 * rejected: 'end_offset' is limited to the string's character length and
								 * 'start_offset' is limited to the (clamped) 'end_offset'.
								 */
								DUK_EXTERNAL void duk_substring(duk_hthread *thr, duk_idx_t idx, duk_size_t start_offset, duk_size_t end_offset) {
									duk_hstring *h_str;
									duk_size_t n_chars;

									DUK_ASSERT_API_ENTRY(thr);

									idx = duk_require_normalize_index(thr, idx);
									h_str = duk_require_hstring(thr, idx); /* Accept symbols. */
									DUK_ASSERT(h_str != NULL);

									/* Clamp end first, then start against the clamped end. */
									n_chars = duk_hstring_get_charlen(h_str);
									end_offset = (end_offset >= n_chars) ? n_chars : end_offset;
									start_offset = (start_offset > end_offset) ? end_offset : start_offset;

									/* Offsets are unsigned so lower bounds need no checking. */
									DUK_ASSERT_DISABLE(start_offset >= 0);
									DUK_ASSERT_DISABLE(end_offset >= 0);
									DUK_ASSERT(start_offset <= end_offset);
									DUK_ASSERT(end_offset <= duk_hstring_get_charlen(h_str));

									/* Guaranteed by string limits. */
									DUK_ASSERT(start_offset <= DUK_UINT32_MAX);
									DUK_ASSERT(end_offset <= DUK_UINT32_MAX);

									(void) duk_push_wtf8_substring_hstring(thr, h_str, start_offset, end_offset);
									duk_replace(thr, idx);
								}


								/* XXX: this is quite clunky.  Add Unicode helpers to scan backwards and

								 * forwards with a callback to process codepoints?

								 */

								/* Replace the string at 'idx' with a copy that has leading and trailing
								 * whitespace and line terminator codepoints removed.  When nothing is
								 * trimmed the original value is left in place and no new string is
								 * created.
								 */
								DUK_EXTERNAL void duk_trim(duk_hthread *thr, duk_idx_t idx) {
									duk_hstring *h_str;
									const duk_uint8_t *data_begin, *data_end; /* whole input, end exclusive */
									const duk_uint8_t *keep_begin, *keep_end; /* retained part, end exclusive */
									const duk_uint8_t *dec; /* scratch pointer advanced by the decoder */
									duk_codepoint_t cp;

									DUK_ASSERT_API_ENTRY(thr);

									idx = duk_require_normalize_index(thr, idx);
									h_str = duk_require_hstring(thr, idx); /* Accept symbols. */
									DUK_ASSERT(h_str != NULL);

									data_begin = duk_hstring_get_data(h_str);
									data_end = data_begin + duk_hstring_get_bytelen(h_str);

									/* Forward scan: step over codepoints for as long as they are
									 * whitespace or line terminators.
									 */
									keep_begin = data_begin;
									while (keep_begin < data_end) {
										dec = keep_begin;
										cp = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &dec, data_begin, data_end);
										if (!duk_unicode_is_whitespace(cp) && !duk_unicode_is_line_terminator(cp)) {
											break;
										}
										keep_begin = dec;
									}

									keep_end = data_end;
									if (keep_begin == data_end) {
										/* Entire string is whitespace, no backward scan needed. */
										keep_end = keep_begin;
									} else {
										/* Backward scan: back up to the initial byte of the last
										 * codepoint (skipping 10xxxxxx continuation bytes), decode it
										 * forwards, and drop it if it is whitespace or a line
										 * terminator.
										 */
										while (keep_end > data_begin) {
											const duk_uint8_t *lead = keep_end;

											do {
												lead--;
											} while (((*lead) & 0xc0) == 0x80 && lead > data_begin);

											dec = lead;
											cp = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &dec, data_begin, data_end);
											if (!duk_unicode_is_whitespace(cp) && !duk_unicode_is_line_terminator(cp)) {
												break;
											}
											keep_end = lead;
										}
									}

									/* Forward and backward scanning may disagree (possible for
									 * non-extended-UTF-8 strings); never let the range go negative.
									 */
									if (keep_end < keep_begin) {
										keep_end = keep_begin;
									}

									DUK_ASSERT(keep_begin >= data_begin && keep_begin <= data_end);
									DUK_ASSERT(keep_end >= data_begin && keep_end <= data_end);
									DUK_ASSERT(keep_end >= keep_begin);

									DUK_DDD(DUK_DDDPRINT("trim: p_start=%p, p_end=%p, q_start=%p, q_end=%p",
									                     (const void *) data_begin,
									                     (const void *) data_end,
									                     (const void *) keep_begin,
									                     (const void *) keep_end));

									if (keep_begin == data_begin && keep_end == data_end) {
										DUK_DDD(DUK_DDDPRINT("nothing was trimmed: avoid interning (hashing etc)"));
										return;
									}

									duk_push_lstring(thr, (const char *) keep_begin, (duk_size_t) (keep_end - keep_begin));
									duk_replace(thr, idx);
								}


								/* Return the character code at character offset 'char_offset' of the
								 * string at 'idx'.  An offset at or past the string's character length
								 * yields 0.  The lookup is done with surrogate awareness disabled.
								 *
								 * XXX: Share code with String.prototype.charCodeAt?  Main difference
								 * is handling of clamped offsets.
								 */
								DUK_EXTERNAL duk_codepoint_t duk_char_code_at(duk_hthread *thr, duk_idx_t idx, duk_size_t char_offset) {
									duk_hstring *h_str;

									DUK_ASSERT_API_ENTRY(thr);

									h_str = duk_require_hstring(thr, idx); /* Accept symbols. */
									DUK_ASSERT(h_str != NULL);

									DUK_ASSERT_DISABLE(char_offset >= 0); /* Always true, arg is unsigned. */

									if (char_offset < duk_hstring_get_charlen(h_str)) {
										duk_ucodepoint_t code;

										DUK_ASSERT(char_offset <= DUK_UINT_MAX); /* Guaranteed by string limits. */
										code = duk_hstring_char_code_at_raw(thr, h_str, (duk_uint_t) char_offset, 0 /*surrogate_aware*/);
										return (duk_codepoint_t) code;
									}

									/* Out of range offset. */
									return 0;
								}