Browse Source

Use utf8_get/next_char in building up a string's repr

pull/671/head
Chris Angelico 11 years ago
parent
commit
44b0d5cff8
  1. 49
      py/objstr.c

49
py/objstr.c

@@ -83,40 +83,33 @@ void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *e
quote_char = '"'; quote_char = '"';
} }
print(env, "%c", quote_char); print(env, "%c", quote_char);
for (const byte *s = str_data, *top = str_data + str_len; s < top; s++) { const char *s = (const char *)str_data, *top = (const char *)str_data + str_len;
if (*s == quote_char) { while (s < top) {
unichar ch;
if (is_bytes) {
ch = *(unsigned char *)s++; // Don't sign-extend bytes
} else {
ch = utf8_get_char(s);
s = utf8_next_char(s);
}
if (ch == quote_char) {
print(env, "\\%c", quote_char); print(env, "\\%c", quote_char);
} else if (*s == '\\') { } else if (ch == '\\') {
print(env, "\\\\"); print(env, "\\\\");
} else if (32 <= *s && *s <= 126) { } else if (32 <= ch && ch <= 126) {
print(env, "%c", *s); print(env, "%c", ch);
} else if (*s == '\n') { } else if (ch == '\n') {
print(env, "\\n"); print(env, "\\n");
} else if (*s == '\r') { } else if (ch == '\r') {
print(env, "\\r"); print(env, "\\r");
} else if (*s == '\t') { } else if (ch == '\t') {
print(env, "\\t"); print(env, "\\t");
} else if (*s == '\x7f') { } else if (ch < 0x100) {
print(env, "\\x7f"); print(env, "\\x%02x", ch);
} else if (is_bytes) { } else if (ch < 0x10000) {
print(env, "\\x%02x", *s); print(env, "\\u%04x", ch);
} else {
// Non-ASCII character. Decode UTF-8.
machine_int_t ord = *s++ & 0x7F;
for (machine_int_t mask = 0x40; ord & mask; mask >>= 1) {
ord &= ~mask;
}
while (UTF8_IS_CONT(*s)) {
ord = (ord << 6) | (*s++ & 0x3F);
}
--s; // s will be incremented by the main loop
if (ord < 0x100) {
print(env, "\\x%02x", ord);
} else if (ord < 0x10000) {
print(env, "\\u%04x", ord);
} else { } else {
print(env, "\\U%08x", ord); print(env, "\\U%08x", ch);
}
} }
} }
print(env, "%c", quote_char); print(env, "%c", quote_char);

Loading…
Cancel
Save