Browse Source

py: Be more precise about unicode type and disabled unicode behaviour.

pull/1094/head
Damien George 10 years ago
parent
commit
16677ce311
  1. 10
      py/lexer.c
  2. 10
      py/misc.h
  3. 4
      py/modbuiltins.c

10
py/lexer.c

@ -492,11 +492,19 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
}
}
if (c != MP_LEXER_EOF) {
#if MICROPY_PY_BUILTINS_STR_UNICODE
if (c < 0x110000 && !is_bytes) {
vstr_add_char(&lex->vstr, c);
} else if (c < 0x100 && is_bytes) {
vstr_add_byte(&lex->vstr, c);
} else {
}
#else
// without unicode everything is just added as an 8-bit byte
if (c < 0x100) {
vstr_add_byte(&lex->vstr, c);
}
#endif
else {
assert(!"TODO: Throw an error, invalid escape code probably");
}
}

10
py/misc.h

@ -92,7 +92,15 @@ size_t m_get_peak_bytes_allocated(void);
/** unichar / UTF-8 *********************************************/
typedef int unichar; // TODO
#if MICROPY_PY_BUILTINS_STR_UNICODE
#include <stdint.h> // only include if we need it
// with unicode enabled we need a type which can fit chars up to 0x10ffff
typedef uint32_t unichar;
#else
// without unicode enabled we only need to fit chars up to 0xff
// (on 16-bit archs uint is 16 bits wide and more efficient than uint32_t)
typedef uint unichar;
#endif
unichar utf8_get_char(const byte *s);
const byte *utf8_next_char(const byte *s);

4
py/modbuiltins.c

@ -182,11 +182,11 @@ STATIC mp_obj_t mp_builtin_chr(mp_obj_t o_in) {
return mp_obj_new_str(str, len, true);
#else
mp_int_t ord = mp_obj_get_int(o_in);
if (0 <= ord && ord <= 0x10ffff) {
if (0 <= ord && ord <= 0xff) {
char str[1] = {ord};
return mp_obj_new_str(str, 1, true);
} else {
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(0x110000)"));
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(256)"));
}
#endif
}

Loading…
Cancel
Save