Browse Source

py/objstr: Skip whitespace in bytes.fromhex().

Skip whitespace characters between pairs of hex digits.
This makes `bytes.fromhex()` compatible with CPython.

Includes a simple test in `tests/basics/builtin_str_hex.py`.

Signed-off-by: Glenn Moloney <glenn.moloney@gmail.com>
pull/12057/head
Glenn Moloney 3 months ago
parent
commit
6367099f83
  1. 24
      py/objstr.c
  2. 15
      tests/basics/builtin_str_hex.py
  3. 11
      tests/basics/builtin_str_hex.py.exp

24
py/objstr.c

@ -2014,27 +2014,21 @@ mp_obj_t mp_obj_bytes_fromhex(mp_obj_t type_in, mp_obj_t data) {
mp_buffer_info_t bufinfo;
mp_get_buffer_raise(data, &bufinfo, MP_BUFFER_READ);
if ((bufinfo.len & 1) != 0) {
mp_raise_ValueError(MP_ERROR_TEXT("odd-length string"));
}
vstr_t vstr;
vstr_init_len(&vstr, bufinfo.len / 2);
byte *in = bufinfo.buf, *out = (byte *)vstr.buf;
byte hex_byte = 0;
for (mp_uint_t i = bufinfo.len; i--;) {
byte hex_ch = *in++;
if (unichar_isxdigit(hex_ch)) {
hex_byte += unichar_xdigit_value(hex_ch);
} else {
mp_raise_ValueError(MP_ERROR_TEXT("non-hex digit found"));
byte *in_end = in + bufinfo.len;
mp_uint_t ch1, ch2;
while (in < in_end) {
if (unichar_isspace(ch1 = *in++)) {
continue; // Skip whitespace between hex digit pairs
}
if (i & 1) {
hex_byte <<= 4;
} else {
*out++ = hex_byte;
hex_byte = 0;
if (in == in_end || !unichar_isxdigit(ch1) || !unichar_isxdigit(ch2 = *in++)) {
mp_raise_ValueError(MP_ERROR_TEXT("non-hex digit"));
}
*out++ = (byte)((unichar_xdigit_value(ch1) << 4) | unichar_xdigit_value(ch2));
}
vstr.len = out - (byte *)vstr.buf; // Length may be shorter due to whitespace in input
return mp_obj_new_str_type_from_vstr(MP_OBJ_TO_PTR(type_in), &vstr);
}

15
tests/basics/builtin_str_hex.py

@ -20,5 +20,20 @@ for x in (
"08090a0b0c0d0e0f",
"7f80ff",
"313233344142434461626364",
"ab\tcd\n ef ",
"ab cd ef",
"ab cd ef ",
" ab cd ef ",
# Invalid hex strings:
"abcde", # Odd number of hex digits
"ab cd e",
"a b cd ef", # Space inside a hex pair (splits the two digits)
"ab cd e f ",
"abga", # Invalid hex digits
"ab_cd",
"ab:cd",
):
try:
print(bytes.fromhex(x))
except ValueError as e:
print("ValueError:", e)

11
tests/basics/builtin_str_hex.py.exp

@ -26,3 +26,14 @@ b'\x00\x01\x02\x03\x04\x05\x06\x07'
b'\x08\t\n\x0b\x0c\r\x0e\x0f'
b'\x7f\x80\xff'
b'1234ABCDabcd'
b'\xab\xcd\xef'
b'\xab\xcd\xef'
b'\xab\xcd\xef'
b'\xab\xcd\xef'
ValueError: non-hex digit
ValueError: non-hex digit
ValueError: non-hex digit
ValueError: non-hex digit
ValueError: non-hex digit
ValueError: non-hex digit
ValueError: non-hex digit

Loading…
Cancel
Save