Browse Source

extmod/modure: If input string is bytes, return bytes results too.

This applies to match.group() and split().

For ARM Thumb2, this increased code size by 12 bytes.
pull/3187/head
Paul Sokolovsky 7 years ago
parent
commit
58b7b01cb5
  1. 9
      extmod/modure.c
  2. 3
      tests/extmod/ure1.py
  3. 5
      tests/extmod/ure_split.py

9
extmod/modure.c

@@ -31,6 +31,7 @@
#include "py/nlr.h"
#include "py/runtime.h"
#include "py/binary.h"
+#include "py/objstr.h"
#if MICROPY_PY_URE
@@ -69,7 +70,8 @@ STATIC mp_obj_t match_group(mp_obj_t self_in, mp_obj_t no_in) {
// no match for this group
return mp_const_none;
}
-return mp_obj_new_str(start, self->caps[no * 2 + 1] - start, false);
+return mp_obj_new_str_of_type(mp_obj_get_type(self->str),
+(const byte*)start, self->caps[no * 2 + 1] - start);
}
MP_DEFINE_CONST_FUN_OBJ_2(match_group_obj, match_group);
@@ -129,6 +131,7 @@ STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
mp_obj_re_t *self = MP_OBJ_TO_PTR(args[0]);
Subject subj;
size_t len;
+const mp_obj_type_t *str_type = mp_obj_get_type(args[1]);
subj.begin = mp_obj_str_get_data(args[1], &len);
subj.end = subj.begin + len;
int caps_num = (self->re.sub + 1) * 2;
@@ -150,7 +153,7 @@ STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
break;
}
-mp_obj_t s = mp_obj_new_str(subj.begin, caps[0] - subj.begin, false);
+mp_obj_t s = mp_obj_new_str_of_type(str_type, (const byte*)subj.begin, caps[0] - subj.begin);
mp_obj_list_append(retval, s);
if (self->re.sub > 0) {
mp_not_implemented("Splitting with sub-captures");
@@ -161,7 +164,7 @@ STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
}
}
-mp_obj_t s = mp_obj_new_str(subj.begin, subj.end - subj.begin, false);
+mp_obj_t s = mp_obj_new_str_of_type(str_type, (const byte*)subj.begin, subj.end - subj.begin);
mp_obj_list_append(retval, s);
return retval;
}

3
tests/extmod/ure1.py

@@ -80,3 +80,6 @@ try:
re.compile("*")
except:
print("Caught invalid regex")
+# bytes objects
+m = re.match(rb'a+?', b'ab'); print(m.group(0))

5
tests/extmod/ure_split.py

@@ -26,3 +26,8 @@ print(s)
r = re.compile("[a-f]+")
s = r.split("0a3b9")
print(s)
+# bytes objects
+r = re.compile(b"x")
+s = r.split(b"fooxbar")
+print(s)

Loading…
Cancel
Save