Browse Source

extmod/re1.5: Support escaping within RE classes.

Fixes issues #3178 and #5220.

Tests are added, including all the cases mentioned in both bugs.
pull/5234/head
Jim Mussared 5 years ago
committed by Damien George
parent
commit
ebf8332104
  1. 3
      extmod/re1.5/compilecode.c
  2. 20
      tests/extmod/ure1.py
  3. 1
      tests/extmod/ure_error.py

3
extmod/re1.5/compilecode.c

@ -53,6 +53,9 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
PC++; // Skip # of pair byte
prog->len++;
for (cnt = 0; *re != ']'; re++, cnt++) {
if (*re == '\\') {
++re;
}
if (!*re) return NULL;
EMIT(PC++, *re);
if (re[1] == '-' && re[2] != ']') {

20
tests/extmod/ure1.py

@ -88,3 +88,23 @@ except:
# bytes objects
# Non-greedy `+?` applied with a bytes pattern against a bytes subject.
m = re.match(rb'a+?', b'ab'); print(m.group(0))
print("===")
# escaping
# NOTE(review): `re` is bound outside this excerpt -- presumably the regex
# module under test; confirm against the top of the file.
# Outside a class: `\.` should be a literal dot, not "any character".
m = re.match(r'a\.c', 'a.c'); print(m.group(0) if m else '')
# `\.` must not match an ordinary character ('b' here).
m = re.match(r'a\.b', 'abc'); print(m is None)
# Subject is: a, backslash, b, c.  `\.` must not match a literal backslash.
m = re.match(r'a\.b', 'a\\bc'); print(m is None)
# Inside a class: `\-` is meant as a literal hyphen, so this is the set
# {a, -, z} rather than the range a-z.
m = re.match(r'[a\-z]', 'abc'); print(m.group(0))
# `\]` inside a class is meant as a literal ']' and must not close the class.
m = re.match(r'[.\]]*', '.].]a'); print(m.group(0))
# Same as above with another member ('+') following the escaped ']'.
m = re.match(r'[.\]+]*', '.]+.]a'); print(m.group(0))
# Real ranges (a-f, 0-9) mixed with an escaped hyphen in the same class.
m = re.match(r'[a-f0-9x\-yz]*', 'abxcd1-23'); print(m.group(0))
# `\\` inside a class is a literal backslash; the subject consists only of
# the characters a, b and backslash.
m = re.match(r'[a\\b]*', 'a\\aa\\bb\\bbab'); print(m.group(0))
# The escaped hyphen itself is a member of the class...
m = re.search(r'[a\-z]', '-'); print(m.group(0))
# ...and a character strictly between 'a' and 'z' is not (no range formed).
m = re.search(r'[a\-z]', 'f'); print(m is None)
# A class containing an escaped ']' still matches its other members.
m = re.search(r'[a\]z]', 'a'); print(m.group(0))
# Hyphen placement variants, each used to split the same subject:
# unescaped leading, unescaped trailing, escaped trailing, escaped middle,
# escaped leading.
print(re.compile(r'[-a]').split('foo-bar'))
print(re.compile(r'[a-]').split('foo-bar'))
print(re.compile(r'[ax\-]').split('foo-bar'))
print(re.compile(r'[a\-x]').split('foo-bar'))
print(re.compile(r'[\-ax]').split('foo-bar'))
print("===")

1
tests/extmod/ure_error.py

@ -23,3 +23,4 @@ test_re(r')')
# NOTE(review): test_re is defined outside this excerpt -- presumably it
# compiles the pattern and reports whether compilation failed; confirm.
# Unterminated character class.
test_re(r'[')
# Unterminated character class inside an unclosed group.
test_re(r'([')
# '[' opens a class that is never closed; the ')' falls inside the class.
test_re(r'([)')
# The only ']' is escaped, so it is a class member and the class is never
# closed before the pattern ends.
test_re(r'[a\]')

Loading…
Cancel
Save