From 95908b0f50e26fe2c90687966b60a0cf195f71de Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky <pfalcon@users.sourceforge.net>
Date: Wed, 15 Oct 2014 04:43:13 +0300
Subject: [PATCH] modure: Update to re1.5 v0.6, support for char sets/classes
 ([a-c]).

---
 extmod/modure.c                    |  3 ++-
 extmod/re1.5/charclass.c           | 11 ++++++++++
 extmod/re1.5/compilecode.c         | 32 +++++++++++++++++++++++++++++-
 extmod/re1.5/dumpcode.c            | 12 ++++++++++-
 extmod/re1.5/{regexp.h => re1.5.h} |  5 +++++
 extmod/re1.5/recursiveloop.c       |  8 +++++++-
 tests/extmod/ure1.py               |  7 +++++++
 7 files changed, 74 insertions(+), 4 deletions(-)
 create mode 100644 extmod/re1.5/charclass.c
 rename extmod/re1.5/{regexp.h => re1.5.h} (97%)

diff --git a/extmod/modure.c b/extmod/modure.c
index 7acc045e7..ae47a2129 100644
--- a/extmod/modure.c
+++ b/extmod/modure.c
@@ -38,7 +38,7 @@
 
 #if MICROPY_PY_URE
 
-#include "re1.5/regexp.h"
+#include "re1.5/re1.5.h"
 
 #define FLAG_DEBUG 0x1000
 
@@ -245,5 +245,6 @@ const mp_obj_module_t mp_module_ure = {
 #include "re1.5/compilecode.c"
 #include "re1.5/dumpcode.c"
 #include "re1.5/recursiveloop.c"
+#include "re1.5/charclass.c"
 
 #endif //MICROPY_PY_URE
diff --git a/extmod/re1.5/charclass.c b/extmod/re1.5/charclass.c
new file mode 100644
index 000000000..c9f617592
--- /dev/null
+++ b/extmod/re1.5/charclass.c
@@ -0,0 +1,11 @@
+#include "re1.5.h"
+
+int _re1_5_classmatch(const char *pc, const char *sp)
+{
+    // pc points to "cnt" byte after opcode, followed by cnt (lo, hi) byte pairs; returns 1 if *sp lies in any pair, else 0
+    int cnt = *pc++;
+    for (; cnt--; pc += 2) { // advance to the next (lo, hi) pair on every pass
+        if (*sp >= *pc && *sp <= pc[1]) return 1; // inclusive range hit: one matching pair is enough
+    }
+    return 0; // no pair contains *sp (also the result for an empty class)
+}
\ No newline at end of file
diff --git a/extmod/re1.5/compilecode.c b/extmod/re1.5/compilecode.c
index 5b5d28c2a..a7942b121 100644
--- a/extmod/re1.5/compilecode.c
+++ b/extmod/re1.5/compilecode.c
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "regexp.h"
+#include "re1.5.h"
 
 static void insert_code(char *code, int at, int num, int *pc)
 {
@@ -45,6 +45,18 @@ int re1_5_sizecode(const char *re)
             break;
         case ')':
             break;
+        case '[': {
+            pc += 2;
+            re++;
+            while (*re != ']') {
+                if (!*re) return -1;
+                if (re[1] == '-') {
+                    re += 2;
+                }
+                pc += 2;
+                re++;
+            }
+        }
         }
     }
 
@@ -76,6 +88,24 @@ const char *_compilecode(const char *re, ByteProg *prog)
             EMIT(pc++, Any);
             prog->len++;
             break;
+        case '[': {
+            int cnt;
+            term = pc;
+            EMIT(pc++, Class);
+            pc++; // Skip # of pair byte
+            prog->len++;
+            re++;
+            for (cnt = 0; *re != ']'; re++, cnt++) {
+                if (!*re) return NULL;
+                EMIT(pc++, *re);
+                if (re[1] == '-') {
+                    re += 2;
+                }
+                EMIT(pc++, *re);
+            }
+            EMIT(term + 1, cnt);
+            break;
+        }
         case '(':
             term = pc;
 
diff --git a/extmod/re1.5/dumpcode.c b/extmod/re1.5/dumpcode.c
index b91ded03a..ca41cfeda 100644
--- a/extmod/re1.5/dumpcode.c
+++ b/extmod/re1.5/dumpcode.c
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "regexp.h"
+#include "re1.5.h"
 
 void re1_5_dumpcode(ByteProg *prog)
 {
@@ -32,6 +32,16 @@ void re1_5_dumpcode(ByteProg *prog)
                 case Any:
                         printf("any\n");
                         break;
+                case Class: {
+                        int num = code[pc++];
+                        printf("class %d", num);
+                        while (num--) {
+                            printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]);
+                            pc += 2;
+                        }
+                        printf("\n");
+                        break;
+                }
                 case Match:
                         printf("match\n");
                         break;
diff --git a/extmod/re1.5/regexp.h b/extmod/re1.5/re1.5.h
similarity index 97%
rename from extmod/re1.5/regexp.h
rename to extmod/re1.5/re1.5.h
index 316b27076..ac41bab8f 100644
--- a/extmod/re1.5/regexp.h
+++ b/extmod/re1.5/re1.5.h
@@ -80,14 +80,18 @@ enum	/* Inst.opcode */
 	CONSUMERS = 1,
 	Char = CONSUMERS,
 	Any,
+	Class,
+
 	ASSERTS = 0x50,
 	Bol = ASSERTS,
 	Eol,
+
 	// Instructions which take relative offset as arg
 	JUMPS = 0x60,
 	Jmp = JUMPS,
 	Split,
 	RSplit,
+
 	// Other (special) instructions
 	Save = 0x7e,
 	Match = 0x7f,
@@ -139,5 +143,6 @@ int re1_5_sizecode(const char *re);
 int re1_5_compilecode(ByteProg *prog, const char *re);
 void re1_5_dumpcode(ByteProg *prog);
 void cleanmarks(ByteProg *prog);
+int _re1_5_classmatch(const char *pc, const char *sp);
 
 #endif /*_RE1_5_REGEXP__H*/
diff --git a/extmod/re1.5/recursiveloop.c b/extmod/re1.5/recursiveloop.c
index 7b95eb4c9..26c6da43d 100644
--- a/extmod/re1.5/recursiveloop.c
+++ b/extmod/re1.5/recursiveloop.c
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "regexp.h"
+#include "re1.5.h"
 
 static int
 recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
@@ -23,6 +23,12 @@ recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int n
 		case Any:
 			sp++;
 			continue;
+		case Class:
+			if (!_re1_5_classmatch(pc, sp))
+				return 0;
+			pc += *(unsigned char*)pc * 2 + 1;
+			sp++;
+			continue;
 		case Match:
 			return 1;
 		case Jmp:
diff --git a/tests/extmod/ure1.py b/tests/extmod/ure1.py
index dff099c8c..577c8f61e 100644
--- a/tests/extmod/ure1.py
+++ b/tests/extmod/ure1.py
@@ -20,6 +20,13 @@ try:
 except IndexError:
     print("IndexError")
 
+r = re.compile("[a-c]")
+m = r.match("a")
+print(m.group(0))
+m = r.match("d")
+print(m)
+m = r.match("A")
+print(m)
 
 r = re.compile("o+")
 m = r.search("foobar")
-- 
GitLab