Skip to content
Snippets Groups Projects
Commit 0b7184dc authored by Paul Sokolovsky's avatar Paul Sokolovsky
Browse files

Implement octal and hex escapes in strings.

parent 0914371f
No related branches found
No related tags found
No related merge requests found
...@@ -126,6 +126,10 @@ static bool is_following_digit(mp_lexer_t *lex) { ...@@ -126,6 +126,10 @@ static bool is_following_digit(mp_lexer_t *lex) {
return unichar_isdigit(lex->chr1); return unichar_isdigit(lex->chr1);
} }
// Reports whether the lookahead character (lex->chr1) is an octal digit,
// i.e. lies in the inclusive range '0'..'7'.
static bool is_following_odigit(mp_lexer_t *lex) {
    const bool below_range = lex->chr1 < '0';
    const bool above_range = lex->chr1 > '7';
    return !(below_range || above_range);
}
// TODO UNICODE include unicode characters in definition of identifiers // TODO UNICODE include unicode characters in definition of identifiers
static bool is_head_of_identifier(mp_lexer_t *lex) { static bool is_head_of_identifier(mp_lexer_t *lex) {
return is_letter(lex) || lex->chr0 == '_'; return is_letter(lex) || lex->chr0 == '_';
...@@ -275,6 +279,32 @@ static const char *tok_kw[] = { ...@@ -275,6 +279,32 @@ static const char *tok_kw[] = {
NULL, NULL,
}; };
// Converts a hexadecimal digit character to its numeric value.
// The caller must guarantee that c is a hex digit; nothing is validated here.
static int hex_digit(unichar c) {
    int value = c - '0';
    if (value <= 9) {
        // Decimal digit: the offset from '0' is already the answer.
        return value;
    }
    // Letter digit: clear the bit that separates lower case from upper case,
    // so that 'a'..'f' produce the same offsets as 'A'..'F' ...
    value &= ~('a' - 'A');
    // ... then remove the gap between '9' and 'A' so that 'A' maps to 10.
    return value - ('A' - ('9' + 1));
}
// Reads num_digits hexadecimal digits from the lexer and combines them, most
// significant digit first, into a single number.
//
// On entry the current character is the one just before the first hex digit.
// On success the current character is the last hex digit consumed, the number
// is stored in *result and true is returned.
// If a non-hex character is encountered, false is returned immediately and
// *result is not written.
static bool get_hex(mp_lexer_t *lex, int num_digits, uint *result) {
    uint value = 0;
    for (; num_digits != 0; num_digits--) {
        // Advance first: the character we start on is not part of the number.
        next_char(lex);
        unichar digit = CUR_CHAR(lex);
        if (!unichar_isxdigit(digit)) {
            return false;
        }
        // Make room for one more nibble and append the new digit.
        value = (value << 4) + hex_digit(digit);
    }
    *result = value;
    return true;
}
static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool first_token) { static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool first_token) {
// skip white space and comments // skip white space and comments
bool had_physical_newline = false; bool had_physical_newline = false;
...@@ -439,12 +469,34 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs ...@@ -439,12 +469,34 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
case 'v': c = 0x0b; break; case 'v': c = 0x0b; break;
case 'f': c = 0x0c; break; case 'f': c = 0x0c; break;
case 'r': c = 0x0d; break; case 'r': c = 0x0d; break;
// TODO \ooo octal case 'x':
case 'x': // TODO \xhh {
case 'N': // TODO \N{name} only in strings uint num;
case 'u': // TODO \uxxxx only in strings if (!get_hex(lex, 2, &num)) {
case 'U': // TODO \Uxxxxxxxx only in strings // TODO error message
default: break; // TODO error message assert(0);
}
c = num;
break;
}
case 'N': break; // TODO \N{name} only in strings
case 'u': break; // TODO \uxxxx only in strings
case 'U': break; // TODO \Uxxxxxxxx only in strings
default:
if (c >= '0' && c <= '7') {
// Octal sequence, 1-3 chars
int digits = 3;
int num = c - '0';
while (is_following_odigit(lex) && --digits != 0) {
next_char(lex);
num = num * 8 + (CUR_CHAR(lex) - '0');
}
c = num;
} else {
// TODO error message
assert(0);
}
break;
} }
if (c != MP_LEXER_CHAR_EOF) { if (c != MP_LEXER_CHAR_EOF) {
vstr_add_char(&lex->vstr, c); vstr_add_char(&lex->vstr, c);
......
...@@ -43,6 +43,7 @@ bool unichar_isspace(unichar c); ...@@ -43,6 +43,7 @@ bool unichar_isspace(unichar c);
bool unichar_isalpha(unichar c); bool unichar_isalpha(unichar c);
bool unichar_isprint(unichar c); bool unichar_isprint(unichar c);
bool unichar_isdigit(unichar c); bool unichar_isdigit(unichar c);
bool unichar_isxdigit(unichar c);
/** string ******************************************************/ /** string ******************************************************/
......
...@@ -62,6 +62,10 @@ bool unichar_isdigit(unichar c) { ...@@ -62,6 +62,10 @@ bool unichar_isdigit(unichar c) {
return c < 128 && (attr[c] & FL_DIGIT) != 0; return c < 128 && (attr[c] & FL_DIGIT) != 0;
} }
// Returns true if c is a hexadecimal digit: a decimal digit as judged by
// unichar_isdigit(), or a letter in 'a'..'f' or 'A'..'F'.
bool unichar_isxdigit(unichar c) {
    if (unichar_isdigit(c)) {
        return true;
    }
    if (c >= 'a' && c <= 'f') {
        return true;
    }
    return c >= 'A' && c <= 'F';
}
/* /*
bool char_is_alpha_or_digit(unichar c) { bool char_is_alpha_or_digit(unichar c) {
return c < 128 && (attr[c] & (FL_ALPHA | FL_DIGIT)) != 0; return c < 128 && (attr[c] & (FL_ALPHA | FL_DIGIT)) != 0;
......
# Single-digit octal escape: \1 is one character whose code is 1,
# so the string has three characters.
a = "a\1b"
print(len(a))
print(ord(a[1]))
# Three-digit octal escape: \123 is a single character.
print(len("a\123b"))
# An octal escape takes at most three digits, so \123 forms one character
# and the following "45" are ordinary characters.
a = "a\12345b"
print(len(a))
print(ord(a[1]))
# Two-digit hex escape: \xff is one character whose code is 0xff.
a = "a\xffb"
print(len(a))
print(ord(a[1]))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment