Skip to content
Snippets Groups Projects
Commit 0b7184dc authored by Paul Sokolovsky's avatar Paul Sokolovsky
Browse files

Implement octal and hex escapes in strings.

parent 0914371f
No related branches found
No related tags found
No related merge requests found
......@@ -126,6 +126,10 @@ static bool is_following_digit(mp_lexer_t *lex) {
return unichar_isdigit(lex->chr1);
}
// Reports whether lex->chr1 is an octal digit, i.e. one of '0'..'7'.
static bool is_following_odigit(mp_lexer_t *lex) {
    if (lex->chr1 < '0') {
        return false;
    }
    return lex->chr1 <= '7';
}
// TODO UNICODE include unicode characters in definition of identifiers
static bool is_head_of_identifier(mp_lexer_t *lex) {
return is_letter(lex) || lex->chr0 == '_';
......@@ -275,6 +279,32 @@ static const char *tok_kw[] = {
NULL,
};
// Converts a single hex digit character to its numeric value (0..15).
// The caller must already have checked that c is a hex digit; no validation
// is done here.
static int hex_digit(unichar c) {
    int value = c - '0';
    if (value <= 9) {
        // '0'..'9' map straight onto 0..9
        return value;
    }
    // Clear the bit that separates lower case from upper case, so 'a'..'f'
    // and 'A'..'F' yield the same intermediate value.
    value &= ~('a' - 'A');
    // Close the gap between '9' and 'A' so that 'A' lands on 10.
    return value - ('A' - ('9' + 1));
}
// Reads num_digits hex digits from the lexer and combines them into a single
// value, most significant digit first.
// On entry the current character is the one just before the first hex digit:
// each iteration advances with next_char() and then inspects CUR_CHAR(), so
// on success the current character is the last hex digit read.
// Returns true and stores the value in *result when every digit was valid.
// Returns false as soon as a non-hex character is met; *result is not written
// in that case.
static bool get_hex(mp_lexer_t *lex, int num_digits, uint *result) {
    uint value = 0;
    for (; num_digits != 0; num_digits--) {
        next_char(lex);
        unichar ch = CUR_CHAR(lex);
        if (!unichar_isxdigit(ch)) {
            return false;
        }
        // make room for one more 4-bit digit and append it
        value = value * 16 + hex_digit(ch);
    }
    *result = value;
    return true;
}
static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool first_token) {
// skip white space and comments
bool had_physical_newline = false;
......@@ -439,12 +469,34 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
case 'v': c = 0x0b; break;
case 'f': c = 0x0c; break;
case 'r': c = 0x0d; break;
// TODO \ooo octal
case 'x': // TODO \xhh
case 'N': // TODO \N{name} only in strings
case 'u': // TODO \uxxxx only in strings
case 'U': // TODO \Uxxxxxxxx only in strings
default: break; // TODO error message
case 'x':
{
uint num;
if (!get_hex(lex, 2, &num)) {
// TODO error message
assert(0);
}
c = num;
break;
}
case 'N': break; // TODO \N{name} only in strings
case 'u': break; // TODO \uxxxx only in strings
case 'U': break; // TODO \Uxxxxxxxx only in strings
default:
if (c >= '0' && c <= '7') {
// Octal sequence, 1-3 chars
int digits = 3;
int num = c - '0';
while (is_following_odigit(lex) && --digits != 0) {
next_char(lex);
num = num * 8 + (CUR_CHAR(lex) - '0');
}
c = num;
} else {
// TODO error message
assert(0);
}
break;
}
if (c != MP_LEXER_CHAR_EOF) {
vstr_add_char(&lex->vstr, c);
......
......@@ -43,6 +43,7 @@ bool unichar_isspace(unichar c);
bool unichar_isalpha(unichar c);
bool unichar_isprint(unichar c);
// True when c is below 128 and its attribute-table entry has the digit flag set.
bool unichar_isdigit(unichar c);
// True for everything unichar_isdigit() accepts, plus the letters a-f and A-F.
bool unichar_isxdigit(unichar c);
/** string ******************************************************/
......
......@@ -62,6 +62,10 @@ bool unichar_isdigit(unichar c) {
return c < 128 && (attr[c] & FL_DIGIT) != 0;
}
// Reports whether c is a hexadecimal digit: anything unichar_isdigit()
// accepts, or a letter in the range a-f or A-F.
bool unichar_isxdigit(unichar c) {
    if (unichar_isdigit(c)) {
        return true;
    }
    if (c >= 'a' && c <= 'f') {
        return true;
    }
    return c >= 'A' && c <= 'F';
}
/*
bool char_is_alpha_or_digit(unichar c) {
return c < 128 && (attr[c] & (FL_ALPHA | FL_DIGIT)) != 0;
......
# Octal and hex escape sequences in string literals.

# A single octal digit is a complete escape.
one_digit = "a\1b"
print(len(one_digit))
print(ord(one_digit[1]))

# Three octal digits collapse into one character.
print(len("a\123b"))

# At most three octal digits are consumed; the trailing "45" stays literal.
capped = "a\12345b"
print(len(capped))
print(ord(capped[1]))

# \x is followed by exactly two hex digits.
two_hex = "a\xffb"
print(len(two_hex))
print(ord(two_hex[1]))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment