Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
M
micropython
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
card10
micropython
Commits
94fbe971
Commit
94fbe971
authored
10 years ago
by
Damien George
Browse files
Options
Downloads
Patches
Plain Diff
py: Change lexer stream API to return bytes not chars.
Lexer is now 8-bit clean inside strings.
parent
07133415
No related branches found
No related tags found
No related merge requests found
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
py/lexer.c
+20
-18
20 additions, 18 deletions
py/lexer.c
py/lexer.h
+6
-6
6 additions, 6 deletions
py/lexer.h
py/lexerstr.c
+3
-3
3 additions, 3 deletions
py/lexerstr.c
py/lexerunix.c
+7
-7
7 additions, 7 deletions
py/lexerunix.c
stmhal/lexerfatfs.c
+6
-6
6 additions, 6 deletions
stmhal/lexerfatfs.c
with
42 additions
and
40 deletions
py/lexer.c
+
20
−
18
View file @
94fbe971
...
...
@@ -45,7 +45,7 @@
struct
_mp_lexer_t
{
qstr
source_name
;
// name of source
void
*
stream_data
;
// data for stream
mp_lexer_stream_next_
char
_t
stream_next_
char
;
// stream callback to get next
char
mp_lexer_stream_next_
byte
_t
stream_next_
byte
;
// stream callback to get next
byte
mp_lexer_stream_close_t
stream_close
;
// stream callback to free
unichar
chr0
,
chr1
,
chr2
;
// current cached characters from source
...
...
@@ -103,7 +103,7 @@ void mp_token_show(const mp_token_t *tok) {
#define CUR_CHAR(lex) ((lex)->chr0)
STATIC
bool
is_end
(
mp_lexer_t
*
lex
)
{
return
lex
->
chr0
==
MP_LEXER_
CHAR_
EOF
;
return
lex
->
chr0
==
MP_LEXER_EOF
;
}
STATIC
bool
is_physical_newline
(
mp_lexer_t
*
lex
)
{
...
...
@@ -171,7 +171,7 @@ STATIC bool is_tail_of_identifier(mp_lexer_t *lex) {
}
STATIC
void
next_char
(
mp_lexer_t
*
lex
)
{
if
(
lex
->
chr0
==
MP_LEXER_
CHAR_
EOF
)
{
if
(
lex
->
chr0
==
MP_LEXER_EOF
)
{
return
;
}
...
...
@@ -200,10 +200,10 @@ STATIC void next_char(mp_lexer_t *lex) {
for
(;
advance
>
0
;
advance
--
)
{
lex
->
chr0
=
lex
->
chr1
;
lex
->
chr1
=
lex
->
chr2
;
lex
->
chr2
=
lex
->
stream_next_
char
(
lex
->
stream_data
);
if
(
lex
->
chr2
==
MP_LEXER_
CHAR_
EOF
)
{
lex
->
chr2
=
lex
->
stream_next_
byte
(
lex
->
stream_data
);
if
(
lex
->
chr2
==
MP_LEXER_EOF
)
{
// EOF
if
(
lex
->
chr1
!=
MP_LEXER_
CHAR_
EOF
&&
lex
->
chr1
!=
'\n'
&&
lex
->
chr1
!=
'\r'
)
{
if
(
lex
->
chr1
!=
MP_LEXER_EOF
&&
lex
->
chr1
!=
'\n'
&&
lex
->
chr1
!=
'\r'
)
{
lex
->
chr2
=
'\n'
;
// insert newline at end of file
}
}
...
...
@@ -491,8 +491,8 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
vstr_add_char
(
&
lex
->
vstr
,
'\\'
);
}
else
{
switch
(
c
)
{
case
MP_LEXER_
CHAR_
EOF
:
break
;
// TODO a proper error message?
case
'\n'
:
c
=
MP_LEXER_
CHAR_
EOF
;
break
;
// TODO check this works correctly (we are supposed to ignore it
case
MP_LEXER_EOF
:
break
;
// TODO a proper error message?
case
'\n'
:
c
=
MP_LEXER_EOF
;
break
;
// TODO check this works correctly (we are supposed to ignore it
case
'\\'
:
break
;
case
'\''
:
break
;
case
'"'
:
break
;
...
...
@@ -546,7 +546,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
break
;
}
}
if
(
c
!=
MP_LEXER_
CHAR_
EOF
)
{
if
(
c
!=
MP_LEXER_EOF
)
{
if
(
c
<
0x110000
&&
!
is_bytes
)
{
vstr_add_char
(
&
lex
->
vstr
,
c
);
}
else
if
(
c
<
0x100
&&
is_bytes
)
{
...
...
@@ -556,7 +556,9 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
}
}
}
else
{
vstr_add_char
(
&
lex
->
vstr
,
CUR_CHAR
(
lex
));
// Add the "character" as a byte so that we remain 8-bit clean.
// This way, strings are parsed correctly whether or not they contain utf-8 chars.
vstr_add_byte
(
&
lex
->
vstr
,
CUR_CHAR
(
lex
));
}
}
next_char
(
lex
);
...
...
@@ -728,7 +730,7 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs
}
}
mp_lexer_t
*
mp_lexer_new
(
qstr
src_name
,
void
*
stream_data
,
mp_lexer_stream_next_
char
_t
stream_next_
char
,
mp_lexer_stream_close_t
stream_close
)
{
mp_lexer_t
*
mp_lexer_new
(
qstr
src_name
,
void
*
stream_data
,
mp_lexer_stream_next_
byte
_t
stream_next_
byte
,
mp_lexer_stream_close_t
stream_close
)
{
mp_lexer_t
*
lex
=
m_new_maybe
(
mp_lexer_t
,
1
);
// check for memory allocation error
...
...
@@ -741,7 +743,7 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_
lex
->
source_name
=
src_name
;
lex
->
stream_data
=
stream_data
;
lex
->
stream_next_
char
=
stream_next_
char
;
lex
->
stream_next_
byte
=
stream_next_
byte
;
lex
->
stream_close
=
stream_close
;
lex
->
line
=
1
;
lex
->
column
=
1
;
...
...
@@ -762,18 +764,18 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_
lex
->
indent_level
[
0
]
=
0
;
// preload characters
lex
->
chr0
=
stream_next_
char
(
stream_data
);
lex
->
chr1
=
stream_next_
char
(
stream_data
);
lex
->
chr2
=
stream_next_
char
(
stream_data
);
lex
->
chr0
=
stream_next_
byte
(
stream_data
);
lex
->
chr1
=
stream_next_
byte
(
stream_data
);
lex
->
chr2
=
stream_next_
byte
(
stream_data
);
// if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
if
(
lex
->
chr0
==
MP_LEXER_
CHAR_
EOF
)
{
if
(
lex
->
chr0
==
MP_LEXER_EOF
)
{
lex
->
chr0
=
'\n'
;
}
else
if
(
lex
->
chr1
==
MP_LEXER_
CHAR_
EOF
)
{
}
else
if
(
lex
->
chr1
==
MP_LEXER_EOF
)
{
if
(
lex
->
chr0
!=
'\n'
&&
lex
->
chr0
!=
'\r'
)
{
lex
->
chr1
=
'\n'
;
}
}
else
if
(
lex
->
chr2
==
MP_LEXER_
CHAR_
EOF
)
{
}
else
if
(
lex
->
chr2
==
MP_LEXER_EOF
)
{
if
(
lex
->
chr1
!=
'\n'
&&
lex
->
chr1
!=
'\r'
)
{
lex
->
chr2
=
'\n'
;
}
...
...
This diff is collapsed.
Click to expand it.
py/lexer.h
+
6
−
6
View file @
94fbe971
...
...
@@ -139,18 +139,18 @@ typedef struct _mp_token_t {
mp_uint_t
len
;
// (byte) length of string of token
}
mp_token_t
;
// the next-char function must return the next character in the stream
// it must return MP_LEXER_CHAR_EOF if end of stream
// it can be called again after returning MP_LEXER_CHAR_EOF, and in that case must return MP_LEXER_CHAR_EOF
#define MP_LEXER_
CHAR_
EOF (-1)
typedef
unichar
(
*
mp_lexer_stream_next_
char
_t
)(
void
*
);
// the next-byte function must return the next byte in the stream
// it must return MP_LEXER_EOF if end of stream
// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
#define MP_LEXER_EOF (-1)
typedef
mp_uint_t
(
*
mp_lexer_stream_next_
byte
_t
)(
void
*
);
typedef
void
(
*
mp_lexer_stream_close_t
)(
void
*
);
typedef
struct
_mp_lexer_t
mp_lexer_t
;
void
mp_token_show
(
const
mp_token_t
*
tok
);
mp_lexer_t
*
mp_lexer_new
(
qstr
src_name
,
void
*
stream_data
,
mp_lexer_stream_next_
char
_t
stream_next_
char
,
mp_lexer_stream_close_t
stream_close
);
mp_lexer_t
*
mp_lexer_new
(
qstr
src_name
,
void
*
stream_data
,
mp_lexer_stream_next_
byte
_t
stream_next_
byte
,
mp_lexer_stream_close_t
stream_close
);
mp_lexer_t
*
mp_lexer_new_from_str_len
(
qstr
src_name
,
const
char
*
str
,
mp_uint_t
len
,
mp_uint_t
free_len
);
void
mp_lexer_free
(
mp_lexer_t
*
lex
);
...
...
This diff is collapsed.
Click to expand it.
py/lexerstr.c
+
3
−
3
View file @
94fbe971
...
...
@@ -36,11 +36,11 @@ typedef struct _mp_lexer_str_buf_t {
const
char
*
src_end
;
// end (exclusive) of source
}
mp_lexer_str_buf_t
;
STATIC
unichar
str_buf_next_
char
(
mp_lexer_str_buf_t
*
sb
)
{
STATIC
mp_uint_t
str_buf_next_
byte
(
mp_lexer_str_buf_t
*
sb
)
{
if
(
sb
->
src_cur
<
sb
->
src_end
)
{
return
*
sb
->
src_cur
++
;
}
else
{
return
MP_LEXER_
CHAR_
EOF
;
return
MP_LEXER_EOF
;
}
}
...
...
@@ -57,5 +57,5 @@ mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t
sb
->
src_beg
=
str
;
sb
->
src_cur
=
str
;
sb
->
src_end
=
str
+
len
;
return
mp_lexer_new
(
src_name
,
sb
,
(
mp_lexer_stream_next_
char
_t
)
str_buf_next_
char
,
(
mp_lexer_stream_close_t
)
str_buf_free
);
return
mp_lexer_new
(
src_name
,
sb
,
(
mp_lexer_stream_next_
byte
_t
)
str_buf_next_
byte
,
(
mp_lexer_stream_close_t
)
str_buf_free
);
}
This diff is collapsed.
Click to expand it.
py/lexerunix.c
+
7
−
7
View file @
94fbe971
...
...
@@ -41,20 +41,20 @@
typedef
struct
_mp_lexer_file_buf_t
{
int
fd
;
char
buf
[
20
];
uint
len
;
uint
pos
;
byte
buf
[
20
];
mp_
uint
_t
len
;
mp_
uint
_t
pos
;
}
mp_lexer_file_buf_t
;
STATIC
unichar
file_buf_next_
char
(
mp_lexer_file_buf_t
*
fb
)
{
STATIC
mp_uint_t
file_buf_next_
byte
(
mp_lexer_file_buf_t
*
fb
)
{
if
(
fb
->
pos
>=
fb
->
len
)
{
if
(
fb
->
len
==
0
)
{
return
MP_LEXER_
CHAR_
EOF
;
return
MP_LEXER_EOF
;
}
else
{
int
n
=
read
(
fb
->
fd
,
fb
->
buf
,
sizeof
(
fb
->
buf
));
if
(
n
<=
0
)
{
fb
->
len
=
0
;
return
MP_LEXER_
CHAR_
EOF
;
return
MP_LEXER_EOF
;
}
fb
->
len
=
n
;
fb
->
pos
=
0
;
...
...
@@ -78,7 +78,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
int
n
=
read
(
fb
->
fd
,
fb
->
buf
,
sizeof
(
fb
->
buf
));
fb
->
len
=
n
;
fb
->
pos
=
0
;
return
mp_lexer_new
(
qstr_from_str
(
filename
),
fb
,
(
mp_lexer_stream_next_
char
_t
)
file_buf_next_
char
,
(
mp_lexer_stream_close_t
)
file_buf_close
);
return
mp_lexer_new
(
qstr_from_str
(
filename
),
fb
,
(
mp_lexer_stream_next_
byte
_t
)
file_buf_next_
byte
,
(
mp_lexer_stream_close_t
)
file_buf_close
);
}
#endif // MICROPY_HELPER_LEXER_UNIX
This diff is collapsed.
Click to expand it.
stmhal/lexerfatfs.c
+
6
−
6
View file @
94fbe971
...
...
@@ -36,20 +36,20 @@
typedef
struct
_mp_lexer_file_buf_t
{
FIL
fp
;
char
buf
[
20
];
byte
buf
[
20
];
uint16_t
len
;
uint16_t
pos
;
}
mp_lexer_file_buf_t
;
static
unichar
file_buf_next_
char
(
mp_lexer_file_buf_t
*
fb
)
{
STATIC
mp_uint_t
file_buf_next_
byte
(
mp_lexer_file_buf_t
*
fb
)
{
if
(
fb
->
pos
>=
fb
->
len
)
{
if
(
fb
->
len
<
sizeof
(
fb
->
buf
))
{
return
MP_LEXER_
CHAR_
EOF
;
return
MP_LEXER_EOF
;
}
else
{
UINT
n
;
f_read
(
&
fb
->
fp
,
fb
->
buf
,
sizeof
(
fb
->
buf
),
&
n
);
if
(
n
==
0
)
{
return
MP_LEXER_
CHAR_
EOF
;
return
MP_LEXER_EOF
;
}
fb
->
len
=
n
;
fb
->
pos
=
0
;
...
...
@@ -58,7 +58,7 @@ static unichar file_buf_next_char(mp_lexer_file_buf_t *fb) {
return
fb
->
buf
[
fb
->
pos
++
];
}
static
void
file_buf_close
(
mp_lexer_file_buf_t
*
fb
)
{
STATIC
void
file_buf_close
(
mp_lexer_file_buf_t
*
fb
)
{
f_close
(
&
fb
->
fp
);
m_del_obj
(
mp_lexer_file_buf_t
,
fb
);
}
...
...
@@ -74,5 +74,5 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
f_read
(
&
fb
->
fp
,
fb
->
buf
,
sizeof
(
fb
->
buf
),
&
n
);
fb
->
len
=
n
;
fb
->
pos
=
0
;
return
mp_lexer_new
(
qstr_from_str
(
filename
),
fb
,
(
mp_lexer_stream_next_
char
_t
)
file_buf_next_
char
,
(
mp_lexer_stream_close_t
)
file_buf_close
);
return
mp_lexer_new
(
qstr_from_str
(
filename
),
fb
,
(
mp_lexer_stream_next_
byte
_t
)
file_buf_next_
byte
,
(
mp_lexer_stream_close_t
)
file_buf_close
);
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment