From c67972e3c5b9c665b15bca3491e2f46cf478a7bd Mon Sep 17 00:00:00 2001
From: schneider <schneider@blinkenlichts.net>
Date: Fri, 3 Apr 2020 18:27:10 +0200
Subject: [PATCH] refact(config): Make config parser more robust

Introduces a small state machine which parses the file character by
character. This significantly simplifies the logic for splitting the
file into lines.

Now also trims lines, keys and values, stripping whitespace and all
non-printable characters (anything isgraph() rejects).
---
 epicardium/modules/config.c | 160 +++++++++++++++---------------------
 1 file changed, 64 insertions(+), 96 deletions(-)

diff --git a/epicardium/modules/config.c b/epicardium/modules/config.c
index 7371f02a9..8e39efd2c 100644
--- a/epicardium/modules/config.c
+++ b/epicardium/modules/config.c
@@ -125,25 +125,37 @@ static void add_config_pair(
 	slot->value_offset = value_offset;
 }
 
+static char *trim(char *str)
+{
+	char *start = str;
+	while (*start && !isgraph((int)*start))
+		start++;
+
+	if (strlen(start) > 0) {
+		char *end = start + strlen(start) - 1;
+		while (*end && !isgraph((int)*end))
+			end--;
+		end[1] = 0;
+	}
+	return start;
+}
+
 // parses one line of the config file
-static void
-parse_line(char *line, char *eol, int line_number, size_t line_offset)
+static void parse_line(char *line, int line_number, size_t line_offset)
 {
 	char *line_start = line;
 
-	//skip leading whitespace
-	while (*line && isspace((int)*line))
-		++line;
+	line = trim(line);
 
-	char *key = line;
-	if (*key == '#') {
+	//printf(line);
+	if (*line == '#') {
 		//skip comments
 		return;
 	}
 
 	char *eq = strchr(line, '=');
 	if (!eq) {
-		if (*key) {
+		if (*line) {
 			LOG_WARN(
 				"card10.cfg",
 				"line %d: syntax error",
@@ -152,26 +164,15 @@ parse_line(char *line, char *eol, int line_number, size_t line_offset)
 		}
 		return;
 	}
+	*eq = 0;
 
-	char *e_key = eq - 1;
-	//skip trailing whitespace in key
-	while (e_key > key && isspace((int)*e_key))
-		--e_key;
-	e_key[1] = '\0';
+	char *key = trim(line);
 	if (*key == '\0') {
 		LOG_WARN("card10.cfg", "line %d: empty key", line_number);
 		return;
 	}
 
-	char *value = eq + 1;
-	//skip leading whitespace
-	while (*value && isspace((int)*value))
-		++value;
-
-	char *e_val = eol - 1;
-	//skip trailing whitespace
-	while (e_val > value && isspace((int)*e_val))
-		--e_val;
+	char *value = trim(eq + 1);
 	if (*value == '\0') {
 		LOG_WARN(
 			"card10.cfg",
@@ -187,16 +188,39 @@ parse_line(char *line, char *eol, int line_number, size_t line_offset)
 	add_config_pair(key, value, line_number, value_offset);
 }
 
-// convert windows line endings to unix line endings.
-// we don't care about the extra empty lines
-static void convert_crlf_to_lflf(char *buf, int n)
+typedef struct {
+	int line_number;
+	int file_offset;
+	int line_start;
+	char line[MAX_LINE_LENGTH + 1];
+	int line_length;
+} parser_state;
+
+int parse_character(char c, parser_state *s)
 {
-	while (n--) {
-		if (*buf == '\r') {
-			*buf = '\n';
+	if (c != '\r' && c != '\n') {
+		if (s->line_length == MAX_LINE_LENGTH) {
+			LOG_WARN(
+				"card10.cfg",
+				"line:%d: too long - aborting",
+				s->line_number
+			);
+			return -1;
+		}
+		s->line[s->line_length++] = c;
+	} else {
+		s->line[s->line_length] = 0;
+		//printf("New line: %s (%d %d)\n", s->line, s->line_number, s->line_start);
+		parse_line(s->line, s->line_number, s->line_start);
+		s->line_length = 0;
+		s->line_start  = s->file_offset + 1;
+		if (c == '\n') {
+			s->line_number++;
 		}
-		buf++;
 	}
+
+	s->file_offset++;
+	return 0;
 }
 
 // parses the entire config file
@@ -213,77 +237,21 @@ void load_config(void)
 		);
 		return;
 	}
-	char buf[MAX_LINE_LENGTH + 1];
-	int line_number    = 0;
-	size_t file_offset = 0;
+
+	char buf[128];
 	int nread;
+	parser_state s;
+	memset(&s, 0, sizeof(s));
+	s.line_number = 1;
 	do {
-		nread = epic_file_read(fd, buf, MAX_LINE_LENGTH);
-		convert_crlf_to_lflf(buf, nread);
-		if (nread < MAX_LINE_LENGTH) {
-			//add fake EOL to ensure termination
-			buf[nread++] = '\n';
+		nread = epic_file_read(fd, buf, sizeof(buf));
+		int i;
+		for (i = 0; i < nread; i++) {
+			parse_character(buf[i], &s);
 		}
-		//zero-terminate buffer
-		buf[nread]   = '\0';
-		char *line   = buf;
-		char *eol    = NULL;
-		int last_eol = 0;
-		while (line) {
-			//line points one character past the last (if any) '\n' hence '- 1'
-			last_eol = line - buf - 1;
-			eol      = strchr(line, '\n');
-			++line_number;
-			if (eol) {
-				*eol = '\0';
-				parse_line(line, eol, line_number, file_offset);
-				file_offset += eol - line + 1;
-				line = eol + 1;
-				continue;
-			}
-			if (line == buf) {
-				//line did not fit into buf
-				LOG_WARN(
-					"card10.cfg",
-					"line:%d: too long - aborting",
-					line_number
-				);
-				return;
-			}
-			int seek_back = last_eol - nread;
+	} while (nread == sizeof(buf));
+	parse_character('\n', &s);
 
-			LOG_DEBUG(
-				"card10.cfg",
-				"nread, last_eol, seek_back: %d,%d,%d",
-				nread,
-				last_eol,
-				seek_back
-			);
-			assert(seek_back <= 0);
-			if (!seek_back) {
-				break;
-			}
-
-			int rc = epic_file_seek(fd, seek_back, SEEK_CUR);
-			if (rc < 0) {
-				LOG_ERR("card10.cfg", "seek failed, aborting");
-				return;
-			}
-			char newline;
-			rc = epic_file_read(fd, &newline, 1);
-			if (rc < 0 || (newline != '\n' && newline != '\r')) {
-				LOG_ERR("card10.cfg", "read failed, aborting");
-				LOG_DEBUG(
-					"card10.cfg",
-					"read failed at read-back of newline: rc: %d read: %d",
-					rc,
-					(int)newline
-				);
-				return;
-			}
-			break;
-		}
-	} while (nread == MAX_LINE_LENGTH);
 	epic_file_close(fd);
 }
 
-- 
GitLab