summaryrefslogtreecommitdiff
path: root/src/lexer.c
diff options
context:
space:
mode:
authorPaweł Redman <pawel.redman@gmail.com>2016-12-20 09:35:09 +0100
committerPaweł Redman <pawel.redman@gmail.com>2016-12-20 09:35:09 +0100
commit337247d56e3300d17301daa7fa3fbfb6084cd540 (patch)
tree3299b79b08ff2112c21f6a3d1b33ff18ce88f2f5 /src/lexer.c
Initial commit.
Diffstat (limited to 'src/lexer.c')
-rw-r--r--src/lexer.c253
1 files changed, 253 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..41d6784
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,253 @@
+/*
+Copyright (C) 2016 Paweł Redman
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 3
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "common.h"
+#include <ctype.h>
+
+// Opens `path` for reading and resets all lexer state held in `ls`.
+// `path` and `token` are stored by pointer, not copied; `token` is the
+// buffer that lexer_get_token() fills.
+//
+//RETURN VALUES
+// -errno if fopen() fails
+// 0 on success
+int lexer_open(lexer_state_t *ls, const char *path, vstr_t *token)
+{
+	ls->error = 0;
+	ls->path = path;
+
+	ls->fp = fopen(path, "r");
+	if (!ls->fp)
+		return -errno;
+
+	ls->eof = false;
+
+	ls->token = token;
+	ls->buf_e = ls->buf_c = ls->buf;
+	ls->cc = ls->lc = ls->Cc = 0;
+
+	// read_buffer() compares every character, including the very first
+	// one, against ls->last, so it has to start from a defined value.
+	ls->last = '\0';
+
+	ls->in_token = false;
+	ls->in_quote = false;
+	ls->in_comment = false;
+
+	return 0;
+}
+
+// Refills ls->buf from ls->fp and rewinds the read cursor to its start.
+//
+//RETURN VALUES
+// <0 on error (-errno of the failed read; ls->error is set as well)
+// 0 on success
+// note: sets ls->eof to true if there's no more data left
+// note: ls->fp is closed both at end of file and on error; on error the
+//       buffer is left empty and ls->eof is set so that the closed stream
+//       is never read again
+static int fill_buffer(lexer_state_t *ls)
+{
+	size_t read;
+	int read_errno;
+
+	errno = 0;
+	read = fread(ls->buf, 1, sizeof(ls->buf), ls->fp);
+	// Capture errno right away: debug() and fclose() may overwrite it.
+	read_errno = errno;
+	debug("read = %zu\n", read);
+	if (read < sizeof(ls->buf)) {
+		if (ferror(ls->fp)) {
+			// Never return 0 ("success") for a failed read.
+			if (!read_errno)
+				read_errno = EIO;
+
+			fclose(ls->fp);
+
+			ls->error = read_errno;
+			ls->eof = true;
+			ls->buf_c = ls->buf_e = ls->buf;
+
+			errno = read_errno;
+			return -read_errno;
+		}
+
+		ls->eof = true;
+		fclose(ls->fp);
+		debug("no data left, ls->fp closed\n");
+	}
+
+	ls->buf_c = ls->buf;
+	ls->buf_e = ls->buf + read;
+	return 0;
+}
+
+// Consumes characters from the in-memory buffer, appending them to
+// ls->token, until one complete token has been collected or the buffer is
+// exhausted. Lexing state (in_token / in_quote / in_comment / last) lives
+// in `ls`, so a token may span several buffer refills.
+//
+// Rules, in order of priority:
+//  - inside a comment everything up to and including '\n' is dropped
+//  - whitespace outside quotes ends the current token
+//  - "//" starts a comment and ends the current token
+//  - an unescaped '"' toggles quoting; the closing quote ends the token
+//  - anything else starts or continues a token
+//
+//RETURN VALUES
+// -ENOMEM
+// -EAGAIN when the buffer runs out
+// 0 on success
+// 1 when no data is left
+static int read_buffer(lexer_state_t *ls)
+{
+	while (ls->buf_c < ls->buf_e) {
+		bool ret_token = false;
+
+		debug("*ls->buf_c = %c, ls->last = %c\n", *ls->buf_c, ls->last);
+
+		if (*ls->buf_c == '\n') {
+			ls->lc++;
+			ls->Cc = 0;
+		}
+
+		if (ls->in_comment) {
+			if (*ls->buf_c == '\n')
+				ls->in_comment = false;
+		} else if (isspace((unsigned char)*ls->buf_c) && !ls->in_quote) {
+			// The cast matters: passing a negative char to isspace()
+			// is undefined behaviour.
+			if (ls->in_token) {
+				ls->in_token = false;
+				ret_token = true;
+			}
+		} else if (*ls->buf_c == '/' && ls->cc && ls->last == '/') {
+			// ls->cc is checked first because ls->last carries no
+			// meaningful value before the first character is consumed.
+			ls->in_comment = true;
+			ls->in_token = false;
+
+			// remove the first slash (guarded so the size can never
+			// wrap around on an empty token)
+			if (ls->token->size)
+				ls->token->size--;
+			if (ls->token->size)
+				ret_token = true;
+		} else if (*ls->buf_c == '\"' &&
+		           (ls->cc && ls->last != '\\')) {
+			// NOTE(review): because of the ls->cc test a quote that is
+			// the very first character of the input is not treated as
+			// a quote; left unchanged here - confirm whether intended.
+			ls->in_quote = !ls->in_quote;
+
+			if (!ls->in_quote) {
+				ls->in_token = false;
+				ret_token = true;
+			}
+		} else {
+			if (!ls->in_token) {
+				ls->in_token = true;
+			}
+		}
+
+		if (ls->in_token) {
+			if (vstr_putc(ls->token, *ls->buf_c)) {
+				ls->error = ENOMEM;
+				return -ENOMEM;
+			}
+		}
+
+		ls->last = *ls->buf_c;
+		ls->buf_c++;
+		ls->cc++;
+		ls->Cc++;
+
+		if (ret_token)
+			return 0;
+	}
+
+	if (ls->eof) {
+		// A token that runs up to the end of the file is still a token.
+		if (ls->token->size > 0)
+			return 0;
+		return 1;
+	}
+
+	return -EAGAIN;
+}
+
+// Reads the next token into ls->token (which is cleared first), refilling
+// the buffer from the file as often as needed.
+//
+//RETURN VALUES
+// <0 on error
+// 0 on success
+// 1 when no data is left
+int lexer_get_token(lexer_state_t *ls)
+{
+	vstr_clear(ls->token);
+
+	for (;;) {
+		int status;
+
+		status = read_buffer(ls);
+		debug("read_buffer = %i\n", status);
+		if (status != -EAGAIN) {
+			// a token, end of data, or a hard error
+			return status;
+		}
+
+		// The buffer ran dry in the middle of the input: get more.
+		status = fill_buffer(ls);
+		debug("fill_buffer = %i\n", status);
+		if (status < 0) {
+			return status;
+		}
+	}
+}
+
+// Prints a diagnostic to stderr, prefixed with "path:line:column: "
+// (both 1-based).
+//
+// If an error code has been recorded in ls->error, the system message for
+// that code is printed and `fmt` is ignored; otherwise `fmt` and its
+// arguments are formatted printf-style.
+void lexer_perror(lexer_state_t *ls, const char *fmt, ...)
+{
+	va_list vl;
+
+	fprintf(stderr, "%s:%zu:%zu: ", ls->path, ls->lc + 1, ls->Cc + 1);
+
+	if (ls->error) {
+		// perror() reports errno, which may have been overwritten since
+		// the failure (the fprintf() above included), so restore the
+		// recorded code first.
+		errno = ls->error;
+		perror(NULL);
+	} else {
+		va_start(vl, fmt);
+		vfprintf(stderr, fmt, vl);
+		va_end(vl);
+	}
+}
+
+// Reports an "expected X, got Y" diagnostic through lexer_perror().
+// Y is EOF when the file is finished and the buffer is fully consumed,
+// otherwise it is the current token.
+void lexer_perror_eg(lexer_state_t *ls, const char *expected)
+{
+	bool at_eof = ls->eof && ls->buf_c == ls->buf_e;
+
+	if (at_eof) {
+		lexer_perror(ls, "expected %s, got EOF\n", expected);
+		return;
+	}
+
+	// Terminate the token before it is printed with %s.
+	vstr_termz(ls->token);
+	lexer_perror(ls, "expected %s, got \"%s\"\n", expected,
+	             ls->token->data);
+}
+
+// Reads the next token and checks that it equals `match`. `desc` is an
+// optional (may be NULL) description that is put in front of `match` in
+// the diagnostic.
+//
+//RETURN VALUE
+// 0 if the token matches
+// 1 on mismatch, EOF or error (a message is printed to stderr)
+int lexer_assert(lexer_state_t *ls, const char *match, const char *desc)
+{
+	int ret;
+
+	ret = lexer_get_token(ls);
+	if (ret < 0) {
+		// A failed read or allocation is not an EOF; report it the same
+		// way lexer_assert_or_eof() does. lexer_get_token() returns
+		// -errno, so the code can be restored from it.
+		errno = -ret;
+		perror("lexer");
+		return 1;
+	}
+
+	if (ret) {
+		lexer_perror(ls, "expected %s%s\"%s\", got EOF\n",
+		             (desc ? desc : ""), (desc ? " " : ""), match);
+		return 1;
+	}
+
+	if (vstr_cmp(ls->token, match)) {
+		// Terminate the token before it is printed with %s.
+		vstr_termz(ls->token);
+		lexer_perror(ls, "expected %s%s\"%s\", got \"%s\"\n",
+		             (desc ? desc : ""), (desc ? " " : ""), match,
+		             ls->token->data);
+		return 1;
+	}
+
+	return 0;
+}
+
+// Like lexer_assert(), except that running out of input is acceptable.
+// `desc` is an optional (may be NULL) description that is put in front of
+// `match` in the diagnostic.
+//
+//RETURN VALUE
+// -1 on eof (also success)
+// 0 on success
+// 1 on error
+int lexer_assert_or_eof(lexer_state_t *ls, const char *match, const char *desc)
+{
+	int ret;
+
+	ret = lexer_get_token(ls);
+	if (ret < 0) {
+		// lexer_get_token() returns -errno; errno itself may have been
+		// overwritten since the failure, so restore it for perror().
+		errno = -ret;
+		perror("lexer");
+		return 1;
+	}
+
+	if (ret == 1) {
+		return -1;
+	}
+
+	if (vstr_cmp(ls->token, match)) {
+		// The token is printed with an explicit length, so it does not
+		// need to be terminated first.
+		lexer_perror(ls, "expected %s%s\"%s\" or EOF, got \"%.*s\"\n",
+		             (desc ? desc : ""), (desc ? " " : ""), match,
+		             (int)ls->token->size, ls->token->data);
+		return 1;
+	}
+
+	return 0;
+}
+
+// Reads `count` consecutive tokens and stores each one, converted with
+// vstr_atof(), into out[0..count-1].
+//
+//RETURN VALUE
+// 0 when all `count` values were read
+// 1 as soon as a token cannot be read (a message is printed to stderr;
+//   entries of `out` filled so far are kept)
+int lexer_get_floats(lexer_state_t *ls, float *out, size_t count)
+{
+	size_t filled = 0;
+
+	while (filled < count) {
+		int status = lexer_get_token(ls);
+
+		if (status != 0) {
+			lexer_perror_eg(ls, "a number");
+			return 1;
+		}
+
+		out[filled] = vstr_atof(ls->token);
+		filled++;
+	}
+
+	return 0;
+}