summary refs log tree commit diff
path: root/src/lexer.c
diff options
context:
space:
mode:
author Paweł Redman <pawel.redman@gmail.com> 2017-04-05 23:09:06 +0200
committer Paweł Redman <pawel.redman@gmail.com> 2017-04-05 23:09:06 +0200
commit da4626cb5a741a9b7861dd54f8570a00753a5d92 (patch)
tree 3731ac96db396ceaf83d0c867aff06810a08daa3 /src/lexer.c
Initial commit.
Diffstat (limited to 'src/lexer.c')
-rw-r--r-- src/lexer.c 254
1 files changed, 254 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..5863ef6
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,254 @@
+/*
+Copyright (C) 2017 Paweł Redman
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 3
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "shared.h"
+#include <ctype.h>
+#include <string.h>
+
+// Prepare a vstr for first use by zero-filling the whole structure.
+// Must be called before any other vstr_* function is used on it.
+void vstr_init(vstr_t *vstr)
+{
+	memset(vstr, 0, sizeof(vstr_t));
+}
+
+// Release the character storage owned by vstr.
+//
+// The vstr is left empty (no buffer, size 0, alloc 0), so destroying it a
+// second time or appending to it again afterwards is safe.
+void vstr_destroy(vstr_t *vstr)
+{
+	free(vstr->data);
+
+	// do not leave a dangling pointer or stale bookkeeping behind: a
+	// later vstr_destroy would otherwise free the same block twice
+	vstr->data = NULL;
+	vstr->size = 0;
+	vstr->alloc = 0;
+}
+
+// Forget the stored characters. Only the size is reset; the allocated
+// buffer is kept so that it can be reused by later vstr_putc calls.
+void vstr_clear(vstr_t *vstr)
+{
+	vstr->size = 0;
+}
+
+// Grow the buffer of vstr to (alloc + 4) * 3 / 2 bytes.
+//
+//RETURN VALUES
+// 0 on success
+// 1 when the buffer could not be grown; vstr is left untouched and
+//   still owns its old buffer
+static int vstr_enlarge(vstr_t *vstr)
+{
+	size_t new_alloc;
+	void *new_data;
+
+	// refuse to grow when (alloc + 4) * 3 would wrap around
+	if (vstr->alloc > (size_t)-1 / 3 - 4)
+		return 1;
+
+	new_alloc = (vstr->alloc + 4) * 3 / 2;
+
+	// realloc leaves the old block valid on failure, so its result must
+	// not overwrite vstr->data until it is known to be non-NULL
+	new_data = realloc(vstr->data, new_alloc);
+	if (!new_data)
+		return 1;
+
+	vstr->data = new_data;
+	vstr->alloc = new_alloc;
+	return 0;
+}
+
+// Append a single character to vstr, growing the buffer when needed.
+//
+//RETURN VALUES
+// -ENOMEM when the buffer could not be grown
+// 0 on success
+int vstr_putc(vstr_t *vstr, char ch)
+{
+	// one byte past the new character is always kept in reserve, because
+	// vstr_to_cstr writes the terminator at data[size]
+	if (vstr->size + 2 > vstr->alloc && vstr_enlarge(vstr) != 0)
+		return -ENOMEM;
+
+	vstr->data[vstr->size++] = ch;
+	return 0;
+}
+
+// Compare the content of vstr with the NUL-terminated string str.
+//
+//RETURN VALUES
+// <0 when vstr sorts before str
+// 0 when both hold exactly the same characters
+// >0 when vstr sorts after str
+// note: a string that is a proper prefix of the other one sorts first
+int vstr_cmp(vstr_t *vstr, const char *str)
+{
+	size_t len, common;
+	int diff;
+
+	len = strlen(str);
+	common = vstr->size < len ? vstr->size : len;
+
+	// skip memcmp for an empty range: data is NULL for a vstr that was
+	// never written to, and memcmp must not be given a NULL pointer
+	if (common) {
+		diff = memcmp(vstr->data, str, common);
+		if (diff)
+			return diff;
+	}
+
+	// the common prefix matches, so the lengths decide; without this
+	// step an empty vstr would compare equal to every string
+	if (vstr->size < len)
+		return -1;
+	if (vstr->size > len)
+		return 1;
+	return 0;
+}
+
+// Copy the content of vstr into a freshly allocated, NUL-terminated
+// string. The caller owns the result and releases it with free().
+//
+//RETURN VALUES
+// NULL when the allocation failed
+// the new string otherwise
+char *vstr_strdup(vstr_t *vstr)
+{
+	char *str;
+
+	str = malloc(vstr->size + 1);
+	if (!str)
+		return NULL;
+
+	// data is NULL for a vstr that was never written to; memcpy must
+	// not be given a NULL pointer, even for a size of zero
+	if (vstr->size)
+		memcpy(str, vstr->data, vstr->size);
+	str[vstr->size] = '\0';
+
+	return str;
+}
+
+// Terminate the content of vstr in place and return it as a C string.
+//
+// The returned pointer refers to storage owned by vstr (or to a shared
+// empty string when vstr has no buffer); it must not be freed and is only
+// valid until vstr is modified or destroyed.
+char *vstr_to_cstr(vstr_t *vstr)
+{
+	static char empty[1] = "";
+
+	// no buffer exists until the first successful vstr_putc, so there is
+	// nowhere to write the terminator yet
+	if (!vstr->data)
+		return empty;
+
+	// vstr_putc always keeps the byte at data[size] in reserve
+	vstr->data[vstr->size] = '\0';
+	return vstr->data;
+}
+
+// Open the file at path for lexing and reset the lexer state in ls.
+// Tokens are accumulated in the caller-supplied vstr token.
+//
+//RETURN VALUES
+// -errno when the file could not be opened
+// 0 on success
+// note: path is stored as a pointer, not copied
+int lexer_open(lexer_state_t *ls, const char *path, vstr_t *token)
+{
+	ls->error = 0;
+	ls->path = path;
+
+	ls->fp = fopen(path, "r");
+	if (ls->fp == NULL)
+		return -errno;
+
+	ls->token = token;
+
+	// start with an empty read buffer
+	ls->buf_c = ls->buf;
+	ls->buf_e = ls->buf;
+
+	// nothing has been consumed yet
+	ls->cc = 0;
+	ls->lc = 0;
+	ls->Cc = 0;
+
+	ls->eof = false;
+	ls->in_token = false;
+	ls->in_quote = false;
+	ls->in_comment = false;
+
+	return 0;
+}
+
+// Tear down a lexer that was set up by a successful lexer_open: the
+// token vstr handed to lexer_open is destroyed and the file is closed.
+void lexer_close(lexer_state_t *ls)
+{
+	vstr_t *token = ls->token;
+
+	vstr_destroy(token);
+	fclose(ls->fp);
+}
+
+// Refill ls->buf from the file and rewind the buffer cursor.
+//
+//RETURN VALUES
+// <0 on error (the error code is also stored in ls->error)
+// 0 on success
+// note: sets ls->eof to true if there's no more data left
+static int fill_buffer(lexer_state_t *ls)
+{
+	size_t count;
+
+	count = fread(ls->buf, 1, sizeof(ls->buf), ls->fp);
+	if (count < sizeof(ls->buf)) {
+		if (ferror(ls->fp)) {
+			// a failed fread is not guaranteed to set errno; fall
+			// back to EIO so that the failure is never reported
+			// as 0 (success) to the caller
+			int err = errno ? errno : EIO;
+
+			// record the failure like read_buffer does, so that
+			// lexer_perror reports it as a system error
+			ls->error = err;
+			return -err;
+		}
+
+		// a short read without an error means end of file
+		ls->eof = true;
+	}
+
+	ls->buf_c = ls->buf;
+	ls->buf_e = ls->buf + count;
+	return 0;
+}
+
+// Consume characters from the read buffer until one token is complete.
+//
+// Tokens are separated by unquoted whitespace. A double quote that is not
+// preceded by a backslash toggles quoting; the quote characters themselves
+// are not stored in the token. Two consecutive slashes start a comment
+// that lasts until the end of the line.
+//
+//RETURN VALUES
+// -ENOMEM
+// -EAGAIN when the buffer runs out
+// 0 on success
+// 1 when no data is left
+static int read_buffer(lexer_state_t *ls)
+{
+	while (ls->buf_c < ls->buf_e) {
+		const char ch = *ls->buf_c;
+		bool ret_token = false;
+
+		if (ls->in_comment) {
+			if (ch == '\n')
+				ls->in_comment = false;
+		} else if (isspace((unsigned char)ch) && !ls->in_quote) {
+			// the cast matters: passing a negative char to
+			// isspace is undefined behaviour
+			if (ls->in_token) {
+				ls->in_token = false;
+				ret_token = true;
+			}
+		} else if (ch == '/' && (ls->cc && ls->last == '/')) {
+			ls->in_comment = true;
+			ls->in_token = false;
+
+			ls->token->size--; // remove the first slash
+			if (ls->token->size)
+				ret_token = true;
+		} else if (ch == '\"' &&
+		           (!ls->cc || ls->last != '\\')) {
+			// ls->last is only meaningful once a character has
+			// been consumed; a quote that is the very first
+			// character of the input cannot be escaped
+			ls->in_quote = !ls->in_quote;
+
+			if (!ls->in_quote) {
+				ls->in_token = false;
+				ret_token = true;
+			}
+		} else {
+			ls->in_token = true;
+		}
+
+		if (ls->in_token && vstr_putc(ls->token, ch)) {
+			ls->error = ENOMEM;
+			return -ENOMEM;
+		}
+
+		ls->last = *ls->buf_c;
+		ls->buf_c++;
+		ls->cc++;
+
+		// the newline itself is not counted as a column of the next
+		// line, so columns are numbered the same way on every line
+		if (ch == '\n') {
+			ls->lc++;
+			ls->Cc = 0;
+		} else {
+			ls->Cc++;
+		}
+
+		if (ret_token)
+			return 0;
+	}
+
+	// buffer exhausted: at end of file whatever has been collected so
+	// far is the last token
+	if (ls->eof)
+		return ls->token->size > 0 ? 0 : 1;
+
+	return -EAGAIN;
+}
+
+// Read the next token into ls->token, replacing its previous content.
+// The read buffer is refilled from the file as often as necessary.
+//
+//RETURN VALUES
+// <0 on error
+// 0 on success
+// 1 when no data is left
+int lexer_get_token(lexer_state_t *ls)
+{
+	int status;
+
+	vstr_clear(ls->token);
+
+	for (;;) {
+		status = read_buffer(ls);
+		if (status != -EAGAIN)
+			break;
+
+		// the buffer ran dry in the middle of a token: get more data
+		status = fill_buffer(ls);
+		if (status < 0)
+			break;
+	}
+
+	return status;
+}
+
+void lexer_perror(lexer_state_t *ls, const char *fmt, ...)
+{
+ va_list vl;
+
+ eprintf("%s:%zu:%zu: ", ls->path, ls->lc + 1, ls->Cc + 1);
+
+ if (ls->error) {
+ perror(NULL);
+ } else {
+ va_start(vl, fmt);
+ vfprintf(stderr, fmt, vl);
+ va_end(vl);
+ }
+}
+
+void lexer_perror_eg(lexer_state_t *ls, const char *expected)
+{
+ if (ls->eof && ls->buf_c == ls->buf_e)
+ lexer_perror(ls, "expected %s, got EOF\n", expected);
+ else
+ lexer_perror(ls, "expected %s, got \"%s\"\n", expected,
+ vstr_to_cstr(ls->token));
+}