diff options
Diffstat (limited to 'src/lexer.c')
-rw-r--r-- | src/lexer.c | 254 |
1 files changed, 254 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..5863ef6 --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,254 @@ +/* +Copyright (C) 2017 Paweł Redman + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 3 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software Foundation, +Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "shared.h" +#include <ctype.h> + +void vstr_init(vstr_t *vstr) +{ + memset(vstr, 0, sizeof(*vstr)); +} + +void vstr_destroy(vstr_t *vstr) +{ + free(vstr->data); +} + +void vstr_clear(vstr_t *vstr) +{ + vstr->size = 0; +} + +static int vstr_enlarge(vstr_t *vstr) +{ + size_t new_alloc; + + new_alloc = (vstr->alloc + 4) * 3 / 2; + + vstr->data = realloc(vstr->data, new_alloc); + if (!vstr->data) + return 1; + + vstr->alloc = new_alloc; + return 0; +} + +int vstr_putc(vstr_t *vstr, char ch) +{ + // note: keep at least one character free at all times for vstr_termz + if (vstr->size + 2 > vstr->alloc) + if (vstr_enlarge(vstr)) + return -ENOMEM; + + vstr->data[vstr->size] = ch; + vstr->size++; + return 0; +} + +int vstr_cmp(vstr_t *vstr, const char *str) +{ + size_t len; + + len = strlen(str); + if (vstr->size < len) + len = vstr->size; + + return memcmp(vstr->data, str, len); +} + +char *vstr_strdup(vstr_t *vstr) +{ + char *str; + + str = malloc(vstr->size + 1); + if (!str) + return NULL; + + memcpy(str, vstr->data, vstr->size); + str[vstr->size] = 0; + + return str; +} + +char *vstr_to_cstr(vstr_t *vstr) +{ + vstr->data[vstr->size] = '\0'; + return vstr->data; +} + +int lexer_open(lexer_state_t *ls, const char *path, vstr_t *token) +{ + ls->error = 0; + ls->path = path; + + ls->fp = fopen(path, "r"); + if (!ls->fp) + return -errno; + + ls->eof = false; + + ls->token = token; + ls->buf_e = ls->buf_c = ls->buf; + ls->cc = ls->lc = ls->Cc = 0; + + ls->in_token = false; + ls->in_quote = false; + ls->in_comment = false; + + return 0; +} + +void lexer_close(lexer_state_t *ls) +{ + vstr_destroy(ls->token); + fclose(ls->fp); +} + +//RETURN VALUES +// <0 on error +// 0 on success +// note: sets ls->eof to true if there's no more data left +static int fill_buffer(lexer_state_t *ls) +{ + size_t read; + + read = fread(ls->buf, 1, sizeof(ls->buf), ls->fp); + if (read < sizeof(ls->buf)) { + if (ferror(ls->fp)) + return -errno; + + ls->eof = true; + } + + ls->buf_c = ls->buf; + ls->buf_e = ls->buf + read; + return 0; +} + +//RETURN VALUES +// -ENOMEM +// -EAGAIN when the buffer runs out +// 0 on success +// 1 when no data is left +static int read_buffer(lexer_state_t *ls) +{ + while (ls->buf_c < ls->buf_e) { + bool ret_token = false; + + if (*ls->buf_c == '\n') { + ls->lc++; + ls->Cc = 0; + } + + if (ls->in_comment) { + if (*ls->buf_c == '\n') + ls->in_comment = false; + } else if (isspace(*ls->buf_c) && !ls->in_quote) { + if (ls->in_token) { + ls->in_token = false; + ret_token = true; + } + } else if (*ls->buf_c == '/' && (ls->cc && ls->last == '/')) { + ls->in_comment = true; + ls->in_token = false; + + ls->token->size--; // remove the first slash + if (ls->token->size) + ret_token = true; + } else if (*ls->buf_c == '\"' && + (ls->cc && ls->last != '\\')) { + ls->in_quote = !ls->in_quote; + + if (!ls->in_quote) { + ls->in_token = false; + ret_token = true; + } + } else { + if (!ls->in_token) { + ls->in_token = true; + } + } + + if (ls->in_token) + if (vstr_putc(ls->token, *ls->buf_c)) { + ls->error = ENOMEM; + return -ENOMEM; + } + + ls->last = *ls->buf_c; + ls->buf_c++; + ls->cc++; + ls->Cc++; + + if (ret_token) + return 0; + } + + if (ls->eof) { + if (ls->token->size > 0) + return 0; + return 1; + } + + return -EAGAIN; +} + +//RETURN VALUES +// <0 on error +// 0 on success +// 1 when no data is left +int lexer_get_token(lexer_state_t *ls) +{ + int ret; + + vstr_clear(ls->token); + + while (1) { + ret = read_buffer(ls); + if (ret != -EAGAIN) + return ret; + + ret = fill_buffer(ls); + if (ret < 0) + return ret; + } +} + +void lexer_perror(lexer_state_t *ls, const char *fmt, ...) +{ + va_list vl; + + eprintf("%s:%zu:%zu: ", ls->path, ls->lc + 1, ls->Cc + 1); + + if (ls->error) { + perror(NULL); + } else { + va_start(vl, fmt); + vfprintf(stderr, fmt, vl); + va_end(vl); + } +} + +void lexer_perror_eg(lexer_state_t *ls, const char *expected) +{ + if (ls->eof && ls->buf_c == ls->buf_e) + lexer_perror(ls, "expected %s, got EOF\n", expected); + else + lexer_perror(ls, "expected %s, got \"%s\"\n", expected, + vstr_to_cstr(ls->token)); +} |