From cf413fb1d821d944e0202b16400950191b9c9e13 Mon Sep 17 00:00:00 2001 From: Akuli Date: Sun, 28 Jul 2024 16:00:43 +0300 Subject: [PATCH] Read assertion from file with offsets --- src/jou_compiler.h | 11 +++++- src/parse.c | 90 ++++++++++++++++++++++++++++------------------ src/tokenize.c | 14 +++++++- 3 files changed, 79 insertions(+), 36 deletions(-) diff --git a/src/jou_compiler.h b/src/jou_compiler.h index c505e4a4..3284dd49 100644 --- a/src/jou_compiler.h +++ b/src/jou_compiler.h @@ -91,7 +91,6 @@ struct Token { TOKEN_OPERATOR, TOKEN_END_OF_FILE, // Marks the end of an array of Token } type; - Location location; union { int16_t short_value; // TOKEN_SHORT int32_t int_value; // TOKEN_INT @@ -102,6 +101,16 @@ struct Token { char name[100]; // TOKEN_NAME and TOKEN_KEYWORD. Also TOKEN_DOUBLE & TOKEN_FLOAT (LLVM wants a string anyway) char operator[4]; // TOKEN_OPERATOR } data; + + /* + Contains only the line number, not column. In some cases you can use + start_offset and end_offset to work around that. + */ + Location location; + + // Number of bytes from start of file to start/end of this token. + long start_offset; + long end_offset; }; // Constants can appear in AST and also compilation steps after AST. diff --git a/src/parse.c b/src/parse.c index a821f33a..153ed818 100644 --- a/src/parse.c +++ b/src/parse.c @@ -725,42 +725,65 @@ static enum AstStatementKind determine_the_kind_of_a_statement_that_starts_with_ return AST_STMT_EXPRESSION_STATEMENT; } -// TODO: this function is just bad... 
-static char *read_assertion_from_file(Location start, Location end)
+// Returns the source text of the tokens from start up to (not including) end as one string.
+// The text is read back from the file named by error_location using the tokens' byte offsets.
+static char *read_assertion_from_file(Location error_location, const Token *start, const Token *end)
 {
-    assert(start.filename == end.filename);
-    FILE *f = fopen(start.filename, "rb");
-    assert(f);
-
-    char line[1024];
-    int lineno = 1;
-    while (lineno < start.lineno) {
-        fgets(line, sizeof line, f);
-        lineno++;
-    }
+    FILE *f = fopen(error_location.filename, "rb");
+    if (!f)
+        goto error;
+
+    List(char) result = {0};
+
+    long ostart = 0, oend = 0;  // offsets within file to include
+    for (const Token *t = start; t < end; t++) {
+        assert(t->start_offset < t->end_offset);
+
+        if (t == start || t->location.lineno != t[-1].location.lineno) {
+            // First token of a new line
+            ostart = t->start_offset;
+            oend = t->end_offset;
+        } else {
+            // Include more tokens from the line of code so that this token is added too.
+            // We cannot include the entire line because it might contain comments.
+            assert(oend <= t->start_offset);
+            oend = t->end_offset;
+        }
 
-    List(char) str = {0};
-    while (lineno <= end.lineno) {
-        memset(line, 0, sizeof line);
-        fgets(line, sizeof line, f);
-        lineno++;
-
-        if (strstr(line, "#"))
-            *strstr(line, "#") = '\0';
-        trim_whitespace(line);
-        // Add spaces between the lines, but not after '(' or before ')'
-        if (line[0] != ')' && str.len >= 1 && str.ptr[str.len-1] != '(')
-            AppendStr(&str, " ");
-        AppendStr(&str, line);
+        if (t == end-1 || t[0].location.lineno != t[1].location.lineno) {
+            // Last token of a line. Copy bytes ostart..oend-1 from the file into result.
+            if (fseek(f, ostart, SEEK_SET) != 0)  // fseek() reports failure as any nonzero value
+                goto error;
+            if (result.len > 0)
+                Append(&result, '\n');
+            for (long i = ostart; i < oend; i++) {
+                int c = fgetc(f);
+                if (c == EOF || c == '\r' || c == '\n')
+                    goto error;  // the range must not run past end of file or cover a line break
+                Append(&result, (char)c);
+            }
+        }
     }
 
-    fclose(f);
-    Append(&str, '\0');
+    fclose(f);  // everything needed is now in result; without this the FILE would leak
+    /*
+    Join lines with spaces, but do not put spaces just after '(' or before ')'.
+    This makes multiline asserts nicer, so "assert (\n foo and bar\n)"
+    shows "foo and bar" as the assert condition.
+    */
+    Append(&result, '\0');
+    char *p;
+    while ((p = strstr(result.ptr, "\n"))) {
+        if ((p > result.ptr && p[-1] == '(') || (p[1] == ')')) {
+            memmove(p, p+1, strlen(p));  // delete newline character at p
+        } else {
+            *p = ' ';  // join lines with a space
+        }
+    }
+    return result.ptr;
 
-    if(!strncmp(str.ptr, "assert",6))
-        memmove(str.ptr, &str.ptr[6], strlen(&str.ptr[6]) + 1);
-    trim_whitespace(str.ptr);
-    return str.ptr;
+error:
+    fail(error_location, "internal error: cannot read assertion text from file");
 }
 
 // does not eat a trailing newline
@@ -777,10 +800,9 @@ static AstStatement parse_oneline_statement(ParserState *ps)
     } else if (is_keyword(ps->tokens, "assert")) {
         ps->tokens++;
         result.kind = AST_STMT_ASSERT;
-        Location start = ps->tokens->location;
+        const Token *condstart = ps->tokens;
         result.data.assertion.condition = parse_expression(ps);
-        Location end = ps->tokens->location;
-        result.data.assertion.condition_str = read_assertion_from_file(start, end);
+        result.data.assertion.condition_str = read_assertion_from_file(result.location, condstart, ps->tokens);
     } else if (is_keyword(ps->tokens, "pass")) {
         ps->tokens++;
         result.kind = AST_STMT_PASS;
diff --git a/src/tokenize.c b/src/tokenize.c
index ba9e2907..70014562 100644
--- a/src/tokenize.c
+++ b/src/tokenize.c
@@ -420,10 +420,20 @@ static void handle_parentheses(struct State *st, const struct Token *t)
     }
 }
 
+// Returns the offset of the current location as number of bytes from start of file.
+static long get_offset(const struct State *st)
+{
+    const long file_pos = ftell(st->f);  // current position of the underlying stream
+    if (file_pos < 0)
+        fail(st->location, "internal error: ftell() failed");
+
+    return file_pos - st->pushback.len;  // presumably pushback = bytes read from the file but not consumed yet
+}
+
 static Token read_token(struct State *st)
 {
     while(1) {
-        Token t = { .location = st->location };
+        Token t = { .location = st->location, .start_offset = get_offset(st) };
         char c = read_byte(st);
 
         switch(c) {
@@ -482,6 +492,8 @@ static Token read_token(struct State *st)
             }
             break;
         }
+
+        t.end_offset = get_offset(st);  // offset just after the bytes consumed for this token
         return t;
     }
 }