summaryrefslogtreecommitdiff
path: root/static-analysis/lexer.c
blob: 2c64ab74757f1daa79768060aabf83a0b83a118d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include <assert.h>
#include <string.h>
#include <ctype.h>
#include "compiler.h"

// Output of lex(): the lexemes produced so far, appended in the order they
// are scanned. Capacity is MAX_LEXEMES; lex() asserts before writing a new
// entry that there is still room.
struct lexeme LEXEMES[MAX_LEXEMES] = {0};
// Number of entries of LEXEMES that have been filled in by lex().
size_t N_LEXEMES = 0;

// Split the NUL-terminated source text @s into lexemes, appending each one to
// the global LEXEMES array and bumping N_LEXEMES.
//
// @s is modified in place: every character is lowercased before scanning.
// Whitespace, /* */ comments, // comments and '#' lines (including
// backslash-continued ones) are skipped. Anything else is currently emitted
// as a LEX_OP lexeme. Each emitted lexeme stores a strndup()'d copy of its
// text, its label, and the 1-based number of the line it starts on.
//
// NOTE: this is still a skeleton. The assert(!"unimplemented") inside the loop
// fires on the first character of any non-empty input unless NDEBUG is
// defined; see the TODO next to it.
void lex(char *s) {
    // <ctype.h> functions take an int that must hold an unsigned char value
    // (or EOF); plain char may be negative, so cast before calling.
    for (char *ptr = s; *ptr; ptr++)
        *ptr = (char)tolower((unsigned char)*ptr);

    // Line counting is lazy: @line_scan trails behind @s and is only advanced
    // (counting newlines) when a lexeme is actually emitted.
    char *line_scan = s;
    size_t line_no = 1;

    // Invariant: every branch below leaves @s one past the last character it
    // consumed, and never past the terminating NUL. Each branch consumes at
    // least one character, so the loop always makes progress.
    while (*s) {
        enum lex_label label = LEX_NONE;
        char *start = s;
        // TODO: at the very least, also need to lex here: string literals,
        // character literals, hex literals, int literals, identifiers.
        // Note in the branches below, the first four are cases where we
        // throw away some characters (indicated by label = LEX_NONE). In the
        // final branch we lex operation characters like [ and !=.
        assert(!"unimplemented");
        if (isspace((unsigned char)*s)) {
            s++;
        } else if (prefix(s, "/*")) {
            // Start looking for the terminator after the opener so that the
            // '*' of "/*" cannot double as the '*' of "*/" (as in "/*/").
            // NOTE(review): assumes prefix(a, b) tests whether a starts with b.
            s += 2;
            while (*s && !prefix(s, "*/"))
                s++;
            if (*s) s += 2;  // skip the closing "*/" if the comment was closed
        } else if (prefix(s, "//")) {
            // Stop at the newline (or end of input); the newline itself is
            // consumed as whitespace on the next iteration.
            while (*s && *s != '\n')
                s++;
        } else if (s[0] == '#') {
            while (*s && *s != '\n') {
                // A backslash hides the next character (e.g. a line
                // continuation), but never step over the terminating NUL.
                if (*s == '\\' && s[1])
                    s++;
                s++;
            }
        } else {
            label = LEX_OP;
            if (strchr("()[]{}.:?,;", *s))
                s++;
            else if (prefix(s, "->"))
                s += 2;
            else if (strchr("-+&|", *s) && *s == s[1])
                s += 2;  // doubled operators: -- ++ && ||
            else if (prefix(s, "<<") || prefix(s, ">>"))
                s += 2 + (s[2] == '=');  // shift, optionally shift-assign
            else
                s += 1 + (s[1] == '=');  // single char, optionally followed by '='
        }

        // Shared emit code: the lexeme is the text in [start, s) with @label.
        // Skipped input (label == LEX_NONE) produces nothing.
        if (label == LEX_NONE) continue;
        assert(N_LEXEMES < sizeof(LEXEMES) / sizeof(LEXEMES[0]));
        LEXEMES[N_LEXEMES].string = strndup(start, (size_t)(s - start));
        assert(LEXEMES[N_LEXEMES].string != NULL);  // allocation failure
        LEXEMES[N_LEXEMES].label = label;
        // Catch the line counter up to where this lexeme starts.
        for (; line_scan != start; line_scan++)
            line_no += (*line_scan == '\n');
        LEXEMES[N_LEXEMES++].line_no = line_no;
    }
}
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback