diff options
author | Matthew Sotoudeh <matthew@masot.net> | 2024-03-11 16:33:24 -0700 |
---|---|---|
committer | Matthew Sotoudeh <matthew@masot.net> | 2024-03-11 16:33:24 -0700 |
commit | a9292a98cc6c65e2a4ad6da20937ef7568a4143d (patch) | |
tree | f921866c475208359285ff3f606c0fafa1812a32 | |
parent | 35fa21e59ad44de3ac5d075a3c1ae60d462a1a13 (diff) |
earlpy
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | DESIGN.txt | 57 | ||||
-rw-r--r-- | earlpy/earlpy.py | 146 | ||||
-rw-r--r-- | examples/check_errors.py | 14 | ||||
-rw-r--r-- | examples/dash_var.c | 678 | ||||
-rw-r--r-- | examples/error.c | 2 | ||||
-rw-r--r-- | examples/simple.c | 2 | ||||
-rw-r--r-- | examples/small_dash_var.c | 678 | ||||
-rw-r--r-- | examples/tiny.c | 430 | ||||
-rwxr-xr-x | extras/ftdetect/earlpy.vim | 1 | ||||
-rw-r--r-- | extras/syntax/earlpy.vim | 20 | ||||
-rw-r--r-- | grammars/c/disambiguate.c | 84 | ||||
-rw-r--r-- | grammars/c/grammar.earlpy | 265 | ||||
-rw-r--r-- | grammars/c/grammar.txt | 130 | ||||
-rw-r--r-- | grammars/c/preprocess.c | 45 | ||||
-rw-r--r-- | parse.py | 6 |
16 files changed, 2375 insertions, 184 deletions
@@ -3,3 +3,4 @@ __pycache__ parser parser.c parser.l +!earlpy/parser.c @@ -1,3 +1,60 @@ +====== Performance Issues ======== +The biggest performance issues involve parsing grammar + + LIST nonterm + LIST , EXPR + EXPR + +With a huge input of + + A , A , A , A , ... + +Naively, the parser will try to match an EXPR starting at each of the As. + +I guess this is because even if you are associating so far like + + (((A, A), A), A), ... + +It doesn't know if, later on in the parse, you might want to reassociate 'across +the boundary': + + (((A, A), A), (A, B)), ... + +At this point, I don't think it's worth trying to do much more general-purpose +optimization for this case. Instead, I think we should have callouts to user +code to give hints. Two possible hint types: + + 1. "Parse this region as this parse tree." Easy to use: when you get there, + just skip all of those indices and complete that tree with any watchers. + (Might need to do prediction at the first index.) + +Actually, that's probably by far the easiest. + +====== More Disambiguation Issues ======== +What if we poison X, use it in completion Y, but then overwrite X with a +nonpoisoned Z? Then Y will be incorrectly poisoned... + +====== More Disambiguation Issues ======== +Consider + + STMTS nonterm .start + STMTS STMT + STMT + +vs. 
+ + STMTS nonterm .start + STMT + STMTS STMT + +Swapping this can actually impact what matches happen in STMT :O + +====== More Disambiguation Issues ======== +Associativity and precedence can have weird interplay + +E.g., maybe you can get Stmts(Error(1), Stmt(2, 3)) which has better +associativity than Stmts(Stmt(1, 2), Stmt(3)) + ====== Disambiguation Issues ======== Consider two possible parses of 0 + 1 * 5 + 4 ( ( 0 + ( 1 * 5 ) ) + 4 ) diff --git a/earlpy/earlpy.py b/earlpy/earlpy.py index 3b0deab..2944c51 100644 --- a/earlpy/earlpy.py +++ b/earlpy/earlpy.py @@ -9,9 +9,8 @@ DIR = pathlib.Path(__file__).parent.resolve() class Parser: def __init__(self, parser_dir): - assert parser_dir and parser_dir[0] != '/' - parser_dir = parser_dir - files = sorted([f"{parser_dir}/grammar.txt", + assert parser_dir and parser_dir != '/' + files = sorted([f"{parser_dir}/grammar.earlpy", *glob(f"{parser_dir}/*.c"), f"{DIR}/parser.c", __file__]) @@ -27,7 +26,7 @@ class Parser: if open(lex_path, "r").readline()[3:][:-3].strip() == hashes: already_built = True - lines = self.parse_grammar(f"{parser_dir}/grammar.txt") + lines = self.parse_grammar(f"{parser_dir}/grammar.earlpy") if not already_built: if glob(f"{parser_dir}/parser"): subprocess.run(f"rm {parser_dir}/parser", shell=True) @@ -43,7 +42,7 @@ class Parser: shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if res.returncode: print(res.stderr.decode("utf-8")) assert res.returncode == 0 - res = subprocess.run(f"gcc -O3 {parser_dir}/parser.c -o {parser_dir}/parser", + res = subprocess.run(f"gcc -g -O3 {parser_dir}/parser.c -ljemalloc -o {parser_dir}/parser", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if res.returncode: print(res.stderr.decode("utf-8")) assert res.returncode == 0 @@ -66,12 +65,20 @@ class Parser: raise ValueError contents = open(path, "r").read() + offset_to_line = dict() + line = 1 + for i, c in enumerate(open(path, "rb").read()): + offset_to_line[i] = line + if c == '\n' or chr(c) 
== '\n': line += 1 n_tokens, = struct.unpack("Q", res.stdout[:8]) # symbol id, start idx, length tokens = list(struct.iter_unpack("QQQ", res.stdout[8:8+(8*3*n_tokens)])) - tokens = [(token, contents[token[1]:token[1]+token[2]]) - for token in tokens] + tokens = [Token(self.id_to_symbol[symbol], + contents[offset:offset+length], + offset_to_line[offset], + path) + for (symbol, offset, length) in tokens] # production id nodes = [t[0] for t in struct.iter_unpack("Q", res.stdout[8+(8*3*n_tokens):])] # print(nodes) @@ -83,13 +90,7 @@ class Parser: stack = [root] while stack: node = stack[-1] - # print(len(stack)) - # if isinstance(node, tuple): - # print("\t", node) - # else: - # print("\t", node.symbol.name, [s.name for s in node.production]) - - if (isinstance(node, tuple) + if (isinstance(node, Token) or len(node.production) == len(node.contents)): stack.pop() if stack: stack[-1].contents.append(node) @@ -220,11 +221,16 @@ class Parser: put(", \"" + symbol.name + "\"") put(" };") for symbol in ordered_symbols: - if symbol.name.isalnum(): + if symbol.name.replace("_", "").isalnum(): putl(f"#define SYMBOL_{symbol.name} {symbol.id}") if symbol.is_start: putl(f"#define START_SYMBOL {symbol.id}") + putl("char SYMBOL_TO_POISON[] = { 0") + for symbol in ordered_symbols: + put(", " + ("1" if symbol.poisoned else "0")) + put(" };") + putl("prod_id_t SYMBOL_ID_TO_PRODUCTION_IDS[N_SYMBOLS][MAX_N_PRODUCTIONS] = { {0}") # [(production, Symbol), ...] self.productions = [([], None, None)] @@ -265,6 +271,37 @@ class Parser: if i != 0: put(f", {symbol.id}") put(" };") + # Production hints: for this production, what does the leading symbol + # need to be? 
+ # symbol -> symbol | True (multiple) + symbol_to_first = {symbol: symbol + for symbol in self.id_to_symbol.values() + if symbol.kind != "nonterm"} + fixedpoint = False + while not fixedpoint: + fixedpoint = True + for symbol in self.id_to_symbol.values(): + if symbol.kind != "nonterm": continue + head_symbols = [self.name_to_symbol[production[0]] + for production in symbol.contents] + firsts = [symbol_to_first.get(head, None) + for head in head_symbols] + new_first = (firsts[0] if all(f == firsts[0] for f in firsts) + else True) + if symbol_to_first.get(symbol, None) != new_first: + symbol_to_first[symbol] = new_first + fixedpoint = False + + putl("symbol_id_t PRODUCTION_ID_TO_FIRST[N_PRODUCTIONS] = { 0") + for i, (production, _, _) in enumerate(self.productions): + if i == 0: continue + if not production or symbol_to_first.get(production[0], True) is True: + put(", 0") + else: + put(f", {symbol_to_first[production[0]].id}") + put(" };") + + ##### DONE: output the lexer putl("void lex_symbol(symbol_id_t);") putl("%}") putl("%%") @@ -292,6 +329,7 @@ class Symbol: self.name = parts[0] self.kind = parts[1] self.is_start = ".start" in parts[2:] + self.poisoned = ".poison" in parts[2:] self.contents = [] self.production_names = [] self.id = None @@ -321,11 +359,87 @@ class Node: self.production = production self.contents = [] + def line_numbers(self): + return self.contents[0].line_numbers() + + def max_line_numbers(self): + return self.contents[-1].max_line_numbers() + + def file_name(self): + return self.contents[-1].file_name() + def pprint(self): def pprint(other): if isinstance(other, Node): return other.pprint() - return other[1] + return other.pprint() if len(self.contents) == 1: return pprint(self.contents[0]) return '(' + ' '.join(map(pprint, self.contents)) + ')' + + def print_tree(self, depth=0): + print((' ' * depth) + self.symbol.name) + for arg in self.contents: + arg.print_tree(depth + 2) + + def isa(self, *patterns): + for pattern in patterns: + if 
"->" in pattern: + symbol, production = pattern.split("->") + symbol = symbol.strip() + if symbol != self.symbol.name: continue + production = production.split() + if production[-1] != "..." and len(production) != len(self.pprint_production().split()[2:]): + continue + for desired, real in zip(production, self.pprint_production().split()[2:]): + if desired == "...": return True + if desired != real: break + else: return True + else: + symbol = pattern.strip() + if symbol == self.symbol.name: + return True + return False + + def hasa(self, symbol): + return any(sub.name == symbol for sub in self.production) + + def pprint_production(self): + parts = [] + for s in self.production: + if "::" in s.name: parts.append(s.name[s.name.index("::")+2:]) + else: parts.append(s.name) + return f"{self.symbol.name} -> {' '.join(parts)}" + + def find(self, kind, which=0, total=1): + found = [] + for s in self.subtrees(): + if s.symbol.name == kind: + found.append(s) + if len(found) != total: raise ValueError + return found[which] + + def subtrees(self): return self.contents + def __getitem__(self, i): return self.contents[i] + +class Token: + def __init__(self, symbol, string, line_number, file_name): + self.symbol = symbol + self.string = string + self.line_number = line_number + self.file_name_ = file_name + + def pprint(self): + return self.string + + def line_numbers(self): + return {self.line_number} + + def file_name(self): + return self.file_name_ + + def max_line_numbers(self): + return self.line_numbers() + + def print_tree(self, depth=0): + print((' ' * depth) + self.symbol.name , self.string , self.line_number) diff --git a/examples/check_errors.py b/examples/check_errors.py new file mode 100644 index 0000000..70f9f13 --- /dev/null +++ b/examples/check_errors.py @@ -0,0 +1,14 @@ +import earlpy +import sys + +p = earlpy.Parser("grammars/c") +node = p.parse_file(sys.argv[1]) +node.print_tree() +def visit(n): + if isinstance(n, earlpy.Token): + return + if n.symbol.name == 
"ERROR": + print(n.line_numbers(), n.pprint()) + else: + for a in n.contents: visit(a) +visit(node) diff --git a/examples/dash_var.c b/examples/dash_var.c new file mode 100644 index 0000000..b70d72c --- /dev/null +++ b/examples/dash_var.c @@ -0,0 +1,678 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 1997-2005 + * Herbert Xu <herbert@gondor.apana.org.au>. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Kenneth Almquist. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#ifdef HAVE_PATHS_H +#include <paths.h> +#endif + +/* + * Shell variables. + */ + +#include "shell.h" +#include "output.h" +#include "expand.h" +#include "nodes.h" /* for other headers */ +#include "exec.h" +#include "syntax.h" +#include "options.h" +#include "mail.h" +#include "var.h" +#include "memalloc.h" +#include "error.h" +#include "mystring.h" +#include "parser.h" +#include "show.h" +#ifndef SMALL +#include "myhistedit.h" +#endif +#include "system.h" + + +#define VTABSIZE 39 + + +struct localvar_list { + struct localvar_list *next; + struct localvar *lv; +}; + +MKINIT struct localvar_list *localvar_stack; + +const char defpathvar[] = + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"; +char defifsvar[] = "IFS= \t\n"; +MKINIT char defoptindvar[] = "OPTIND=1"; + +int lineno; +char linenovar[sizeof("LINENO=")+sizeof(int)*CHAR_BIT/3+1] = "LINENO="; + +/* Some macros in var.h depend on the order, add new variables to the end. 
*/ +struct var varinit[] = { +#if ATTY + { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "ATTY\0", 0 }, +#endif + { 0, VSTRFIXED|VTEXTFIXED, defifsvar, 0 }, + { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAIL\0", changemail }, + { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAILPATH\0", changemail }, + { 0, VSTRFIXED|VTEXTFIXED, defpathvar, changepath }, + { 0, VSTRFIXED|VTEXTFIXED, "PS1=$ ", 0 }, + { 0, VSTRFIXED|VTEXTFIXED, "PS2=> ", 0 }, + { 0, VSTRFIXED|VTEXTFIXED, "PS4=+ ", 0 }, + { 0, VSTRFIXED|VTEXTFIXED, defoptindvar, getoptsreset }, +#ifdef WITH_LINENO + { 0, VSTRFIXED|VTEXTFIXED, linenovar, 0 }, +#endif +#ifndef SMALL + { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "TERM\0", 0 }, + { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "HISTSIZE\0", sethistsize }, +#endif +}; + +STATIC struct var *vartab[VTABSIZE]; + +STATIC struct var **hashvar(const char *); +STATIC int vpcmp(const void *, const void *); +STATIC struct var **findvar(struct var **, const char *); + +/* + * Initialize the varable symbol tables and import the environment + */ + +#ifdef mkinit +INCLUDE <unistd.h> +INCLUDE <sys/types.h> +INCLUDE <sys/stat.h> +INCLUDE "cd.h" +INCLUDE "output.h" +INCLUDE "var.h" +MKINIT char **environ; +INIT { + char **envp; + static char ppid[32] = "PPID="; + const char *p; + struct stat64 st1, st2; + + initvar(); + for (envp = environ ; *envp ; envp++) { + p = endofname(*envp); + if (p != *envp && *p == '=') { + setvareq(*envp, VEXPORT|VTEXTFIXED); + } + } + + setvareq(defifsvar, VTEXTFIXED); + setvareq(defoptindvar, VTEXTFIXED); + + fmtstr(ppid + 5, sizeof(ppid) - 5, "%ld", (long) getppid()); + setvareq(ppid, VTEXTFIXED); + + p = lookupvar("PWD"); + if (p) + if (*p != '/' || stat64(p, &st1) || stat64(".", &st2) || + st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino) + p = 0; + setpwd(p, 0); +} + +RESET { + unwindlocalvars(0); +} +#endif + +static char *varnull(const char *s) +{ + return (strchr(s, '=') ?: nullstr - 1) + 1; +} + +/* + * This routine initializes the builtin variables. 
It is called when the + * shell is initialized. + */ + +void +initvar(void) +{ + struct var *vp; + struct var *end; + struct var **vpp; + + vp = varinit; + end = vp + sizeof(varinit) / sizeof(varinit[0]); + do { + vpp = hashvar(vp->text); + vp->next = *vpp; + *vpp = vp; + } while (++vp < end); + /* + * PS1 depends on uid + */ + if (!geteuid()) + vps1.text = "PS1=# "; +} + +/* + * Set the value of a variable. The flags argument is ored with the + * flags of the variable. If val is NULL, the variable is unset. + */ + +struct var *setvar(const char *name, const char *val, int flags) +{ + char *p, *q; + size_t namelen; + char *nameeq; + size_t vallen; + struct var *vp; + + q = endofname(name); + p = strchrnul(q, '='); + namelen = p - name; + if (!namelen || p != q) + sh_error("%.*s: bad variable name", namelen, name); + vallen = 0; + if (val == NULL) { + flags |= VUNSET; + } else { + vallen = strlen(val); + } + INTOFF; + p = mempcpy(nameeq = ckmalloc(namelen + vallen + 2), name, namelen); + if (val) { + *p++ = '='; + p = mempcpy(p, val, vallen); + } + *p = '\0'; + vp = setvareq(nameeq, flags | VNOSAVE); + INTON; + + return vp; +} + +/* + * Set the given integer as the value of a variable. The flags argument is + * ored with the flags of the variable. + */ + +intmax_t setvarint(const char *name, intmax_t val, int flags) +{ + int len = max_int_length(sizeof(val)); + char buf[len]; + + fmtstr(buf, len, "%" PRIdMAX, val); + setvar(name, buf, flags); + return val; +} + + + +/* + * Same as setvar except that the variable and value are passed in + * the first argument as name=value. Since the first argument will + * be actually stored in the table, it should not be a string that + * will go away. + * Called with interrupts off. 
+ */ + +struct var *setvareq(char *s, int flags) +{ + struct var *vp, **vpp; + + vpp = hashvar(s); + flags |= (VEXPORT & (((unsigned) (1 - aflag)) - 1)); + vpp = findvar(vpp, s); + vp = *vpp; + if (vp) { + if (vp->flags & VREADONLY) { + const char *n; + + if (flags & VNOSAVE) + free(s); + n = vp->text; + sh_error("%.*s: is read only", strchrnul(n, '=') - n, + n); + } + + if (flags & VNOSET) + goto out; + + if (vp->func && (flags & VNOFUNC) == 0) + (*vp->func)(varnull(s)); + + if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) + ckfree(vp->text); + + if (((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) | + (vp->flags & VSTRFIXED)) == VUNSET) { + *vpp = vp->next; + ckfree(vp); +out_free: + if ((flags & (VTEXTFIXED|VSTACK|VNOSAVE)) == VNOSAVE) + ckfree(s); + goto out; + } + + flags |= vp->flags & ~(VTEXTFIXED|VSTACK|VNOSAVE|VUNSET); + } else { + if (flags & VNOSET) + goto out; + if ((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) == VUNSET) + goto out_free; + /* not found */ + vp = ckmalloc(sizeof (*vp)); + vp->next = *vpp; + vp->func = NULL; + *vpp = vp; + } + if (!(flags & (VTEXTFIXED|VSTACK|VNOSAVE))) + s = savestr(s); + vp->text = s; + vp->flags = flags; + +out: + return vp; +} + +/* + * Find the value of a variable. Returns NULL if not set. + */ + +char * +lookupvar(const char *name) +{ + struct var *v; + + if ((v = *findvar(hashvar(name), name)) && !(v->flags & VUNSET)) { +#ifdef WITH_LINENO + if (v == &vlineno && v->text == linenovar) { + fmtstr(linenovar+7, sizeof(linenovar)-7, "%d", lineno); + } +#endif + return strchrnul(v->text, '=') + 1; + } + return NULL; +} + +intmax_t lookupvarint(const char *name) +{ + return atomax(lookupvar(name) ?: nullstr, 0); +} + + + +/* + * Generate a list of variables satisfying the given conditions. 
+ */ + +char ** +listvars(int on, int off, char ***end) +{ + struct var **vpp; + struct var *vp; + char **ep; + int mask; + + STARTSTACKSTR(ep); + vpp = vartab; + mask = on | off; + do { + for (vp = *vpp ; vp ; vp = vp->next) + if ((vp->flags & mask) == on) { + if (ep == stackstrend()) + ep = growstackstr(); + *ep++ = (char *) vp->text; + } + } while (++vpp < vartab + VTABSIZE); + if (ep == stackstrend()) + ep = growstackstr(); + if (end) + *end = ep; + *ep++ = NULL; + return grabstackstr(ep); +} + + + +/* + * POSIX requires that 'set' (but not export or readonly) output the + * variables in lexicographic order - by the locale's collating order (sigh). + * Maybe we could keep them in an ordered balanced binary tree + * instead of hashed lists. + * For now just roll 'em through qsort for printing... + */ + +int +showvars(const char *prefix, int on, int off) +{ + const char *sep; + char **ep, **epend; + + ep = listvars(on, off, &epend); + qsort(ep, epend - ep, sizeof(char *), vpcmp); + + sep = *prefix ? spcstr : prefix; + + for (; ep < epend; ep++) { + const char *p; + const char *q; + + p = strchrnul(*ep, '='); + q = nullstr; + if (*p) + q = single_quote(++p); + + out1fmt("%s%s%.*s%s\n", prefix, sep, (int)(p - *ep), *ep, q); + } + + return 0; +} + + + +/* + * The export and readonly commands. + */ + +int +exportcmd(int argc, char **argv) +{ + struct var *vp; + char *name; + const char *p; + char **aptr; + int flag = argv[0][0] == 'r'? VREADONLY : VEXPORT; + int notp; + + notp = nextopt("p") - 'p'; + if (notp && ((name = *(aptr = argptr)))) { + do { + if ((p = strchr(name, '=')) != NULL) { + p++; + } else { + if ((vp = *findvar(hashvar(name), name))) { + vp->flags |= flag; + continue; + } + } + setvar(name, p, flag); + } while ((name = *++aptr) != NULL); + } else { + showvars(argv[0], flag, 0); + } + return 0; +} + + +/* + * The "local" command. 
+ */ + +int +localcmd(int argc, char **argv) +{ + char *name; + + if (!localvar_stack) + sh_error("not in a function"); + + argv = argptr; + while ((name = *argv++) != NULL) { + mklocal(name, 0); + } + return 0; +} + + +/* + * Make a variable a local variable. When a variable is made local, it's + * value and flags are saved in a localvar structure. The saved values + * will be restored when the shell function returns. We handle the name + * "-" as a special case. + */ + +void mklocal(char *name, int flags) +{ + struct localvar *lvp; + struct var **vpp; + struct var *vp; + + INTOFF; + lvp = ckmalloc(sizeof (struct localvar)); + if (name[0] == '-' && name[1] == '\0') { + char *p; + p = ckmalloc(sizeof(optlist)); + lvp->text = memcpy(p, optlist, sizeof(optlist)); + vp = NULL; + } else { + char *eq; + + vpp = hashvar(name); + vp = *findvar(vpp, name); + eq = strchr(name, '='); + if (vp == NULL) { + if (eq) + vp = setvareq(name, VSTRFIXED | flags); + else + vp = setvar(name, NULL, VSTRFIXED | flags); + lvp->flags = VUNSET; + } else { + lvp->text = vp->text; + lvp->flags = vp->flags; + vp->flags |= VSTRFIXED|VTEXTFIXED; + if (eq) + setvareq(name, flags); + } + } + lvp->vp = vp; + lvp->next = localvar_stack->lv; + localvar_stack->lv = lvp; + INTON; +} + + +/* + * Called after a function returns. + * Interrupts must be off. + */ + +static void +poplocalvars(void) +{ + struct localvar_list *ll; + struct localvar *lvp, *next; + struct var *vp; + + INTOFF; + ll = localvar_stack; + localvar_stack = ll->next; + + next = ll->lv; + ckfree(ll); + + while ((lvp = next) != NULL) { + next = lvp->next; + vp = lvp->vp; + TRACE(("poplocalvar %s\n", vp ? 
vp->text : "-")); + if (vp == NULL) { /* $- saved */ + memcpy(optlist, lvp->text, sizeof(optlist)); + ckfree(lvp->text); + optschanged(); + } else if (lvp->flags == VUNSET) { + vp->flags &= ~(VSTRFIXED|VREADONLY); + unsetvar(vp->text); + } else { + if (vp->func) + (*vp->func)(varnull(lvp->text)); + if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) + ckfree(vp->text); + vp->flags = lvp->flags; + vp->text = lvp->text; + } + ckfree(lvp); + } + INTON; +} + + +/* + * Create a new localvar environment. + */ +struct localvar_list *pushlocalvars(int push) +{ + struct localvar_list *ll; + struct localvar_list *top; + + top = localvar_stack; + if (!push) + goto out; + + INTOFF; + ll = ckmalloc(sizeof(*ll)); + ll->lv = NULL; + ll->next = top; + localvar_stack = ll; + INTON; + +out: + return top; +} + + +void unwindlocalvars(struct localvar_list *stop) +{ + while (localvar_stack != stop) + poplocalvars(); +} + + +/* + * The unset builtin command. We unset the function before we unset the + * variable to allow a function to be unset when there is a readonly variable + * with the same name. + */ + +int +unsetcmd(int argc, char **argv) +{ + char **ap; + int i; + int flag = 0; + + while ((i = nextopt("vf")) != '\0') { + flag = i; + } + + for (ap = argptr; *ap ; ap++) { + if (flag != 'f') { + unsetvar(*ap); + continue; + } + if (flag != 'v') + unsetfunc(*ap); + } + return 0; +} + + +/* + * Unset the specified variable. + */ + +void unsetvar(const char *s) +{ + setvar(s, 0, 0); +} + + + +/* + * Find the appropriate entry in the hash table from the name. + */ + +STATIC struct var ** +hashvar(const char *p) +{ + unsigned int hashval; + + hashval = ((unsigned char) *p) << 4; + while (*p && *p != '=') + hashval += (unsigned char) *p++; + return &vartab[hashval % VTABSIZE]; +} + + + +/* + * Compares two strings up to the first = or '\0'. The first + * string must be terminated by '='; the second may be terminated by + * either '=' or '\0'. 
+ */ + +int +varcmp(const char *p, const char *q) +{ + int c, d; + + while ((c = *p) == (d = *q)) { + if (!c || c == '=') + goto out; + p++; + q++; + } + if (c == '=') + c = 0; + if (d == '=') + d = 0; +out: + return c - d; +} + +STATIC int +vpcmp(const void *a, const void *b) +{ + return varcmp(*(const char **)a, *(const char **)b); +} + +STATIC struct var ** +findvar(struct var **vpp, const char *name) +{ + for (; *vpp; vpp = &(*vpp)->next) { + if (varequal((*vpp)->text, name)) { + break; + } + } + return vpp; +} diff --git a/examples/error.c b/examples/error.c new file mode 100644 index 0000000..a0c6cb8 --- /dev/null +++ b/examples/error.c @@ -0,0 +1,2 @@ +INIT { +} diff --git a/examples/simple.c b/examples/simple.c index 4d3139c..16629ee 100644 --- a/examples/simple.c +++ b/examples/simple.c @@ -1,5 +1,7 @@ +void foo() { if (1) if (1) 1; else 1; +} diff --git a/examples/small_dash_var.c b/examples/small_dash_var.c new file mode 100644 index 0000000..33b3c49 --- /dev/null +++ b/examples/small_dash_var.c @@ -0,0 +1,678 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 1997-2005 + * Herbert Xu <herbert@gondor.apana.org.au>. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Kenneth Almquist. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#ifdef HAVE_PATHS_H +#include <paths.h> +#endif + +/* + * Shell variables. 
+ */ + +#include "shell.h" +#include "output.h" +#include "expand.h" +#include "nodes.h" /* for other headers */ +#include "exec.h" +#include "syntax.h" +#include "options.h" +#include "mail.h" +#include "var.h" +#include "memalloc.h" +#include "error.h" +#include "mystring.h" +#include "parser.h" +#include "show.h" +#ifndef SMALL +#include "myhistedit.h" +#endif +#include "system.h" + + +#define VTABSIZE 39 + + +struct localvar_list { + struct localvar_list *next; + struct localvar *lv; +}; + +MKINIT struct localvar_list *localvar_stack; + +const char defpathvar[] = + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"; +char defifsvar[] = "IFS= \t\n"; +MKINIT char defoptindvar[] = "OPTIND=1"; + +int lineno; +char linenovar[sizeof("LINENO=")+sizeof(int)*CHAR_BIT/3+1] = "LINENO="; + +/* Some macros in var.h depend on the order, add new variables to the end. */ +struct var varinit[] = { +#if ATTY + { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "ATTY\0", 0 }, +#endif + { 0, VSTRFIXED|VTEXTFIXED, defifsvar, 0 }, + { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAIL\0", changemail }, + { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAILPATH\0", changemail }, + { 0, VSTRFIXED|VTEXTFIXED, defpathvar, changepath }, + { 0, VSTRFIXED|VTEXTFIXED, "PS1=$ ", 0 }, + { 0, VSTRFIXED|VTEXTFIXED, "PS2=> ", 0 }, + { 0, VSTRFIXED|VTEXTFIXED, "PS4=+ ", 0 }, + { 0, VSTRFIXED|VTEXTFIXED, defoptindvar, getoptsreset }, +#ifdef WITH_LINENO + { 0, VSTRFIXED|VTEXTFIXED, linenovar, 0 }, +#endif +#ifndef SMALL + { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "TERM\0", 0 }, + { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "HISTSIZE\0", sethistsize }, +#endif +}; + +STATIC struct var *vartab[VTABSIZE]; + +STATIC struct var **hashvar(const char *); +STATIC int vpcmp(const void *, const void *); +STATIC struct var **findvar(struct var **, const char *); + +/* + * Initialize the varable symbol tables and import the environment + */ + +#ifdef mkinit +INCLUDE <unistd.h> +INCLUDE <sys/types.h> +INCLUDE <sys/stat.h> +INCLUDE "cd.h" +INCLUDE 
"output.h" +INCLUDE "var.h" +MKINIT char **environ; +INIT { + char **envp; + static char ppid[32] = "PPID="; + const char *p; + struct stat64 st1, st2; + + initvar(); + for (envp = environ ; *envp ; envp++) { + p = endofname(*envp); + if (p != *envp && *p == '=') { + setvareq(*envp, VEXPORT|VTEXTFIXED); + } + } + + setvareq(defifsvar, VTEXTFIXED); + setvareq(defoptindvar, VTEXTFIXED); + + fmtstr(ppid + 5, sizeof(ppid) - 5, "%ld", (long) getppid()); + setvareq(ppid, VTEXTFIXED); + + p = lookupvar("PWD"); + if (p) + if (*p != '/' || stat64(p, &st1) || stat64(".", &st2) || + st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino) + p = 0; + setpwd(p, 0); +} + +RESET { + unwindlocalvars(0); +} +#endif + +static char *varnull(const char *s) +{ + return (strchr(s, '=') ?: nullstr - 1) + 1; +} + +/* + * This routine initializes the builtin variables. It is called when the + * shell is initialized. + */ + +void +initvar(void) +{ + struct var *vp; + struct var *end; + struct var **vpp; + + vp = varinit; + end = vp + sizeof(varinit) / sizeof(varinit[0]); + do { + vpp = hashvar(vp->text); + vp->next = *vpp; + *vpp = vp; + } while (++vp < end); + /* + * PS1 depends on uid + */ + if (!geteuid()) + vps1.text = "PS1=# "; +} + +/* + * Set the value of a variable. The flags argument is ored with the + * flags of the variable. If val is NULL, the variable is unset. 
+ */ + +struct var *setvar(const char *name, const char *val, int flags) +{ + char *p, *q; + size_t namelen; + char *nameeq; + size_t vallen; + struct var *vp; + + q = endofname(name); + p = strchrnul(q, '='); + namelen = p - name; + if (!namelen || p != q) + sh_error("%.*s: bad variable name", namelen, name); + vallen = 0; + if (val == NULL) { + flags |= VUNSET; + } else { + vallen = strlen(val); + } + INTOFF; + p = mempcpy(nameeq = ckmalloc(namelen + vallen + 2), name, namelen); + if (val) { + *p++ = '='; + p = mempcpy(p, val, vallen); + } + *p = '\0'; + vp = setvareq(nameeq, flags | VNOSAVE); + INTON; + + return vp; +} + +/* + * Set the given integer as the value of a variable. The flags argument is + * ored with the flags of the variable. + */ + +intmax_t setvarint(const char *name, intmax_t val, int flags) +{ + int len = max_int_length(sizeof(val)); + char buf[len]; + + fmtstr(buf, len, "%" PRIdMAX, val); + setvar(name, buf, flags); + return val; +} + + + +/* + * Same as setvar except that the variable and value are passed in + * the first argument as name=value. Since the first argument will + * be actually stored in the table, it should not be a string that + * will go away. + * Called with interrupts off. 
+ */ + +struct var *setvareq(char *s, int flags) +{ + struct var *vp, **vpp; + + vpp = hashvar(s); + flags |= (VEXPORT & (((unsigned) (1 - aflag)) - 1)); + vpp = findvar(vpp, s); + vp = *vpp; + if (vp) { + if (vp->flags & VREADONLY) { + const char *n; + + if (flags & VNOSAVE) + free(s); + n = vp->text; + sh_error("%.*s: is read only", strchrnul(n, '=') - n, + n); + } + + if (flags & VNOSET) + goto out; + + if (vp->func && (flags & VNOFUNC) == 0) + (*vp->func)(varnull(s)); + + if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) + ckfree(vp->text); + + if (((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) | + (vp->flags & VSTRFIXED)) == VUNSET) { + *vpp = vp->next; + ckfree(vp); +out_free: + if ((flags & (VTEXTFIXED|VSTACK|VNOSAVE)) == VNOSAVE) + ckfree(s); + goto out; + } + + flags |= vp->flags & ~(VTEXTFIXED|VSTACK|VNOSAVE|VUNSET); + } else { + if (flags & VNOSET) + goto out; + if ((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) == VUNSET) + goto out_free; + /* not found */ + vp = ckmalloc(sizeof (*vp)); + vp->next = *vpp; + vp->func = NULL; + *vpp = vp; + } + if (!(flags & (VTEXTFIXED|VSTACK|VNOSAVE))) + s = savestr(s); + vp->text = s; + vp->flags = flags; + +out: + return vp; +} + +/* + * Find the value of a variable. Returns NULL if not set. + */ + +char * +lookupvar(const char *name) +{ + struct var *v; + + if ((v = *findvar(hashvar(name), name)) && !(v->flags & VUNSET)) { +#ifdef WITH_LINENO + if (v == &vlineno && v->text == linenovar) { + fmtstr(linenovar+7, sizeof(linenovar)-7, "%d", lineno); + } +#endif + return strchrnul(v->text, '=') + 1; + } + return NULL; +} + +intmax_t lookupvarint(const char *name) +{ + return atomax(lookupvar(name) ?: nullstr, 0); +} + + + +/* + * Generate a list of variables satisfying the given conditions. 
+ */ + +char ** +listvars(int on, int off, char ***end) +{ + struct var **vpp; + struct var *vp; + char **ep; + int mask; + + STARTSTACKSTR(ep); + vpp = vartab; + mask = on | off; + do { + for (vp = *vpp ; vp ; vp = vp->next) + if ((vp->flags & mask) == on) { + if (ep == stackstrend()) + ep = growstackstr(); + *ep++ = (char *) vp->text; + } + } while (++vpp < vartab + VTABSIZE); + if (ep == stackstrend()) + ep = growstackstr(); + if (end) + *end = ep; + *ep++ = NULL; + return grabstackstr(ep); +} + + + +/* + * POSIX requires that 'set' (but not export or readonly) output the + * variables in lexicographic order - by the locale's collating order (sigh). + * Maybe we could keep them in an ordered balanced binary tree + * instead of hashed lists. + * For now just roll 'em through qsort for printing... + */ + +int +showvars(const char *prefix, int on, int off) +{ + const char *sep; + char **ep, **epend; + + ep = listvars(on, off, &epend); + qsort(ep, epend - ep, sizeof(char *), vpcmp); + + sep = *prefix ? spcstr : prefix; + + for (; ep < epend; ep++) { + const char *p; + const char *q; + + p = strchrnul(*ep, '='); + q = nullstr; + if (*p) + q = single_quote(++p); + + out1fmt("%s%s%.*s%s\n", prefix, sep, (int)(p - *ep), *ep, q); + } + + return 0; +} + + + +/* + * The export and readonly commands. + */ + +int +exportcmd(int argc, char **argv) +{ + struct var *vp; + char *name; + const char *p; + char **aptr; + int flag = argv[0][0] == 'r'? VREADONLY : VEXPORT; + int notp; + + notp = nextopt("p") - 'p'; + if (notp && ((name = *(aptr = argptr)))) { + do { + if ((p = strchr(name, '=')) != NULL) { + p++; + } else { + if ((vp = *findvar(hashvar(name), name))) { + vp->flags |= flag; + continue; + } + } + setvar(name, p, flag); + } while ((name = *++aptr) != NULL); + } else { + showvars(argv[0], flag, 0); + } + return 0; +} + + +/* + * The "local" command. 
+ */ + +int +localcmd(int argc, char **argv) +{ + char *name; + + if (!localvar_stack) + sh_error("not in a function"); + + argv = argptr; + while ((name = *argv++) != NULL) { + mklocal(name, 0); + } + return 0; +} + + +/* + * Make a variable a local variable. When a variable is made local, it's + * value and flags are saved in a localvar structure. The saved values + * will be restored when the shell function returns. We handle the name + * "-" as a special case. + */ + +void mklocal(char *name, int flags) +{ + struct localvar *lvp; + struct var **vpp; + struct var *vp; + + INTOFF; + lvp = ckmalloc(sizeof (struct localvar)); + if (name[0] == '-' && name[1] == '\0') { + char *p; + p = ckmalloc(sizeof(optlist)); + lvp->text = memcpy(p, optlist, sizeof(optlist)); + vp = NULL; + } else { + char *eq; + + vpp = hashvar(name); + vp = *findvar(vpp, name); + eq = strchr(name, '='); + if (vp == NULL) { + if (eq) + vp = setvareq(name, VSTRFIXED | flags); + else + vp = setvar(name, NULL, VSTRFIXED | flags); + lvp->flags = VUNSET; + } else { + lvp->text = vp->text; + lvp->flags = vp->flags; + vp->flags |= VSTRFIXED|VTEXTFIXED; + if (eq) + setvareq(name, flags); + } + } + lvp->vp = vp; + lvp->next = localvar_stack->lv; + localvar_stack->lv = lvp; + INTON; +} + + +/* + * Called after a function returns. + * Interrupts must be off. + */ + +static void +poplocalvars(void) +{ + struct localvar_list *ll; + struct localvar *lvp, *next; + struct var *vp; + + INTOFF; + ll = localvar_stack; + localvar_stack = ll->next; + + next = ll->lv; + ckfree(ll); + + while ((lvp = next) != NULL) { + next = lvp->next; + vp = lvp->vp; + TRACE(("poplocalvar %s\n", vp ? 
vp->text : "-")); + if (vp == NULL) { /* $- saved */ + memcpy(optlist, lvp->text, sizeof(optlist)); + ckfree(lvp->text); + optschanged(); + } else if (lvp->flags == VUNSET) { + vp->flags &= ~(VSTRFIXED|VREADONLY); + unsetvar(vp->text); + } else { + if (vp->func) + (*vp->func)(varnull(lvp->text)); + if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0) + ckfree(vp->text); + vp->flags = lvp->flags; + vp->text = lvp->text; + } + ckfree(lvp); + } + INTON; +} + + +/* + * Create a new localvar environment. + */ +struct localvar_list *pushlocalvars(int push) +{ + struct localvar_list *ll; + struct localvar_list *top; + + top = localvar_stack; + if (!push) + goto out; + + INTOFF; + ll = ckmalloc(sizeof(*ll)); + ll->lv = NULL; + ll->next = top; + localvar_stack = ll; + INTON; + +out: + return top; +} + + +void unwindlocalvars(struct localvar_list *stop) +{ + while (localvar_stack != stop) + poplocalvars(); +} + + +/* + * The unset builtin command. We unset the function before we unset the + * variable to allow a function to be unset when there is a readonly variable + * with the same name. + */ + +int +unsetcmd(int argc, char **argv) +{ + char **ap; + int i; + int flag = 0; + + while ((i = nextopt("vf")) != '\0') { + flag = i; + } + + for (ap = argptr; *ap ; ap++) { + if (flag != 'f') { + unsetvar(*ap); + continue; + } + if (flag != 'v') + unsetfunc(*ap); + } + return 0; +} + + +/* + * Unset the specified variable. + */ + +void unsetvar(const char *s) +{ + setvar(s, 0, 0); +} + + + +/* + * Find the appropriate entry in the hash table from the name. + */ + +STATIC struct var ** +hashvar(const char *p) +{ + unsigned int hashval; + + hashval = ((unsigned char) *p) << 4; + while (*p && *p != '=') + hashval += (unsigned char) *p++; + return &vartab[hashval % VTABSIZE]; +} + + + +/* + * Compares two strings up to the first = or '\0'. The first + * string must be terminated by '='; the second may be terminated by + * either '=' or '\0'. 
+ */ + +int +varcmp(const char *p, const char *q) +{ + int c, d; + + while ((c = *p) == (d = *q)) { + if (!c || c == '=') + goto out; + p++; + q++; + } + if (c == '=') + c = 0; + if (d == '=') + d = 0; +out: + return c - d; +} + +STATIC int +vpcmp(const void *a, const void *b) +{ + return varcmp(*(const char **)a, *(const char **)b); +} + +STATIC struct var ** +findvar(struct var **vpp, const char *name) +{ + for (; *vpp; vpp = &(*vpp)->next) { + if (varequal((*vpp)->text, name)) { + break; + } + } + return vpp; +} diff --git a/examples/tiny.c b/examples/tiny.c new file mode 100644 index 0000000..ff80f0c --- /dev/null +++ b/examples/tiny.c @@ -0,0 +1,430 @@ +int +save_bash_input (fd, new_fd) + int fd, new_fd; +{ + int nfd; + + /* Sync the stream so we can re-read from the new file descriptor. We + might be able to avoid this by copying the buffered stream verbatim + to the new file descriptor. */ + if (buffers[fd]) + sync_buffered_stream (fd); + + /* Now take care of duplicating the file descriptor that bash is + using for input, so we can reinitialize it later. */ + nfd = (new_fd == -1) ? fcntl (fd, F_DUPFD, 10) : new_fd; + if (nfd == -1) + { + if (fcntl (fd, F_GETFD, 0) == 0) + sys_error (_("cannot allocate new file descriptor for bash input from fd %d"), fd); + return -1; + } + + if (nfd < nbuffers && buffers[nfd]) + { + /* What's this? A stray buffer without an associated open file + descriptor? Free up the buffer and report the error. */ + internal_error (_("save_bash_input: buffer already exists for new fd %d"), nfd); + if (buffers[nfd]->b_flag & B_SHAREDBUF) + buffers[nfd]->b_buffer = (char *)NULL; + free_buffered_stream (buffers[nfd]); + } + + /* Reinitialize bash_input.location. 
*/ + if (bash_input.type == st_bstream) + { + bash_input.location.buffered_fd = nfd; + fd_to_buffered_stream (nfd); + close_buffered_fd (fd); /* XXX */ + } + else + /* If the current input type is not a buffered stream, but the shell + is not interactive and therefore using a buffered stream to read + input (e.g. with an `eval exec 3>output' inside a script), note + that the input fd has been changed. pop_stream() looks at this + value and adjusts the input fd to the new value of + default_buffered_input accordingly. */ + bash_input_fd_changed++; + + if (default_buffered_input == fd) + default_buffered_input = nfd; + + SET_CLOSE_ON_EXEC (nfd); + return nfd; +} + +/* Check that file descriptor FD is not the one that bash is currently + using to read input from a script. FD is about to be duplicated onto, + which means that the kernel will close it for us. If FD is the bash + input file descriptor, we need to seek backwards in the script (if + possible and necessary -- scripts read from stdin are still unbuffered), + allocate a new file descriptor to use for bash input, and re-initialize + the buffered stream. Make sure the file descriptor used to save bash + input is set close-on-exec. Returns 0 on success, -1 on failure. This + works only if fd is > 0 -- if fd == 0 and bash is reading input from + fd 0, sync_buffered_stream is used instead, to cooperate with input + redirection (look at redir.c:add_undo_redirect()). */ +int +check_bash_input (fd) + int fd; +{ + if (fd_is_bash_input (fd)) + { + if (fd > 0) + return ((save_bash_input (fd, -1) == -1) ? -1 : 0); + else if (fd == 0) + return ((sync_buffered_stream (fd) == -1) ? -1 : 0); + } + return 0; +} + +/* This is the buffered stream analogue of dup2(fd1, fd2). The + BUFFERED_STREAM corresponding to fd2 is deallocated, if one exists. + BUFFERS[fd1] is copied to BUFFERS[fd2]. This is called by the + redirect code for constructs like 4<&0 and 3</etc/rc.local. 
*/ +int +duplicate_buffered_stream (fd1, fd2) + int fd1, fd2; +{ + int is_bash_input, m; + + if (fd1 == fd2) + return 0; + + m = max (fd1, fd2); + ALLOCATE_BUFFERS (m); + + /* If FD2 is the file descriptor bash is currently using for shell input, + we need to do some extra work to make sure that the buffered stream + actually exists (it might not if fd1 was not active, and the copy + didn't actually do anything). */ + is_bash_input = (bash_input.type == st_bstream) && + (bash_input.location.buffered_fd == fd2); + + if (buffers[fd2]) + { + /* If the two objects share the same b_buffer, don't free it. */ + if (buffers[fd1] && buffers[fd1]->b_buffer && buffers[fd1]->b_buffer == buffers[fd2]->b_buffer) + buffers[fd2] = (BUFFERED_STREAM *)NULL; + /* If this buffer is shared with another fd, don't free the buffer */ + else if (buffers[fd2]->b_flag & B_SHAREDBUF) + { + buffers[fd2]->b_buffer = (char *)NULL; + free_buffered_stream (buffers[fd2]); + } + else + free_buffered_stream (buffers[fd2]); + } + buffers[fd2] = copy_buffered_stream (buffers[fd1]); + if (buffers[fd2]) + buffers[fd2]->b_fd = fd2; + + if (is_bash_input) + { + if (!buffers[fd2]) + fd_to_buffered_stream (fd2); + buffers[fd2]->b_flag |= B_WASBASHINPUT; + } + + if (fd_is_bash_input (fd1) || (buffers[fd1] && (buffers[fd1]->b_flag & B_SHAREDBUF))) + buffers[fd2]->b_flag |= B_SHAREDBUF; + + return (fd2); +} + +/* Return 1 if a seek on FD will succeed. */ +#define fd_is_seekable(fd) (lseek ((fd), 0L, SEEK_CUR) >= 0) + +/* Take FD, a file descriptor, and create and return a buffered stream + corresponding to it. If something is wrong and the file descriptor + is invalid, return a NULL stream. */ +BUFFERED_STREAM * +fd_to_buffered_stream (fd) + int fd; +{ + char *buffer; + size_t size; + struct stat sb; + + if (fstat (fd, &sb) < 0) + { + close (fd); + return ((BUFFERED_STREAM *)NULL); + } + + size = (fd_is_seekable (fd)) ? 
min (sb.st_size, MAX_INPUT_BUFFER_SIZE) : 1; + if (size == 0) + size = 1; + buffer = (char *)xmalloc (size); + + return (make_buffered_stream (fd, buffer, size)); +} + +/* Return a buffered stream corresponding to FILE, a file name. */ +BUFFERED_STREAM * +open_buffered_stream (file) + char *file; +{ + int fd; + + fd = open (file, O_RDONLY); + return ((fd >= 0) ? fd_to_buffered_stream (fd) : (BUFFERED_STREAM *)NULL); +} + +/* Deallocate a buffered stream and free up its resources. Make sure we + zero out the slot in BUFFERS that points to BP. */ +void +free_buffered_stream (bp) + BUFFERED_STREAM *bp; +{ + int n; + + if (!bp) + return; + + n = bp->b_fd; + if (bp->b_buffer) + free (bp->b_buffer); + free (bp); + buffers[n] = (BUFFERED_STREAM *)NULL; +} + +/* Close the file descriptor associated with BP, a buffered stream, and free + up the stream. Return the status of closing BP's file descriptor. */ +int +close_buffered_stream (bp) + BUFFERED_STREAM *bp; +{ + int fd; + + if (!bp) + return (0); + fd = bp->b_fd; + if (bp->b_flag & B_SHAREDBUF) + bp->b_buffer = (char *)NULL; + free_buffered_stream (bp); + return (close (fd)); +} + +/* Deallocate the buffered stream associated with file descriptor FD, and + close FD. Return the status of the close on FD. */ +int +close_buffered_fd (fd) + int fd; +{ + if (fd < 0) + { + errno = EBADF; + return -1; + } + if (fd >= nbuffers || !buffers || !buffers[fd]) + return (close (fd)); + return (close_buffered_stream (buffers[fd])); +} + +/* Make the BUFFERED_STREAM associated with buffers[FD] be BP, and return + the old BUFFERED_STREAM. */ +BUFFERED_STREAM * +set_buffered_stream (fd, bp) + int fd; + BUFFERED_STREAM *bp; +{ + BUFFERED_STREAM *ret; + + ret = buffers[fd]; + buffers[fd] = bp; + return ret; +} + +/* Read a buffer full of characters from BP, a buffered stream. 
*/ +static int +b_fill_buffer (bp) + BUFFERED_STREAM *bp; +{ + ssize_t nr; + off_t o; + + CHECK_TERMSIG; + /* In an environment where text and binary files are treated differently, + compensate for lseek() on text files returning an offset different from + the count of characters read() returns. Text-mode streams have to be + treated as unbuffered. */ + if ((bp->b_flag & (B_TEXT | B_UNBUFF)) == B_TEXT) + { + o = lseek (bp->b_fd, 0, SEEK_CUR); + nr = zread (bp->b_fd, bp->b_buffer, bp->b_size); + if (nr > 0 && nr < lseek (bp->b_fd, 0, SEEK_CUR) - o) + { + lseek (bp->b_fd, o, SEEK_SET); + bp->b_flag |= B_UNBUFF; + bp->b_size = 1; + nr = zread (bp->b_fd, bp->b_buffer, bp->b_size); + } + } + else + nr = zread (bp->b_fd, bp->b_buffer, bp->b_size); + if (nr <= 0) + { + bp->b_used = bp->b_inputp = 0; + bp->b_buffer[0] = 0; + if (nr == 0) + bp->b_flag |= B_EOF; + else + bp->b_flag |= B_ERROR; + return (EOF); + } + + bp->b_used = nr; + bp->b_inputp = 0; + return (bp->b_buffer[bp->b_inputp++] & 0xFF); +} + +/* Get a character from buffered stream BP. */ +#define bufstream_getc(bp) \ + (bp->b_inputp == bp->b_used || !bp->b_used) \ + ? b_fill_buffer (bp) \ + : bp->b_buffer[bp->b_inputp++] & 0xFF + +/* Push C back onto buffered stream BP. */ +static int +bufstream_ungetc(c, bp) + int c; + BUFFERED_STREAM *bp; +{ + if (c == EOF || bp == 0 || bp->b_inputp == 0) + return (EOF); + + bp->b_buffer[--bp->b_inputp] = c; + return (c); +} + +/* Seek backwards on file BFD to synchronize what we've read so far + with the underlying file pointer. 
*/ +int +sync_buffered_stream (bfd) + int bfd; +{ + BUFFERED_STREAM *bp; + off_t chars_left; + + if (buffers == 0 || (bp = buffers[bfd]) == 0) + return (-1); + + chars_left = bp->b_used - bp->b_inputp; + if (chars_left) + lseek (bp->b_fd, -chars_left, SEEK_CUR); + bp->b_used = bp->b_inputp = 0; + return (0); +} + +int +buffered_getchar () +{ + CHECK_TERMSIG; + + if (bash_input.location.buffered_fd < 0 || buffers[bash_input.location.buffered_fd] == 0) + return EOF; + +#if !defined (DJGPP) + return (bufstream_getc (buffers[bash_input.location.buffered_fd])); +#else + /* On DJGPP, ignore \r. */ + int ch; + while ((ch = bufstream_getc (buffers[bash_input.location.buffered_fd])) == '\r') + ; + return ch; +#endif +} + +int +buffered_ungetchar (c) + int c; +{ + return (bufstream_ungetc (c, buffers[bash_input.location.buffered_fd])); +} + +/* Make input come from file descriptor BFD through a buffered stream. */ +void +with_input_from_buffered_stream (bfd, name) + int bfd; + char *name; +{ + INPUT_STREAM location; + BUFFERED_STREAM *bp; + + location.buffered_fd = bfd; + /* Make sure the buffered stream exists. */ + bp = fd_to_buffered_stream (bfd); + init_yy_io (bp == 0 ? 
return_EOF : buffered_getchar, + buffered_ungetchar, st_bstream, name, location); +} + +#if defined (TEST) +void * +xmalloc(s) +int s; +{ + return (malloc (s)); +} + +void * +xrealloc(s, size) +char *s; +int size; +{ + if (!s) + return(malloc (size)); + else + return(realloc (s, size)); +} + +void +init_yy_io () +{ +} + +process(bp) +BUFFERED_STREAM *bp; +{ + int c; + + while ((c = bufstream_getc(bp)) != EOF) + putchar(c); +} + +BASH_INPUT bash_input; + +struct stat dsb; /* can be used from gdb */ + +/* imitate /bin/cat */ +main(argc, argv) +int argc; +char **argv; +{ + register int i; + BUFFERED_STREAM *bp; + + if (argc == 1) { + bp = fd_to_buffered_stream (0); + process(bp); + exit(0); + } + for (i = 1; i < argc; i++) { + if (argv[i][0] == '-' && argv[i][1] == '\0') { + bp = fd_to_buffered_stream (0); + if (!bp) + continue; + process(bp); + free_buffered_stream (bp); + } else { + bp = open_buffered_stream (argv[i]); + if (!bp) + continue; + process(bp); + close_buffered_stream (bp); + } + } + exit(0); +} +#endif /* TEST */ +#endif /* BUFFERED_INPUT */ diff --git a/extras/ftdetect/earlpy.vim b/extras/ftdetect/earlpy.vim new file mode 100755 index 0000000..f63a7b4 --- /dev/null +++ b/extras/ftdetect/earlpy.vim @@ -0,0 +1 @@ +au BufRead,BufNewFile *.earlpy set filetype=earlpy diff --git a/extras/syntax/earlpy.vim b/extras/syntax/earlpy.vim new file mode 100644 index 0000000..429db40 --- /dev/null +++ b/extras/syntax/earlpy.vim @@ -0,0 +1,20 @@ +" https://vim.fandom.com/wiki/Creating_your_own_syntax_files +" Vim syntax file +" Language: Celestia Star Catalogs +" Maintainer: Kevin Lauder +" Latest Revision: 26 April 2008 + +if exists("b:current_syntax") + finish +endif + +let b:current_syntax = "earlpy" + +" Matches +syn match basicKeywords "\zs.*\ze$" +syn match literalTag "\zs .*\ze$" +syn match comment "\zs#.*\ze$" + +hi def link literalTag Type +hi def link basicKeywords Keyword +hi def link comment Comment diff --git a/grammars/c/disambiguate.c 
b/grammars/c/disambiguate.c index 403d65f..b6a99e8 100644 --- a/grammars/c/disambiguate.c +++ b/grammars/c/disambiguate.c @@ -1,47 +1,41 @@ -struct token *TYPE_NAMES[1024]; -size_t N_TYPE_NAMES; - -void alert_parse(struct state *state) { - if (PRODUCTION_ID_TO_SYMBOL[state->production_id] == SYMBOL_TYPEDEF) { - for (struct token *t = find_token(state, 2); t->symbol != DONE_SYMBOL; t++) { - if (t->symbol == SYMBOL_IDENT) { - TYPE_NAMES[N_TYPE_NAMES++] = t; - break; - } - } - } -} - -int is_typename(struct token *token) { - if (!strcmp("int", token->string)) return 1; - for (size_t i = 0; i < N_TYPE_NAMES; i++) - if (!strcmp(TYPE_NAMES[i]->string, token->string)) - return 1; - return 0; -} +void alert_parse(struct state *state) { } int disambiguator(struct state *old, struct state *new) { - // printf("Old tree:\n"); - // print_parse_tree(old, 4); - // printf("New tree:\n"); - // print_parse_tree(new, 4); + // fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); + // print_parse_tree(old, 0, stderr); + // print_parse_tree(new, 0, stderr); + // fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); - if (old->production_id == PRODUCTION_DECL_STMT) - if (!is_typename(find_token(old->reasons[0], 0))) - return 1; - if (new->production_id == PRODUCTION_DECL_STMT) - if (!is_typename(find_token(new->reasons[0], 0))) - return 0; + if (old->n_poisoned < new->n_poisoned) return 0; + if (new->n_poisoned < old->n_poisoned) return 1; // Prefer the earlier parsings in the grammar when two entirely different // productions are taken. if (old->production_id != new->production_id) - return old->production_id < new->production_id - ? 0 : 1; + return (old->production_id < new->production_id) ? 0 : 1; // If they're the same production ... 
prod_id_t prod = old->production_id; - if (PRODUCTION_ID_TO_SYMBOL[prod] == SYMBOL_EXPR) { + + if (PRODUCTION_ID_TO_SYMBOL[prod] == START_SYMBOL + && PRODUCTION_ID_TO_PRODUCTION[prod][1] != DONE_SYMBOL) { + struct token *old_tok = find_token(old, 1), + *new_tok = find_token(new, 1); + if (old_tok < new_tok) return 0; + else if (old_tok > new_tok) return 1; + } + + if (PRODUCTION_ID_TO_PRODUCTION[prod][0] == SYMBOL_ERROR && PRODUCTION_ID_TO_PRODUCTION[prod][1] != DONE_SYMBOL) { + struct token *old_tok = find_token(old, 1), + *new_tok = find_token(new, 1); + if (old_tok < new_tok) return 0; + else if (old_tok > new_tok) return 1; + } else if (PRODUCTION_ID_TO_PRODUCTION[prod][1] == SYMBOL_ERROR) { + struct token *old_tok = find_token(old, 1), + *new_tok = find_token(new, 1); + if (old_tok < new_tok) return 1; + else if (old_tok > new_tok) return 0; + } else if (PRODUCTION_ID_TO_SYMBOL[prod] == SYMBOL_EXPR) { if (PRODUCTION_ID_TO_PRODUCTION[prod][1] == SYMBOL_OP) { struct token *old_tok = find_token(old, 1), *new_tok = find_token(new, 1); @@ -49,7 +43,7 @@ int disambiguator(struct state *old, struct state *new) { const char *precedence[] = {".", "->", "*", "/", "%", "+", "-", "<<", ">>", "<", "<=", ">", ">=", "==", "!=", "&", "|", "&&", "||", "=", "+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", - "^=", "|=", ",", 0}; + "^=", "|=", ",", ":", 0}; if (strcmp(old_s, new_s)) { for (const char **p = precedence; *p; p++) { if (!strcmp(old_s, *p)) { @@ -58,16 +52,30 @@ int disambiguator(struct state *old, struct state *new) { return 0; } } - // BAD! - return 2; + fprintf(stderr, "ERROR: didn't find operator '%s'\n", old_s); + exit(1); } else { - // Associate RIGHT if (old_tok < new_tok) return 1; else if (old_tok > new_tok) return 0; } } } + // Generally speaking, we want left associativity to avoid long chains of + // completions. 
+ struct token *old_tok = find_token(old, 1), + *new_tok = find_token(new, 1); + if (old_tok < new_tok) return 1; + else if (old_tok > new_tok) return 0; + fprintf(stderr, "TOTALLY UNKNOWN!\n"); + fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~\n"); + pprint_state(old); + // print_parse_tree(old, 0, stderr); + fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~\n"); + pprint_state(new); + // print_parse_tree(new, 0, stderr); + fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~\n"); + exit(1); return 2; } diff --git a/grammars/c/grammar.earlpy b/grammars/c/grammar.earlpy new file mode 100644 index 0000000..99cafe9 --- /dev/null +++ b/grammars/c/grammar.earlpy @@ -0,0 +1,265 @@ +#### OPTIMIZATIONS: +# where possible, we want parse trees that look like: +# (((A + B) + C) + D), i.e., left-associativity because it avoids long chains +# of completions. Another explanation for it is that then ambiguity is resolved +# as early in the left-to-right parse as possible. + +KEYWORDS list + switch volatile case while do else const for if + struct union typedef void return break continue + sizeof + +IDENT regex + [a-zA-Z_][0-9a-zA-Z_]* + +INT regex + ((0x[0-9a-fA-F]*)|([0-9]*))([uUlL])* + +# https://stackoverflow.com/questions/2039795/regular-expression-for-a-string-literal-in-flex-lex +STRING regex + ["]([^\\"]|\\.)*["] + +CHAR regex + [']([\\][']|[^'][^'])*[^']?['] + +OP list + ; , + - + ! % * & / << >> ^ | + -= += != %= *= &= /= <<= == >>= ^= |= + && || ++ -- + < <= > >= = + . -> + +TERNARY list + : ? + +PARENS list + ( ) { } [ ] + +############### ERROR RECOVERY +# These rules match either a single token, or a pair of balanced parentheses + +NONPAREN nonterm + KEYWORDS + IDENT + INT + STRING + CHAR + TERNARY + OP + +ERROR_INNER nonterm .poison + ERROR + ERROR_INNER ERROR + +ERROR nonterm .poison + ( ERROR_INNER ) + { ERROR_INNER } + [ ERROR_INNER ] + ( ) + { } + [ ] + NONPAREN + +############### TYPE PARSING +# A PRIMITIVE_TYPE is the core object that takes up space after dereferencing, +# calling, etc. 
A normal variable declaration is PRIMITIVE_TYPE (expression) +PRIMITIVE_TYPE nonterm + struct IDENT + union IDENT + struct IDENT AGGREGATE_DECLARATION + union IDENT AGGREGATE_DECLARATION + const PRIMITIVE_TYPE + volatile PRIMITIVE_TYPE + void + IDENT + +# A TYPE_EXPRESSION is basically an lvalue expression. +TYPE_EXPRESSION nonterm + IDENT + TYPE_EXPRESSION [ ] + TYPE_EXPRESSION [ EXPR ] + * TYPE_EXPRESSION + const TYPE_EXPRESSION + ( TYPE_EXPRESSION ) + TYPE_EXPRESSION ( ) + TYPE_EXPRESSION ( ARGS ) + +DECLARATION nonterm + PRIMITIVE_TYPE TYPE_EXPRESSION + +# An ANONYMOUS_TYPE has no name +ANONYMOUS_TYPE nonterm + PRIMITIVE_TYPE + ANONYMOUS_TYPE [ ] + ANONYMOUS_TYPE [ EXPR ] + ANONYMOUS_TYPE * + ANONYMOUS_TYPE const * + const ANONYMOUS_TYPE + ( ANONYMOUS_TYPE ) + ANONYMOUS_TYPE ( ) + ANONYMOUS_TYPE ( ARGS ) + +############### TOP LEVEL +TOP_LEVEL nonterm .start + TOP_LEVEL TYPEDEF + TOP_LEVEL STRUCTDECL + TOP_LEVEL FUNCTION + TOP_LEVEL DECLARATION_STATEMENT + TYPEDEF + STRUCTDECL + FUNCTION + DECLARATION_STATEMENT + TOP_LEVEL ERROR + ERROR + +ARGS nonterm + ANONYMOUS_TYPE + ARGS , ANONYMOUS_TYPE + DECLARATION + ARGS , DECLARATION + +CALL_ARGS nonterm + CALL_ARGS , EXPR + EXPR + +OLD_ARGS nonterm + OLD_ARGS , IDENT + IDENT + +OLD_ARG_DECLS nonterm + OLD_ARG_DECLS DECLARATION_STATEMENT + DECLARATION_STATEMENT + +FUNCTION nonterm + DECLARATION ( ) TRUE_BLOCK + DECLARATION ( ARGS ) TRUE_BLOCK + DECLARATION ( OLD_ARGS ) OLD_ARG_DECLS TRUE_BLOCK + IDENT ( OLD_ARGS ) OLD_ARG_DECLS TRUE_BLOCK + +AGGREGATE_DECLARATION nonterm + { STMTS } + { } + +TYPEDEF nonterm + typedef PRIMITIVE_TYPE TYPE_EXPRESSION ; + +STRUCTDECL nonterm + struct IDENT AGGREGATE_DECLARATION ; + +UNIONDECL nonterm + union IDENT AGGREGATE_DECLARATION ; + +EXPR nonterm + INT + STRING + CHAR + IDENT + EXPR -- + EXPR ++ + -- EXPR + ++ EXPR + - EXPR + + EXPR + & EXPR + * EXPR + ( ANONYMOUS_TYPE ) EXPR + EXPR ( ) + EXPR ( CALL_ARGS ) + EXPR OP EXPR + EXPR ? EXPR : EXPR + EXPR ? : EXPR + EXPR [ EXPR ] + ! 
EXPR + ( EXPR ) + sizeof EXPR + sizeof ANONYMOUS_TYPE + INITIALIZER_LIST + EXPR EXPR + +INITIALIZER_LIST nonterm + { INNER_INITIALIZER_LIST } + { } + +INNER_INITIALIZER_LIST nonterm + EXPR + INNER_INITIALIZER_LIST , EXPR + INNER_INITIALIZER_LIST , + +IF nonterm + if ( EXPR ) BLOCK + if ( EXPR ) BLOCK else BLOCK + +WHILE nonterm + while ( EXPR ) BLOCK + +DO nonterm + do BLOCK while ( EXPR ) + +FOR nonterm + for ( ; ; ) BLOCK + for ( ; ; EXPR ) BLOCK + for ( ; EXPR ; ) BLOCK + for ( ; EXPR ; EXPR ) BLOCK + for ( EXPR ; ; ) BLOCK + for ( EXPR ; ; EXPR ) BLOCK + for ( EXPR ; EXPR ; ) BLOCK + for ( EXPR ; EXPR ; EXPR ) BLOCK + +SWITCH nonterm + switch ( EXPR ) BLOCK + +DECLARATION_CHAIN nonterm + DECLARATION_CHAIN , TYPE_EXPRESSION + TYPE_EXPRESSION + DECLARATION_CHAIN , TYPE_EXPRESSION = EXPR + TYPE_EXPRESSION = EXPR + +DECLARATION_STATEMENT nonterm + PRIMITIVE_TYPE DECLARATION_CHAIN ; + +RETURN nonterm + return EXPR ; + return ; + +BREAK nonterm + break ; + +CONTINUE nonterm + continue ; + +LABEL nonterm + IDENT : STMT + +CASE nonterm + case EXPR : STMT + +STMT nonterm + TRUE_BLOCK + LABEL + CASE + BREAK + CONTINUE + RETURN + IF + WHILE + DO + FOR + SWITCH + DECLARATION_STATEMENT + EXPR ; + ; + +STMTS nonterm + STMTS STMT + STMT + STMTS ERROR + ERROR + +TRUE_BLOCK nonterm + { } + { STMTS } + +BLOCK nonterm + TRUE_BLOCK + STMT diff --git a/grammars/c/grammar.txt b/grammars/c/grammar.txt deleted file mode 100644 index ffe85c3..0000000 --- a/grammars/c/grammar.txt +++ /dev/null @@ -1,130 +0,0 @@ -KEYWORDS list - switch volatile case while do else const for if - struct union typedef void - -IDENT regex - [a-zA-Z_][0-9a-zA-Z_]* - -INT regex - [0-9]+ - -OP list - ( ) { } [ ] - ; , - - + ! % * & / << >> ^ | - -= += != %= *= &= /= <<= == >>= ^= |= - && || ++ -- - < <= > >= = - . -> ? : - -############### TYPE PARSING -# A PRIMITIVE_TYPE is the core object that takes up space after dereferencing, -# calling, etc. 
A normal variable declaration is PRIMITIVE_TYPE (expression) -PRIMITIVE_TYPE nonterm - struct IDENT - union IDENT - struct IDENT AGGREGATE_DECLARATION - union IDENT AGGREGATE_DECLARATION - const PRIMITIVE_TYPE - volatile PRIMITIVE_TYPE - void - IDENT - -# A TYPE_EXPRESSION is basically an lvalue expression. -TYPE_EXPRESSION nonterm - IDENT - TYPE_EXPRESSION [ ] - TYPE_EXPRESSION [ INT ] - * TYPE_EXPRESSION - ( TYPE_EXPRESSION ) - TYPE_EXPRESSION ( ) - TYPE_EXPRESSION ( ARGS ) - -DECLARATION nonterm - PRIMITIVE_TYPE TYPE_EXPRESSION - -# An ANONYMOUS_TYPE has no name -ANONYMOUS_TYPE nonterm - PRIMITIVE_TYPE - ANONYMOUS_TYPE [ ] - ANONYMOUS_TYPE [ INT ] - * ANONYMOUS_TYPE - ( ANONYMOUS_TYPE ) - ANONYMOUS_TYPE ( ) - ANONYMOUS_TYPE ( ARGS ) - -############### TOP LEVEL -TOP_LEVEL nonterm .start - TYPEDEF - FUNCTION - -ARGS nonterm - ANONYMOUS_TYPE - ANONYMOUS_TYPE , ARGS - DECLARATION - DECLARATION , ARGS - -FUNCTION nonterm - DECLARATION ( ) TRUE_BLOCK - DECLARATION ( ARGS ) TRUE_BLOCK - -AGGREGATE_DECLARATION nonterm - { STMTS } - -TYPEDEF nonterm - typedef PRIMITIVE_TYPE TYPE_EXPRESSION ; - -EXPR nonterm - INT - IDENT - EXPR -- - EXPR ++ - EXPR OP EXPR - EXPR ? 
EXPR : EXPR - -IF nonterm - if ( EXPR ) BLOCK - if ( EXPR ) BLOCK else BLOCK - -WHILE nonterm - while ( EXPR ) BLOCK - -DO nonterm - do BLOCK while ( EXPR ) - -FOR nonterm - for ( EXPR ; EXPR ; EXPR ) BLOCK - -SWITCH nonterm - switch ( EXPR ) BLOCK - -DECLARATION_CHAIN nonterm - TYPE_EXPRESSION - TYPE_EXPRESSION , DECLARATION_CHAIN - TYPE_EXPRESSION = EXPR - TYPE_EXPRESSION = EXPR , DECLARATION_CHAIN - -DECLARATION_STATEMENT nonterm - PRIMITIVE_TYPE DECLARATION_CHAIN ; - -STMT nonterm - IF - WHILE - DO - FOR - SWITCH - # NOTE: it auto-prefers declarations right now - DECLARATION_STATEMENT .name DECL_STMT - EXPR ; - -STMTS nonterm - STMT - STMT STMTS - -TRUE_BLOCK nonterm - { } - { STMTS } - -BLOCK nonterm - TRUE_BLOCK - STMT
diff --git a/grammars/c/preprocess.c b/grammars/c/preprocess.c
new file mode 100644
index 0000000..3ae7406
--- /dev/null
+++ b/grammars/c/preprocess.c
@@ -0,0 +1,64 @@
+/*
+ * Blank out comments and preprocessor directives in string[0..length), in
+ * place. Each removed character is overwritten with a space, so the buffer
+ * keeps its length and the remaining characters keep their offsets.
+ *
+ * - Block and line comments are blanked, including newlines inside a block
+ *   comment and the newline that ends a line comment.
+ * - A '#' that is the first character of a line starts a directive, which
+ *   is blanked up to (not including) the first newline that does not
+ *   directly follow a backslash.
+ *
+ * NOTE: string and character literals are not tracked, so comment markers
+ * inside a literal are blanked as well.
+ */
+void preprocess(char *string, size_t length) {
+    /* Nonzero while nothing has been seen yet on the current line. */
+    int on_newline = 1;
+    size_t i = 0;
+
+    while (i < length) {
+        /* Peek at the next character only when it is inside the buffer. */
+        char next = (i + 1 < length) ? string[i+1] : '\0';
+
+        if (string[i] == '/' && next == '*') {
+            on_newline = 0;
+            /* Consume the opener first so its '*' cannot also act as the
+             * start of the closing delimiter. */
+            string[i++] = ' ';
+            string[i++] = ' ';
+            while (i < length) {
+                if (string[i] == '*' && i + 1 < length && string[i+1] == '/') {
+                    string[i++] = ' ';
+                    string[i++] = ' ';
+                    break;
+                }
+                string[i++] = ' ';
+            }
+        } else if (string[i] == '/' && next == '/') {
+            on_newline = 0;
+            while (i < length && string[i] != '\n')
+                string[i++] = ' ';
+            if (i < length) {
+                /* The newline is blanked like the rest of the comment, but
+                 * the character after it still begins a new line. */
+                string[i++] = ' ';
+                on_newline = 1;
+            }
+        } else if (string[i] == '#' && on_newline) {
+            int escaped = 0;
+            string[i++] = ' ';
+            for (; i < length; i++) {
+                if (string[i] == '\n' && !escaped) break;
+                escaped = (string[i] == '\\');
+                string[i] = ' ';
+            }
+            /* Step over the newline; the next line may be a directive. */
+            if (i < length) i++;
+        } else {
+            /* Ordinary character, including a '#' in the middle of a line. */
+            on_newline = (string[i] == '\n');
+            i++;
+        }
+    }
+}
diff --git a/parse.py b/parse.py
new file mode 100644
index 0000000..190c312
--- /dev/null
+++ b/parse.py
@@ -0,0 +1,6 @@
+import earlpy
+import sys
+
+p = earlpy.Parser(sys.argv[1])
+if len(sys.argv) == 3:
+    print(p.parse_file(sys.argv[2]).pprint()) |