summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Sotoudeh <matthew@masot.net>2024-03-11 16:33:24 -0700
committerMatthew Sotoudeh <matthew@masot.net>2024-03-11 16:33:24 -0700
commita9292a98cc6c65e2a4ad6da20937ef7568a4143d (patch)
treef921866c475208359285ff3f606c0fafa1812a32
parent35fa21e59ad44de3ac5d075a3c1ae60d462a1a13 (diff)
earlpy
-rw-r--r--.gitignore1
-rw-r--r--DESIGN.txt57
-rw-r--r--earlpy/earlpy.py146
-rw-r--r--examples/check_errors.py14
-rw-r--r--examples/dash_var.c678
-rw-r--r--examples/error.c2
-rw-r--r--examples/simple.c2
-rw-r--r--examples/small_dash_var.c678
-rw-r--r--examples/tiny.c430
-rwxr-xr-xextras/ftdetect/earlpy.vim1
-rw-r--r--extras/syntax/earlpy.vim20
-rw-r--r--grammars/c/disambiguate.c84
-rw-r--r--grammars/c/grammar.earlpy265
-rw-r--r--grammars/c/grammar.txt130
-rw-r--r--grammars/c/preprocess.c45
-rw-r--r--parse.py6
16 files changed, 2375 insertions, 184 deletions
diff --git a/.gitignore b/.gitignore
index 2e325a5..79120c7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ __pycache__
parser
parser.c
parser.l
+!earlpy/parser.c
diff --git a/DESIGN.txt b/DESIGN.txt
index 0762ca6..0fafff0 100644
--- a/DESIGN.txt
+++ b/DESIGN.txt
@@ -1,3 +1,60 @@
+====== Performance Issues ========
+The biggest performance issues involve parsing grammars such as
+
+ LIST nonterm
+ LIST , EXPR
+ EXPR
+
+With a huge input of
+
+ A , A , A , A , ...
+
+Naively, the parser will try to match an EXPR starting at each of the As.
+
+I guess this is because even if you are associating so far like
+
+ (((A, A), A), A), ...
+
+it doesn't know whether, later on in the parse, you might want to reassociate
+'across the boundary':
+
+ (((A, A), A), (A, B)), ...
+
+At this point, I don't think it's worth trying to do much more general-purpose
+optimization for this case. Instead, I think we should have callouts to user
+code to give hints. Two possible hint types:
+
+ 1. "Parse this region as this parse tree." Easy to use: when you get there,
+ just skip all of those indices and complete that tree with any watchers.
+ (Might need to do prediction at the first index.)
+
+Actually, that's probably by far the easiest.
+
+====== More Disambiguation Issues ========
+What if we poison X, use it in completion Y, but then overwrite X with a
+nonpoisoned Z? Then Y will be incorrectly poisoned...
+
+====== More Disambiguation Issues ========
+Consider
+
+ STMTS nonterm .start
+ STMTS STMT
+ STMT
+
+vs.
+
+ STMTS nonterm .start
+ STMT
+ STMTS STMT
+
+Swapping these two productions can actually change which matches happen in STMT :O
+
+====== More Disambiguation Issues ========
+Associativity and precedence can have weird interplay
+
+E.g., maybe you can get Stmts(Error(1), Stmt(2, 3)) which has better
+associativity than Stmts(Stmt(1, 2), Stmt(3))
+
====== Disambiguation Issues ========
Consider two possible parses of 0 + 1 * 5 + 4
( ( 0 + ( 1 * 5 ) ) + 4 )
diff --git a/earlpy/earlpy.py b/earlpy/earlpy.py
index 3b0deab..2944c51 100644
--- a/earlpy/earlpy.py
+++ b/earlpy/earlpy.py
@@ -9,9 +9,8 @@ DIR = pathlib.Path(__file__).parent.resolve()
class Parser:
def __init__(self, parser_dir):
- assert parser_dir and parser_dir[0] != '/'
- parser_dir = parser_dir
- files = sorted([f"{parser_dir}/grammar.txt",
+ assert parser_dir and parser_dir != '/'
+ files = sorted([f"{parser_dir}/grammar.earlpy",
*glob(f"{parser_dir}/*.c"),
f"{DIR}/parser.c",
__file__])
@@ -27,7 +26,7 @@ class Parser:
if open(lex_path, "r").readline()[3:][:-3].strip() == hashes:
already_built = True
- lines = self.parse_grammar(f"{parser_dir}/grammar.txt")
+ lines = self.parse_grammar(f"{parser_dir}/grammar.earlpy")
if not already_built:
if glob(f"{parser_dir}/parser"):
subprocess.run(f"rm {parser_dir}/parser", shell=True)
@@ -43,7 +42,7 @@ class Parser:
shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if res.returncode: print(res.stderr.decode("utf-8"))
assert res.returncode == 0
- res = subprocess.run(f"gcc -O3 {parser_dir}/parser.c -o {parser_dir}/parser",
+ res = subprocess.run(f"gcc -g -O3 {parser_dir}/parser.c -ljemalloc -o {parser_dir}/parser",
shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if res.returncode: print(res.stderr.decode("utf-8"))
assert res.returncode == 0
@@ -66,12 +65,20 @@ class Parser:
raise ValueError
contents = open(path, "r").read()
+ offset_to_line = dict()
+ line = 1
+ for i, c in enumerate(open(path, "rb").read()):
+ offset_to_line[i] = line
+ if c == '\n' or chr(c) == '\n': line += 1
n_tokens, = struct.unpack("Q", res.stdout[:8])
# symbol id, start idx, length
tokens = list(struct.iter_unpack("QQQ", res.stdout[8:8+(8*3*n_tokens)]))
- tokens = [(token, contents[token[1]:token[1]+token[2]])
- for token in tokens]
+ tokens = [Token(self.id_to_symbol[symbol],
+ contents[offset:offset+length],
+ offset_to_line[offset],
+ path)
+ for (symbol, offset, length) in tokens]
# production id
nodes = [t[0] for t in struct.iter_unpack("Q", res.stdout[8+(8*3*n_tokens):])]
# print(nodes)
@@ -83,13 +90,7 @@ class Parser:
stack = [root]
while stack:
node = stack[-1]
- # print(len(stack))
- # if isinstance(node, tuple):
- # print("\t", node)
- # else:
- # print("\t", node.symbol.name, [s.name for s in node.production])
-
- if (isinstance(node, tuple)
+ if (isinstance(node, Token)
or len(node.production) == len(node.contents)):
stack.pop()
if stack: stack[-1].contents.append(node)
@@ -220,11 +221,16 @@ class Parser:
put(", \"" + symbol.name + "\"")
put(" };")
for symbol in ordered_symbols:
- if symbol.name.isalnum():
+ if symbol.name.replace("_", "").isalnum():
putl(f"#define SYMBOL_{symbol.name} {symbol.id}")
if symbol.is_start:
putl(f"#define START_SYMBOL {symbol.id}")
+ putl("char SYMBOL_TO_POISON[] = { 0")
+ for symbol in ordered_symbols:
+ put(", " + ("1" if symbol.poisoned else "0"))
+ put(" };")
+
putl("prod_id_t SYMBOL_ID_TO_PRODUCTION_IDS[N_SYMBOLS][MAX_N_PRODUCTIONS] = { {0}")
# [(production, Symbol), ...]
self.productions = [([], None, None)]
@@ -265,6 +271,37 @@ class Parser:
if i != 0: put(f", {symbol.id}")
put(" };")
+ # Production hints: for this production, what does the leading symbol
+ # need to be?
+ # symbol -> symbol | True (multiple)
+ symbol_to_first = {symbol: symbol
+ for symbol in self.id_to_symbol.values()
+ if symbol.kind != "nonterm"}
+ fixedpoint = False
+ while not fixedpoint:
+ fixedpoint = True
+ for symbol in self.id_to_symbol.values():
+ if symbol.kind != "nonterm": continue
+ head_symbols = [self.name_to_symbol[production[0]]
+ for production in symbol.contents]
+ firsts = [symbol_to_first.get(head, None)
+ for head in head_symbols]
+ new_first = (firsts[0] if all(f == firsts[0] for f in firsts)
+ else True)
+ if symbol_to_first.get(symbol, None) != new_first:
+ symbol_to_first[symbol] = new_first
+ fixedpoint = False
+
+ putl("symbol_id_t PRODUCTION_ID_TO_FIRST[N_PRODUCTIONS] = { 0")
+ for i, (production, _, _) in enumerate(self.productions):
+ if i == 0: continue
+ if not production or symbol_to_first.get(production[0], True) is True:
+ put(", 0")
+ else:
+ put(f", {symbol_to_first[production[0]].id}")
+ put(" };")
+
+ ##### DONE: output the lexer
putl("void lex_symbol(symbol_id_t);")
putl("%}")
putl("%%")
@@ -292,6 +329,7 @@ class Symbol:
self.name = parts[0]
self.kind = parts[1]
self.is_start = ".start" in parts[2:]
+ self.poisoned = ".poison" in parts[2:]
self.contents = []
self.production_names = []
self.id = None
@@ -321,11 +359,87 @@ class Node:
self.production = production
self.contents = []
+ def line_numbers(self):
+ return self.contents[0].line_numbers()
+
+ def max_line_numbers(self):
+ return self.contents[-1].max_line_numbers()
+
+ def file_name(self):
+ return self.contents[-1].file_name()
+
def pprint(self):
def pprint(other):
if isinstance(other, Node):
return other.pprint()
- return other[1]
+ return other.pprint()
if len(self.contents) == 1:
return pprint(self.contents[0])
return '(' + ' '.join(map(pprint, self.contents)) + ')'
+
+ def print_tree(self, depth=0):
+ print((' ' * depth) + self.symbol.name)
+ for arg in self.contents:
+ arg.print_tree(depth + 2)
+
+ def isa(self, *patterns):
+ for pattern in patterns:
+ if "->" in pattern:
+ symbol, production = pattern.split("->")
+ symbol = symbol.strip()
+ if symbol != self.symbol.name: continue
+ production = production.split()
+ if production[-1] != "..." and len(production) != len(self.pprint_production().split()[2:]):
+ continue
+ for desired, real in zip(production, self.pprint_production().split()[2:]):
+ if desired == "...": return True
+ if desired != real: break
+ else: return True
+ else:
+ symbol = pattern.strip()
+ if symbol == self.symbol.name:
+ return True
+ return False
+
+ def hasa(self, symbol):
+ return any(sub.name == symbol for sub in self.production)
+
+ def pprint_production(self):
+ parts = []
+ for s in self.production:
+ if "::" in s.name: parts.append(s.name[s.name.index("::")+2:])
+ else: parts.append(s.name)
+ return f"{self.symbol.name} -> {' '.join(parts)}"
+
+ def find(self, kind, which=0, total=1):
+ found = []
+ for s in self.subtrees():
+ if s.symbol.name == kind:
+ found.append(s)
+ if len(found) != total: raise ValueError
+ return found[which]
+
+ def subtrees(self): return self.contents
+ def __getitem__(self, i): return self.contents[i]
+
+class Token:
+ def __init__(self, symbol, string, line_number, file_name):
+ self.symbol = symbol
+ self.string = string
+ self.line_number = line_number
+ self.file_name_ = file_name
+
+ def pprint(self):
+ return self.string
+
+ def line_numbers(self):
+ return {self.line_number}
+
+ def file_name(self):
+ return self.file_name_
+
+ def max_line_numbers(self):
+ return self.line_numbers()
+
+ def print_tree(self, depth=0):
+ print((' ' * depth) + self.symbol.name , self.string , self.line_number)
diff --git a/examples/check_errors.py b/examples/check_errors.py
new file mode 100644
index 0000000..70f9f13
--- /dev/null
+++ b/examples/check_errors.py
@@ -0,0 +1,14 @@
+import earlpy
+import sys
+
+p = earlpy.Parser("grammars/c")
+node = p.parse_file(sys.argv[1])
+node.print_tree()
+def visit(n):
+ if isinstance(n, earlpy.Token):
+ return
+ if n.symbol.name == "ERROR":
+ print(n.line_numbers(), n.pprint())
+ else:
+ for a in n.contents: visit(a)
+visit(node)
diff --git a/examples/dash_var.c b/examples/dash_var.c
new file mode 100644
index 0000000..b70d72c
--- /dev/null
+++ b/examples/dash_var.c
@@ -0,0 +1,678 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1997-2005
+ * Herbert Xu <herbert@gondor.apana.org.au>. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Kenneth Almquist.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef HAVE_PATHS_H
+#include <paths.h>
+#endif
+
+/*
+ * Shell variables.
+ */
+
+#include "shell.h"
+#include "output.h"
+#include "expand.h"
+#include "nodes.h" /* for other headers */
+#include "exec.h"
+#include "syntax.h"
+#include "options.h"
+#include "mail.h"
+#include "var.h"
+#include "memalloc.h"
+#include "error.h"
+#include "mystring.h"
+#include "parser.h"
+#include "show.h"
+#ifndef SMALL
+#include "myhistedit.h"
+#endif
+#include "system.h"
+
+
+#define VTABSIZE 39
+
+
+struct localvar_list {
+ struct localvar_list *next;
+ struct localvar *lv;
+};
+
+MKINIT struct localvar_list *localvar_stack;
+
+const char defpathvar[] =
+ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
+char defifsvar[] = "IFS= \t\n";
+MKINIT char defoptindvar[] = "OPTIND=1";
+
+int lineno;
+char linenovar[sizeof("LINENO=")+sizeof(int)*CHAR_BIT/3+1] = "LINENO=";
+
+/* Some macros in var.h depend on the order, add new variables to the end. */
+struct var varinit[] = {
+#if ATTY
+ { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "ATTY\0", 0 },
+#endif
+ { 0, VSTRFIXED|VTEXTFIXED, defifsvar, 0 },
+ { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAIL\0", changemail },
+ { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAILPATH\0", changemail },
+ { 0, VSTRFIXED|VTEXTFIXED, defpathvar, changepath },
+ { 0, VSTRFIXED|VTEXTFIXED, "PS1=$ ", 0 },
+ { 0, VSTRFIXED|VTEXTFIXED, "PS2=> ", 0 },
+ { 0, VSTRFIXED|VTEXTFIXED, "PS4=+ ", 0 },
+ { 0, VSTRFIXED|VTEXTFIXED, defoptindvar, getoptsreset },
+#ifdef WITH_LINENO
+ { 0, VSTRFIXED|VTEXTFIXED, linenovar, 0 },
+#endif
+#ifndef SMALL
+ { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "TERM\0", 0 },
+ { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "HISTSIZE\0", sethistsize },
+#endif
+};
+
+STATIC struct var *vartab[VTABSIZE];
+
+STATIC struct var **hashvar(const char *);
+STATIC int vpcmp(const void *, const void *);
+STATIC struct var **findvar(struct var **, const char *);
+
+/*
+ * Initialize the variable symbol tables and import the environment
+ */
+
+#ifdef mkinit
+INCLUDE <unistd.h>
+INCLUDE <sys/types.h>
+INCLUDE <sys/stat.h>
+INCLUDE "cd.h"
+INCLUDE "output.h"
+INCLUDE "var.h"
+MKINIT char **environ;
+INIT {
+ char **envp;
+ static char ppid[32] = "PPID=";
+ const char *p;
+ struct stat64 st1, st2;
+
+ initvar();
+ for (envp = environ ; *envp ; envp++) {
+ p = endofname(*envp);
+ if (p != *envp && *p == '=') {
+ setvareq(*envp, VEXPORT|VTEXTFIXED);
+ }
+ }
+
+ setvareq(defifsvar, VTEXTFIXED);
+ setvareq(defoptindvar, VTEXTFIXED);
+
+ fmtstr(ppid + 5, sizeof(ppid) - 5, "%ld", (long) getppid());
+ setvareq(ppid, VTEXTFIXED);
+
+ p = lookupvar("PWD");
+ if (p)
+ if (*p != '/' || stat64(p, &st1) || stat64(".", &st2) ||
+ st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino)
+ p = 0;
+ setpwd(p, 0);
+}
+
+RESET {
+ unwindlocalvars(0);
+}
+#endif
+
+static char *varnull(const char *s)
+{
+ return (strchr(s, '=') ?: nullstr - 1) + 1;
+}
+
+/*
+ * This routine initializes the builtin variables. It is called when the
+ * shell is initialized.
+ */
+
+void
+initvar(void)
+{
+ struct var *vp;
+ struct var *end;
+ struct var **vpp;
+
+ vp = varinit;
+ end = vp + sizeof(varinit) / sizeof(varinit[0]);
+ do {
+ vpp = hashvar(vp->text);
+ vp->next = *vpp;
+ *vpp = vp;
+ } while (++vp < end);
+ /*
+ * PS1 depends on uid
+ */
+ if (!geteuid())
+ vps1.text = "PS1=# ";
+}
+
+/*
+ * Set the value of a variable. The flags argument is ored with the
+ * flags of the variable. If val is NULL, the variable is unset.
+ */
+
+struct var *setvar(const char *name, const char *val, int flags)
+{
+ char *p, *q;
+ size_t namelen;
+ char *nameeq;
+ size_t vallen;
+ struct var *vp;
+
+ q = endofname(name);
+ p = strchrnul(q, '=');
+ namelen = p - name;
+ if (!namelen || p != q)
+ sh_error("%.*s: bad variable name", namelen, name);
+ vallen = 0;
+ if (val == NULL) {
+ flags |= VUNSET;
+ } else {
+ vallen = strlen(val);
+ }
+ INTOFF;
+ p = mempcpy(nameeq = ckmalloc(namelen + vallen + 2), name, namelen);
+ if (val) {
+ *p++ = '=';
+ p = mempcpy(p, val, vallen);
+ }
+ *p = '\0';
+ vp = setvareq(nameeq, flags | VNOSAVE);
+ INTON;
+
+ return vp;
+}
+
+/*
+ * Set the given integer as the value of a variable. The flags argument is
+ * ored with the flags of the variable.
+ */
+
+intmax_t setvarint(const char *name, intmax_t val, int flags)
+{
+ int len = max_int_length(sizeof(val));
+ char buf[len];
+
+ fmtstr(buf, len, "%" PRIdMAX, val);
+ setvar(name, buf, flags);
+ return val;
+}
+
+
+
+/*
+ * Same as setvar except that the variable and value are passed in
+ * the first argument as name=value. Since the first argument will
+ * be actually stored in the table, it should not be a string that
+ * will go away.
+ * Called with interrupts off.
+ */
+
+struct var *setvareq(char *s, int flags)
+{
+ struct var *vp, **vpp;
+
+ vpp = hashvar(s);
+ flags |= (VEXPORT & (((unsigned) (1 - aflag)) - 1));
+ vpp = findvar(vpp, s);
+ vp = *vpp;
+ if (vp) {
+ if (vp->flags & VREADONLY) {
+ const char *n;
+
+ if (flags & VNOSAVE)
+ free(s);
+ n = vp->text;
+ sh_error("%.*s: is read only", strchrnul(n, '=') - n,
+ n);
+ }
+
+ if (flags & VNOSET)
+ goto out;
+
+ if (vp->func && (flags & VNOFUNC) == 0)
+ (*vp->func)(varnull(s));
+
+ if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0)
+ ckfree(vp->text);
+
+ if (((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) |
+ (vp->flags & VSTRFIXED)) == VUNSET) {
+ *vpp = vp->next;
+ ckfree(vp);
+out_free:
+ if ((flags & (VTEXTFIXED|VSTACK|VNOSAVE)) == VNOSAVE)
+ ckfree(s);
+ goto out;
+ }
+
+ flags |= vp->flags & ~(VTEXTFIXED|VSTACK|VNOSAVE|VUNSET);
+ } else {
+ if (flags & VNOSET)
+ goto out;
+ if ((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) == VUNSET)
+ goto out_free;
+ /* not found */
+ vp = ckmalloc(sizeof (*vp));
+ vp->next = *vpp;
+ vp->func = NULL;
+ *vpp = vp;
+ }
+ if (!(flags & (VTEXTFIXED|VSTACK|VNOSAVE)))
+ s = savestr(s);
+ vp->text = s;
+ vp->flags = flags;
+
+out:
+ return vp;
+}
+
+/*
+ * Find the value of a variable. Returns NULL if not set.
+ */
+
+char *
+lookupvar(const char *name)
+{
+ struct var *v;
+
+ if ((v = *findvar(hashvar(name), name)) && !(v->flags & VUNSET)) {
+#ifdef WITH_LINENO
+ if (v == &vlineno && v->text == linenovar) {
+ fmtstr(linenovar+7, sizeof(linenovar)-7, "%d", lineno);
+ }
+#endif
+ return strchrnul(v->text, '=') + 1;
+ }
+ return NULL;
+}
+
+intmax_t lookupvarint(const char *name)
+{
+ return atomax(lookupvar(name) ?: nullstr, 0);
+}
+
+
+
+/*
+ * Generate a list of variables satisfying the given conditions.
+ */
+
+char **
+listvars(int on, int off, char ***end)
+{
+ struct var **vpp;
+ struct var *vp;
+ char **ep;
+ int mask;
+
+ STARTSTACKSTR(ep);
+ vpp = vartab;
+ mask = on | off;
+ do {
+ for (vp = *vpp ; vp ; vp = vp->next)
+ if ((vp->flags & mask) == on) {
+ if (ep == stackstrend())
+ ep = growstackstr();
+ *ep++ = (char *) vp->text;
+ }
+ } while (++vpp < vartab + VTABSIZE);
+ if (ep == stackstrend())
+ ep = growstackstr();
+ if (end)
+ *end = ep;
+ *ep++ = NULL;
+ return grabstackstr(ep);
+}
+
+
+
+/*
+ * POSIX requires that 'set' (but not export or readonly) output the
+ * variables in lexicographic order - by the locale's collating order (sigh).
+ * Maybe we could keep them in an ordered balanced binary tree
+ * instead of hashed lists.
+ * For now just roll 'em through qsort for printing...
+ */
+
+int
+showvars(const char *prefix, int on, int off)
+{
+ const char *sep;
+ char **ep, **epend;
+
+ ep = listvars(on, off, &epend);
+ qsort(ep, epend - ep, sizeof(char *), vpcmp);
+
+ sep = *prefix ? spcstr : prefix;
+
+ for (; ep < epend; ep++) {
+ const char *p;
+ const char *q;
+
+ p = strchrnul(*ep, '=');
+ q = nullstr;
+ if (*p)
+ q = single_quote(++p);
+
+ out1fmt("%s%s%.*s%s\n", prefix, sep, (int)(p - *ep), *ep, q);
+ }
+
+ return 0;
+}
+
+
+
+/*
+ * The export and readonly commands.
+ */
+
+int
+exportcmd(int argc, char **argv)
+{
+ struct var *vp;
+ char *name;
+ const char *p;
+ char **aptr;
+ int flag = argv[0][0] == 'r'? VREADONLY : VEXPORT;
+ int notp;
+
+ notp = nextopt("p") - 'p';
+ if (notp && ((name = *(aptr = argptr)))) {
+ do {
+ if ((p = strchr(name, '=')) != NULL) {
+ p++;
+ } else {
+ if ((vp = *findvar(hashvar(name), name))) {
+ vp->flags |= flag;
+ continue;
+ }
+ }
+ setvar(name, p, flag);
+ } while ((name = *++aptr) != NULL);
+ } else {
+ showvars(argv[0], flag, 0);
+ }
+ return 0;
+}
+
+
+/*
+ * The "local" command.
+ */
+
+int
+localcmd(int argc, char **argv)
+{
+ char *name;
+
+ if (!localvar_stack)
+ sh_error("not in a function");
+
+ argv = argptr;
+ while ((name = *argv++) != NULL) {
+ mklocal(name, 0);
+ }
+ return 0;
+}
+
+
+/*
+ * Make a variable a local variable. When a variable is made local, its
+ * value and flags are saved in a localvar structure. The saved values
+ * will be restored when the shell function returns. We handle the name
+ * "-" as a special case.
+ */
+
+void mklocal(char *name, int flags)
+{
+ struct localvar *lvp;
+ struct var **vpp;
+ struct var *vp;
+
+ INTOFF;
+ lvp = ckmalloc(sizeof (struct localvar));
+ if (name[0] == '-' && name[1] == '\0') {
+ char *p;
+ p = ckmalloc(sizeof(optlist));
+ lvp->text = memcpy(p, optlist, sizeof(optlist));
+ vp = NULL;
+ } else {
+ char *eq;
+
+ vpp = hashvar(name);
+ vp = *findvar(vpp, name);
+ eq = strchr(name, '=');
+ if (vp == NULL) {
+ if (eq)
+ vp = setvareq(name, VSTRFIXED | flags);
+ else
+ vp = setvar(name, NULL, VSTRFIXED | flags);
+ lvp->flags = VUNSET;
+ } else {
+ lvp->text = vp->text;
+ lvp->flags = vp->flags;
+ vp->flags |= VSTRFIXED|VTEXTFIXED;
+ if (eq)
+ setvareq(name, flags);
+ }
+ }
+ lvp->vp = vp;
+ lvp->next = localvar_stack->lv;
+ localvar_stack->lv = lvp;
+ INTON;
+}
+
+
+/*
+ * Called after a function returns.
+ * Interrupts must be off.
+ */
+
+static void
+poplocalvars(void)
+{
+ struct localvar_list *ll;
+ struct localvar *lvp, *next;
+ struct var *vp;
+
+ INTOFF;
+ ll = localvar_stack;
+ localvar_stack = ll->next;
+
+ next = ll->lv;
+ ckfree(ll);
+
+ while ((lvp = next) != NULL) {
+ next = lvp->next;
+ vp = lvp->vp;
+ TRACE(("poplocalvar %s\n", vp ? vp->text : "-"));
+ if (vp == NULL) { /* $- saved */
+ memcpy(optlist, lvp->text, sizeof(optlist));
+ ckfree(lvp->text);
+ optschanged();
+ } else if (lvp->flags == VUNSET) {
+ vp->flags &= ~(VSTRFIXED|VREADONLY);
+ unsetvar(vp->text);
+ } else {
+ if (vp->func)
+ (*vp->func)(varnull(lvp->text));
+ if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0)
+ ckfree(vp->text);
+ vp->flags = lvp->flags;
+ vp->text = lvp->text;
+ }
+ ckfree(lvp);
+ }
+ INTON;
+}
+
+
+/*
+ * Create a new localvar environment.
+ */
+struct localvar_list *pushlocalvars(int push)
+{
+ struct localvar_list *ll;
+ struct localvar_list *top;
+
+ top = localvar_stack;
+ if (!push)
+ goto out;
+
+ INTOFF;
+ ll = ckmalloc(sizeof(*ll));
+ ll->lv = NULL;
+ ll->next = top;
+ localvar_stack = ll;
+ INTON;
+
+out:
+ return top;
+}
+
+
+void unwindlocalvars(struct localvar_list *stop)
+{
+ while (localvar_stack != stop)
+ poplocalvars();
+}
+
+
+/*
+ * The unset builtin command. We unset the function before we unset the
+ * variable to allow a function to be unset when there is a readonly variable
+ * with the same name.
+ */
+
+int
+unsetcmd(int argc, char **argv)
+{
+ char **ap;
+ int i;
+ int flag = 0;
+
+ while ((i = nextopt("vf")) != '\0') {
+ flag = i;
+ }
+
+ for (ap = argptr; *ap ; ap++) {
+ if (flag != 'f') {
+ unsetvar(*ap);
+ continue;
+ }
+ if (flag != 'v')
+ unsetfunc(*ap);
+ }
+ return 0;
+}
+
+
+/*
+ * Unset the specified variable.
+ */
+
+void unsetvar(const char *s)
+{
+ setvar(s, 0, 0);
+}
+
+
+
+/*
+ * Find the appropriate entry in the hash table from the name.
+ */
+
+STATIC struct var **
+hashvar(const char *p)
+{
+ unsigned int hashval;
+
+ hashval = ((unsigned char) *p) << 4;
+ while (*p && *p != '=')
+ hashval += (unsigned char) *p++;
+ return &vartab[hashval % VTABSIZE];
+}
+
+
+
+/*
+ * Compares two strings up to the first = or '\0'. The first
+ * string must be terminated by '='; the second may be terminated by
+ * either '=' or '\0'.
+ */
+
+int
+varcmp(const char *p, const char *q)
+{
+ int c, d;
+
+ while ((c = *p) == (d = *q)) {
+ if (!c || c == '=')
+ goto out;
+ p++;
+ q++;
+ }
+ if (c == '=')
+ c = 0;
+ if (d == '=')
+ d = 0;
+out:
+ return c - d;
+}
+
+STATIC int
+vpcmp(const void *a, const void *b)
+{
+ return varcmp(*(const char **)a, *(const char **)b);
+}
+
+STATIC struct var **
+findvar(struct var **vpp, const char *name)
+{
+ for (; *vpp; vpp = &(*vpp)->next) {
+ if (varequal((*vpp)->text, name)) {
+ break;
+ }
+ }
+ return vpp;
+}
diff --git a/examples/error.c b/examples/error.c
new file mode 100644
index 0000000..a0c6cb8
--- /dev/null
+++ b/examples/error.c
@@ -0,0 +1,2 @@
+INIT {
+}
diff --git a/examples/simple.c b/examples/simple.c
index 4d3139c..16629ee 100644
--- a/examples/simple.c
+++ b/examples/simple.c
@@ -1,5 +1,7 @@
+void foo() {
if (1)
if (1)
1;
else
1;
+}
diff --git a/examples/small_dash_var.c b/examples/small_dash_var.c
new file mode 100644
index 0000000..33b3c49
--- /dev/null
+++ b/examples/small_dash_var.c
@@ -0,0 +1,678 @@
+/*-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1997-2005
+ * Herbert Xu <herbert@gondor.apana.org.au>. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Kenneth Almquist.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef HAVE_PATHS_H
+#include <paths.h>
+#endif
+
+/*
+ * Shell variables.
+ */
+
+#include "shell.h"
+#include "output.h"
+#include "expand.h"
+#include "nodes.h" /* for other headers */
+#include "exec.h"
+#include "syntax.h"
+#include "options.h"
+#include "mail.h"
+#include "var.h"
+#include "memalloc.h"
+#include "error.h"
+#include "mystring.h"
+#include "parser.h"
+#include "show.h"
+#ifndef SMALL
+#include "myhistedit.h"
+#endif
+#include "system.h"
+
+
+#define VTABSIZE 39
+
+
+struct localvar_list { /* one node of the stack headed by localvar_stack; pushlocalvars() adds a node, poplocalvars() removes one */
+ struct localvar_list *next; /* node that was on top before this one was pushed */
+ struct localvar *lv; /* chain of saved-variable records; mklocal() inserts at the front */
+};
+
+MKINIT struct localvar_list *localvar_stack;
+
+const char defpathvar[] =
+ "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
+char defifsvar[] = "IFS= \t\n";
+MKINIT char defoptindvar[] = "OPTIND=1";
+
+int lineno;
+char linenovar[sizeof("LINENO=")+sizeof(int)*CHAR_BIT/3+1] = "LINENO=";
+
+/* Some macros in var.h depend on the order, add new variables to the end.
+ * initvar() links every entry of this table into vartab. Entries carrying
+ * VUNSET use a bare name followed by an explicit "\0" instead of
+ * "NAME=value". The initializer layout looks like
+ * { next, flags, text, func } -- confirm against struct var in var.h. */
+struct var varinit[] = {
+#if ATTY
+ { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "ATTY\0", 0 },
+#endif
+ { 0, VSTRFIXED|VTEXTFIXED, defifsvar, 0 },
+ { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAIL\0", changemail },
+ { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "MAILPATH\0", changemail },
+ { 0, VSTRFIXED|VTEXTFIXED, defpathvar, changepath },
+ { 0, VSTRFIXED|VTEXTFIXED, "PS1=$ ", 0 },
+ { 0, VSTRFIXED|VTEXTFIXED, "PS2=> ", 0 },
+ { 0, VSTRFIXED|VTEXTFIXED, "PS4=+ ", 0 },
+ { 0, VSTRFIXED|VTEXTFIXED, defoptindvar, getoptsreset },
+#ifdef WITH_LINENO
+ { 0, VSTRFIXED|VTEXTFIXED, linenovar, 0 },
+#endif
+#ifndef SMALL
+ { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "TERM\0", 0 },
+ { 0, VSTRFIXED|VTEXTFIXED|VUNSET, "HISTSIZE\0", sethistsize },
+#endif
+};
+
+STATIC struct var *vartab[VTABSIZE];
+
+STATIC struct var **hashvar(const char *);
+STATIC int vpcmp(const void *, const void *);
+STATIC struct var **findvar(struct var **, const char *);
+
+/*
+ * Initialize the variable symbol tables and import the environment
+ *
+ * Everything down to the matching #endif is only visible when "mkinit"
+ * is defined. INCLUDE / MKINIT / INIT / RESET are not C keywords;
+ * presumably they are directives for the mkinit generator -- confirm.
+ */
+
+#ifdef mkinit
+INCLUDE <unistd.h>
+INCLUDE <sys/types.h>
+INCLUDE <sys/stat.h>
+INCLUDE "cd.h"
+INCLUDE "output.h"
+INCLUDE "var.h"
+MKINIT char **environ;
+INIT {
+ char **envp;
+ static char ppid[32] = "PPID="; /* static: setvareq() below stores this buffer itself (VTEXTFIXED) */
+ const char *p;
+ struct stat64 st1, st2;
+
+ initvar();
+ for (envp = environ ; *envp ; envp++) {
+ p = endofname(*envp);
+ if (p != *envp && *p == '=') { /* non-empty name immediately followed by '=' */
+ setvareq(*envp, VEXPORT|VTEXTFIXED);
+ }
+ }
+
+ setvareq(defifsvar, VTEXTFIXED);
+ setvareq(defoptindvar, VTEXTFIXED);
+
+ fmtstr(ppid + 5, sizeof(ppid) - 5, "%ld", (long) getppid()); /* 5 == length of "PPID=" */
+ setvareq(ppid, VTEXTFIXED);
+
+ p = lookupvar("PWD");
+ if (p)
+ if (*p != '/' || stat64(p, &st1) || stat64(".", &st2) ||
+ st1.st_dev != st2.st_dev || st1.st_ino != st2.st_ino)
+ p = 0; /* discard PWD unless it is absolute and names the same device/inode as "." */
+ setpwd(p, 0);
+}
+
+RESET {
+ unwindlocalvars(0); /* pops until localvar_stack == 0, i.e. every frame */
+}
+#endif
+
+static char *varnull(const char *s)
+{ /* Return the text just past the first '=' in s, or nullstr when s has no '='. Uses the GNU "a ?: b" extension. */
+ return (strchr(s, '=') ?: nullstr - 1) + 1; /* the "- 1" cancels the trailing "+ 1" on the fallback */
+}
+
+/*
+ * This routine initializes the builtin variables. It is called when the
+ * shell is initialized.
+ *
+ * Each entry of varinit[] is inserted at the front of the bucket chosen
+ * by hashvar(). The do/while form assumes varinit[] has at least one
+ * entry.
+ */
+
+void
+initvar(void)
+{
+ struct var *vp;
+ struct var *end;
+ struct var **vpp;
+
+ vp = varinit;
+ end = vp + sizeof(varinit) / sizeof(varinit[0]); /* one past the last table entry */
+ do {
+ vpp = hashvar(vp->text);
+ vp->next = *vpp;
+ *vpp = vp;
+ } while (++vp < end);
+ /*
+ * PS1 depends on uid: effective uid 0 gets "# " instead of the
+ * "$ " default from varinit[].
+ */
+ if (!geteuid())
+ vps1.text = "PS1=# ";
+}
+
+/*
+ * Set the value of a variable. The flags argument is ored with the
+ * flags of the variable. If val is NULL, the variable is unset.
+ *
+ * name: variable name, optionally followed by "=..."; only the part
+ * before the '=' is used. An empty name, or one where
+ * endofname() stops before the '=' / end of string, is reported
+ * through sh_error().
+ * val: new value, or NULL to request an unset (VUNSET is added).
+ * flags: extra V* flags passed on to setvareq().
+ *
+ * Returns the result of setvareq() for the newly built string.
+ */
+
+struct var *setvar(const char *name, const char *val, int flags)
+{
+ char *p, *q;
+ size_t namelen;
+ char *nameeq;
+ size_t vallen;
+ struct var *vp;
+
+ q = endofname(name);
+ p = strchrnul(q, '=');
+ namelen = p - name;
+ if (!namelen || p != q)
+ /* "%.*s" reads its precision as an int, so narrow the size_t */
+ sh_error("%.*s: bad variable name", (int) namelen, name);
+ vallen = 0;
+ if (val == NULL) {
+ flags |= VUNSET;
+ } else {
+ vallen = strlen(val);
+ }
+ INTOFF;
+ /* + 2 leaves room for the '=' and the terminating NUL */
+ p = mempcpy(nameeq = ckmalloc(namelen + vallen + 2), name, namelen);
+ if (val) {
+ *p++ = '=';
+ p = mempcpy(p, val, vallen);
+ }
+ *p = '\0';
+ /* VNOSAVE: setvareq() stores nameeq itself instead of copying it */
+ vp = setvareq(nameeq, flags | VNOSAVE);
+ INTON;
+
+ return vp;
+}
+
+/*
+ * Set the given integer as the value of a variable. The flags argument is
+ * ored with the flags of the variable.
+ *
+ * The number is formatted in decimal ("%" PRIdMAX) into a local buffer
+ * and handed to setvar(). Returns val unchanged.
+ */
+
+intmax_t setvarint(const char *name, intmax_t val, int flags)
+{
+ int len = max_int_length(sizeof(val));
+ char buf[len]; /* variable-length array sized by max_int_length() */
+
+ fmtstr(buf, len, "%" PRIdMAX, val);
+ setvar(name, buf, flags);
+ return val;
+}
+
+
+
+/*
+ * Same as setvar except that the variable and value are passed in
+ * the first argument as name=value. Since the first argument will
+ * be actually stored in the table, it should not be a string that
+ * will go away.
+ * Called with interrupts off.
+ *
+ * s: "name=value" (or a bare name for an unset request).
+ * flags: V* flags. VNOSAVE means s is stored as given rather than
+ * copied with savestr(), and is released with ckfree() if the
+ * request ends up not storing it.
+ *
+ * Returns the table entry, or NULL when the variable did not exist and
+ * nothing was created (VNOSET, or a plain unset).
+ * NOTE(review): when an existing entry is removed by a plain unset, the
+ * returned pointer refers to the entry that was just freed; callers must
+ * not dereference it.
+ */
+
+struct var *setvareq(char *s, int flags)
+{
+ struct var *vp, **vpp;
+
+ vpp = hashvar(s);
+ /* all-ones mask when aflag is 1, zero when it is 0: adds VEXPORT */
+ flags |= (VEXPORT & (((unsigned) (1 - aflag)) - 1));
+ vpp = findvar(vpp, s);
+ vp = *vpp;
+ if (vp) {
+ if (vp->flags & VREADONLY) {
+ const char *n;
+
+ if (flags & VNOSAVE)
+ free(s);
+ n = vp->text;
+ /* "%.*s" reads its precision as an int, not a ptrdiff_t */
+ sh_error("%.*s: is read only",
+ (int) (strchrnul(n, '=') - n), n);
+ }
+
+ if (flags & VNOSET)
+ goto out;
+
+ /* notify the variable's hook with the new value text */
+ if (vp->func && (flags & VNOFUNC) == 0)
+ (*vp->func)(varnull(s));
+
+ /* the old text is released unless it is fixed or on the stack */
+ if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0)
+ ckfree(vp->text);
+
+ /* plain unset of an entry that is not VSTRFIXED: drop the entry */
+ if (((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) |
+ (vp->flags & VSTRFIXED)) == VUNSET) {
+ *vpp = vp->next;
+ ckfree(vp);
+out_free:
+ if ((flags & (VTEXTFIXED|VSTACK|VNOSAVE)) == VNOSAVE)
+ ckfree(s);
+ goto out;
+ }
+
+ /* keep the old flags except text-storage bits and VUNSET */
+ flags |= vp->flags & ~(VTEXTFIXED|VSTACK|VNOSAVE|VUNSET);
+ } else {
+ if (flags & VNOSET)
+ goto out;
+ if ((flags & (VEXPORT|VREADONLY|VSTRFIXED|VUNSET)) == VUNSET)
+ goto out_free;
+ /* not found */
+ vp = ckmalloc(sizeof (*vp));
+ vp->next = *vpp;
+ vp->func = NULL;
+ *vpp = vp;
+ }
+ /* copy s unless the caller said it may be stored as is */
+ if (!(flags & (VTEXTFIXED|VSTACK|VNOSAVE)))
+ s = savestr(s);
+ vp->text = s;
+ vp->flags = flags;
+
+out:
+ return vp;
+}
+
+/*
+ * Find the value of a variable. Returns NULL if not set.
+ *
+ * "Not set" covers both a missing table entry and an entry flagged
+ * VUNSET. Otherwise the result points just past the '=' inside the
+ * stored text, so it is not a copy.
+ */
+
+char *
+lookupvar(const char *name)
+{
+ struct var *v;
+
+ if ((v = *findvar(hashvar(name), name)) && !(v->flags & VUNSET)) {
+#ifdef WITH_LINENO
+ if (v == &vlineno && v->text == linenovar) { /* LINENO still uses its built-in buffer: refresh it */
+ fmtstr(linenovar+7, sizeof(linenovar)-7, "%d", lineno); /* 7 == length of "LINENO=" */
+ }
+#endif
+ return strchrnul(v->text, '=') + 1;
+ }
+ return NULL;
+}
+
+intmax_t lookupvarint(const char *name)
+{ /* Look the variable up and convert its text with atomax(); nullstr is substituted when lookupvar() returns NULL. */
+ return atomax(lookupvar(name) ?: nullstr, 0);
+}
+
+
+
+/*
+ * Generate a list of variables satisfying the given conditions.
+ *
+ * A variable is selected when (flags & (on | off)) == on, i.e. every
+ * bit of "on" is set and every bit of "off" is clear. The result is a
+ * NULL-terminated array of the variables' text pointers (not copies)
+ * built with the stack-string helpers. If end is non-NULL, *end is set
+ * to the slot that holds the terminating NULL.
+ */
+
+char **
+listvars(int on, int off, char ***end)
+{
+ struct var **vpp;
+ struct var *vp;
+ char **ep;
+ int mask;
+
+ STARTSTACKSTR(ep);
+ vpp = vartab;
+ mask = on | off;
+ do {
+ for (vp = *vpp ; vp ; vp = vp->next)
+ if ((vp->flags & mask) == on) {
+ if (ep == stackstrend())
+ ep = growstackstr();
+ *ep++ = (char *) vp->text;
+ }
+ } while (++vpp < vartab + VTABSIZE);
+ if (ep == stackstrend()) /* make room for the terminator */
+ ep = growstackstr();
+ if (end)
+ *end = ep;
+ *ep++ = NULL;
+ return grabstackstr(ep);
+}
+
+
+
+/*
+ * POSIX requires that 'set' (but not export or readonly) output the
+ * variables in lexicographic order - by the locale's collating order (sigh).
+ * Maybe we could keep them in an ordered balanced binary tree
+ * instead of hashed lists.
+ * For now just roll 'em through qsort for printing...
+ *
+ * Prints one line per variable selected by listvars(on, off): the
+ * prefix, a separator (spcstr when prefix is non-empty, otherwise
+ * nothing), the name and, when the text contains '=', the '=' followed
+ * by the single_quote()d value. Always returns 0.
+ */
+
+int
+showvars(const char *prefix, int on, int off)
+{
+ const char *sep;
+ char **ep, **epend;
+
+ ep = listvars(on, off, &epend);
+ qsort(ep, epend - ep, sizeof(char *), vpcmp);
+
+ sep = *prefix ? spcstr : prefix;
+
+ for (; ep < epend; ep++) {
+ const char *p;
+ const char *q;
+
+ p = strchrnul(*ep, '=');
+ q = nullstr;
+ if (*p)
+ q = single_quote(++p); /* p now points past '=', so the "%.*s" below includes the '=' */
+
+ out1fmt("%s%s%.*s%s\n", prefix, sep, (int)(p - *ep), *ep, q);
+ }
+
+ return 0;
+}
+
+
+
+/*
+ * The export and readonly commands.
+ *
+ * The flag applied is VREADONLY when argv[0] starts with 'r', otherwise
+ * VEXPORT. With -p, or with no operands, the variables already carrying
+ * that flag are listed through showvars(). Otherwise each operand is
+ * either "name=value" (assigned and flagged) or "name" (flagged in
+ * place if it exists, else created unset with the flag). Always
+ * returns 0.
+ */
+
+int
+exportcmd(int argc, char **argv)
+{
+ struct var *vp;
+ char *name;
+ const char *p;
+ char **aptr;
+ int flag = argv[0][0] == 'r'? VREADONLY : VEXPORT;
+ int notp;
+
+ notp = nextopt("p") - 'p'; /* zero exactly when -p was given */
+ if (notp && ((name = *(aptr = argptr)))) {
+ do {
+ if ((p = strchr(name, '=')) != NULL) {
+ p++;
+ } else {
+ if ((vp = *findvar(hashvar(name), name))) {
+ vp->flags |= flag;
+ continue;
+ }
+ }
+ setvar(name, p, flag); /* p is the value text, or NULL when the operand had no '=' */
+ } while ((name = *++aptr) != NULL);
+ } else {
+ showvars(argv[0], flag, 0);
+ }
+ return 0;
+}
+
+
+/*
+ * The "local" command.
+ *
+ * Reports "not in a function" through sh_error() when localvar_stack is
+ * empty; otherwise calls mklocal(name, 0) for every operand in argptr.
+ * Always returns 0.
+ */
+
+int
+localcmd(int argc, char **argv)
+{
+ char *name;
+
+ if (!localvar_stack)
+ sh_error("not in a function");
+
+ argv = argptr; /* the argv parameter is reused as the operand cursor */
+ while ((name = *argv++) != NULL) {
+ mklocal(name, 0);
+ }
+ return 0;
+}
+
+
+/*
+ * Make a variable a local variable. When a variable is made local, its
+ * value and flags are saved in a localvar structure. The saved values
+ * will be restored when the shell function returns. We handle the name
+ * "-" as a special case.
+ *
+ * name is "NAME", "NAME=value" or "-"; flags are extra V* flags for the
+ * new binding. The record is inserted at the front of
+ * localvar_stack->lv, so localvar_stack must be non-NULL here.
+ */
+
+void mklocal(char *name, int flags)
+{
+ struct localvar *lvp;
+ struct var **vpp;
+ struct var *vp;
+
+ INTOFF;
+ lvp = ckmalloc(sizeof (struct localvar));
+ if (name[0] == '-' && name[1] == '\0') {
+ char *p;
+ p = ckmalloc(sizeof(optlist));
+ lvp->text = memcpy(p, optlist, sizeof(optlist)); /* snapshot of optlist */
+ vp = NULL; /* a NULL vp marks the "-" record for poplocalvars() */
+ } else {
+ char *eq;
+
+ vpp = hashvar(name);
+ vp = *findvar(vpp, name);
+ eq = strchr(name, '=');
+ if (vp == NULL) {
+ if (eq)
+ vp = setvareq(name, VSTRFIXED | flags);
+ else
+ vp = setvar(name, NULL, VSTRFIXED | flags);
+ lvp->flags = VUNSET; /* nothing to restore: poplocalvars() unsets it again */
+ } else {
+ lvp->text = vp->text;
+ lvp->flags = vp->flags;
+ vp->flags |= VSTRFIXED|VTEXTFIXED; /* VTEXTFIXED stops setvareq() from freeing the saved text */
+ if (eq)
+ setvareq(name, flags);
+ }
+ }
+ lvp->vp = vp;
+ lvp->next = localvar_stack->lv;
+ localvar_stack->lv = lvp;
+ INTON;
+}
+
+
+/*
+ * Called after a function returns.
+ * Interrupts must be off.
+ *
+ * Removes the top node of localvar_stack and undoes every record on
+ * it: the "-" record restores optlist, a record saved with VUNSET
+ * unsets the variable, and any other record puts the saved text and
+ * flags back. The function also brackets its own work with
+ * INTOFF/INTON.
+ */
+
+static void
+poplocalvars(void)
+{
+ struct localvar_list *ll;
+ struct localvar *lvp, *next;
+ struct var *vp;
+
+ INTOFF;
+ ll = localvar_stack;
+ localvar_stack = ll->next;
+
+ next = ll->lv;
+ ckfree(ll);
+
+ while ((lvp = next) != NULL) {
+ next = lvp->next; /* read before lvp is freed at the bottom of the loop */
+ vp = lvp->vp;
+ TRACE(("poplocalvar %s\n", vp ? vp->text : "-"));
+ if (vp == NULL) { /* $- saved */
+ memcpy(optlist, lvp->text, sizeof(optlist));
+ ckfree(lvp->text);
+ optschanged();
+ } else if (lvp->flags == VUNSET) {
+ vp->flags &= ~(VSTRFIXED|VREADONLY); /* cleared so the unset below is neither refused nor kept as an entry */
+ unsetvar(vp->text);
+ } else {
+ if (vp->func)
+ (*vp->func)(varnull(lvp->text)); /* tell the hook about the restored value */
+ if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0)
+ ckfree(vp->text);
+ vp->flags = lvp->flags;
+ vp->text = lvp->text;
+ }
+ ckfree(lvp);
+ }
+ INTON;
+}
+
+
+/*
+ * Create a new localvar environment.
+ *
+ * When push is zero nothing is created. In both cases the return value
+ * is the stack top as it was on entry.
+ */
+struct localvar_list *pushlocalvars(int push)
+{
+ struct localvar_list *ll;
+ struct localvar_list *top;
+
+ top = localvar_stack;
+ if (!push)
+ goto out;
+
+ INTOFF;
+ ll = ckmalloc(sizeof(*ll));
+ ll->lv = NULL; /* no saved variables in the new environment yet */
+ ll->next = top;
+ localvar_stack = ll;
+ INTON;
+
+out:
+ return top;
+}
+
+
+void unwindlocalvars(struct localvar_list *stop)
+{ /* Pop localvar environments until the stack top equals stop; passing 0 pops them all. */
+ while (localvar_stack != stop)
+ poplocalvars();
+}
+
+
+/*
+ * The unset builtin command. We unset the function before we unset the
+ * variable to allow a function to be unset when there is a readonly variable
+ * with the same name.
+ *
+ * NOTE(review): the loop below unsets either the variable (no option
+ * or -v) or the function (-f) for each operand, never both, so the
+ * sentence above does not describe this code. Always returns 0.
+ */
+
+int
+unsetcmd(int argc, char **argv)
+{
+ char **ap;
+ int i;
+ int flag = 0; /* last option letter seen ('v' or 'f'); stays 0 when no option is given */
+
+ while ((i = nextopt("vf")) != '\0') {
+ flag = i;
+ }
+
+ for (ap = argptr; *ap ; ap++) {
+ if (flag != 'f') { /* no option or -v: treat the operand as a variable name */
+ unsetvar(*ap);
+ continue;
+ }
+ if (flag != 'v') /* only reached with flag == 'f', so this test always holds */
+ unsetfunc(*ap);
+ }
+ return 0;
+}
+
+
+/*
+ * Unset the specified variable.
+ *
+ * Delegates to setvar() with a null value pointer, which makes setvar()
+ * add VUNSET; the value returned by setvar() is ignored.
+ */
+
+void unsetvar(const char *s)
+{
+ setvar(s, 0, 0);
+}
+
+
+
+/*
+ * Find the appropriate entry in the hash table from the name.
+ *
+ * The hash is the first byte shifted left by four, plus the sum of the
+ * bytes of the name (scanning stops at '=' or at the terminating NUL),
+ * reduced modulo VTABSIZE. The result is the address of the bucket's
+ * head pointer inside vartab, which lets callers such as initvar() and
+ * setvareq() insert into or unlink from the chain.
+ */
+
+STATIC struct var **
+hashvar(const char *p)
+{
+ unsigned int hashval;
+
+ hashval = ((unsigned char) *p) << 4;
+ while (*p && *p != '=')
+ hashval += (unsigned char) *p++;
+ return &vartab[hashval % VTABSIZE];
+}
+
+
+
+/*
+ * Compares two strings up to the first = or '\0'. The first
+ * string must be terminated by '='; the second may be terminated by
+ * either '=' or '\0'.
+ *
+ * Returns 0 when both names end at the same position with identical
+ * characters before it; otherwise returns the difference between the
+ * first pair of differing characters, where a '=' on either side is
+ * counted as 0 (end of name).
+ */
+
+int
+varcmp(const char *p, const char *q)
+{
+ int c, d;
+
+ while ((c = *p) == (d = *q)) {
+ if (!c || c == '=') /* both names ended together: c - d is 0 */
+ goto out;
+ p++;
+ q++;
+ }
+ if (c == '=')
+ c = 0;
+ if (d == '=')
+ d = 0;
+out:
+ return c - d;
+}
+
+STATIC int
+vpcmp(const void *a, const void *b)
+{ /* qsort() comparator used by showvars(): a and b point at char * array slots whose strings are compared with varcmp(). */
+ return varcmp(*(const char **)a, *(const char **)b);
+}
+
+STATIC struct var **
+findvar(struct var **vpp, const char *name)
+{ /* Walk the chain starting at *vpp; return the link that points at the first entry whose text satisfies varequal() with name, or the terminating NULL link when none does. */
+ for (; *vpp; vpp = &(*vpp)->next) {
+ if (varequal((*vpp)->text, name)) { /* varequal is defined outside this file -- presumably a name comparison; confirm in var.h */
+ break;
+ }
+ }
+ return vpp;
+}
diff --git a/examples/tiny.c b/examples/tiny.c
new file mode 100644
index 0000000..ff80f0c
--- /dev/null
+++ b/examples/tiny.c
@@ -0,0 +1,430 @@
+int
+save_bash_input (fd, new_fd)
+ int fd, new_fd;
+{ /* Move bash's input from FD to another descriptor: NEW_FD if it is not -1, else a duplicate of FD numbered 10 or higher. Returns the new descriptor, or -1 if the duplication fails. */
+ int nfd;
+
+ /* Sync the stream so we can re-read from the new file descriptor. We
+ might be able to avoid this by copying the buffered stream verbatim
+ to the new file descriptor. */
+ if (buffers[fd])
+ sync_buffered_stream (fd);
+
+ /* Now take care of duplicating the file descriptor that bash is
+ using for input, so we can reinitialize it later. */
+ nfd = (new_fd == -1) ? fcntl (fd, F_DUPFD, 10) : new_fd;
+ if (nfd == -1)
+ {
+ if (fcntl (fd, F_GETFD, 0) == 0) /* the error is only reported when F_GETFD on FD returns 0 */
+ sys_error (_("cannot allocate new file descriptor for bash input from fd %d"), fd);
+ return -1;
+ }
+
+ if (nfd < nbuffers && buffers[nfd])
+ {
+ /* What's this? A stray buffer without an associated open file
+ descriptor? Free up the buffer and report the error. */
+ internal_error (_("save_bash_input: buffer already exists for new fd %d"), nfd);
+ if (buffers[nfd]->b_flag & B_SHAREDBUF)
+ buffers[nfd]->b_buffer = (char *)NULL; /* shared storage must not be freed below */
+ free_buffered_stream (buffers[nfd]);
+ }
+
+ /* Reinitialize bash_input.location. */
+ if (bash_input.type == st_bstream)
+ {
+ bash_input.location.buffered_fd = nfd;
+ fd_to_buffered_stream (nfd);
+ close_buffered_fd (fd); /* XXX */
+ }
+ else
+ /* If the current input type is not a buffered stream, but the shell
+ is not interactive and therefore using a buffered stream to read
+ input (e.g. with an `eval exec 3>output' inside a script), note
+ that the input fd has been changed. pop_stream() looks at this
+ value and adjusts the input fd to the new value of
+ default_buffered_input accordingly. */
+ bash_input_fd_changed++;
+
+ if (default_buffered_input == fd)
+ default_buffered_input = nfd;
+
+ SET_CLOSE_ON_EXEC (nfd);
+ return nfd;
+}
+
+/* Check that file descriptor FD is not the one that bash is currently
+ using to read input from a script. FD is about to be duplicated onto,
+ which means that the kernel will close it for us. If FD is the bash
+ input file descriptor, we need to seek backwards in the script (if
+ possible and necessary -- scripts read from stdin are still unbuffered),
+ allocate a new file descriptor to use for bash input, and re-initialize
+ the buffered stream. Make sure the file descriptor used to save bash
+ input is set close-on-exec. Returns 0 on success, -1 on failure. This
+ works only if fd is > 0 -- if fd == 0 and bash is reading input from
+ fd 0, sync_buffered_stream is used instead, to cooperate with input
+ redirection (look at redir.c:add_undo_redirect()).
+ When FD is not the bash input descriptor nothing is done and 0 is
+ returned. */
+int
+check_bash_input (fd)
+ int fd;
+{
+ if (fd_is_bash_input (fd))
+ {
+ if (fd > 0)
+ return ((save_bash_input (fd, -1) == -1) ? -1 : 0);
+ else if (fd == 0)
+ return ((sync_buffered_stream (fd) == -1) ? -1 : 0);
+ }
+ return 0;
+}
+
+/* This is the buffered stream analogue of dup2(fd1, fd2). The
+ BUFFERED_STREAM corresponding to fd2 is deallocated, if one exists.
+ BUFFERS[fd1] is copied to BUFFERS[fd2]. This is called by the
+ redirect code for constructs like 4<&0 and 3</etc/rc.local.
+ Returns 0 without doing anything when fd1 == fd2, otherwise fd2. */
+int
+duplicate_buffered_stream (fd1, fd2)
+ int fd1, fd2;
+{
+ int is_bash_input, m;
+
+ if (fd1 == fd2)
+ return 0;
+
+ m = max (fd1, fd2);
+ ALLOCATE_BUFFERS (m);
+
+ /* If FD2 is the file descriptor bash is currently using for shell input,
+ we need to do some extra work to make sure that the buffered stream
+ actually exists (it might not if fd1 was not active, and the copy
+ didn't actually do anything). */
+ is_bash_input = (bash_input.type == st_bstream) &&
+ (bash_input.location.buffered_fd == fd2);
+
+ if (buffers[fd2])
+ {
+ /* If the two objects share the same b_buffer, don't free it. */
+ if (buffers[fd1] && buffers[fd1]->b_buffer && buffers[fd1]->b_buffer == buffers[fd2]->b_buffer)
+ buffers[fd2] = (BUFFERED_STREAM *)NULL;
+ /* If this buffer is shared with another fd, don't free the buffer */
+ else if (buffers[fd2]->b_flag & B_SHAREDBUF)
+ {
+ buffers[fd2]->b_buffer = (char *)NULL;
+ free_buffered_stream (buffers[fd2]);
+ }
+ else
+ free_buffered_stream (buffers[fd2]);
+ }
+ buffers[fd2] = copy_buffered_stream (buffers[fd1]);
+ if (buffers[fd2])
+ buffers[fd2]->b_fd = fd2;
+
+ if (is_bash_input)
+ {
+ if (!buffers[fd2])
+ fd_to_buffered_stream (fd2);
+ buffers[fd2]->b_flag |= B_WASBASHINPUT;
+ }
+
+ if (fd_is_bash_input (fd1) || (buffers[fd1] && (buffers[fd1]->b_flag & B_SHAREDBUF)))
+ buffers[fd2]->b_flag |= B_SHAREDBUF; /* NOTE(review): no NULL check on buffers[fd2] here -- confirm it cannot be NULL when fd_is_bash_input (fd1) holds */
+
+ return (fd2);
+}
+
+/* Return 1 if a seek on FD will succeed. The probe is an lseek by 0
+ from the current position; a negative result counts as not seekable. */
+#define fd_is_seekable(fd) (lseek ((fd), 0L, SEEK_CUR) >= 0)
+
+/* Take FD, a file descriptor, and create and return a buffered stream
+ corresponding to it. If something is wrong and the file descriptor
+ is invalid, return a NULL stream.
+ The buffer size is min (file size, MAX_INPUT_BUFFER_SIZE) when FD is
+ seekable and 1 otherwise; a size of 0 is bumped to 1. Note that FD is
+ closed when fstat fails. */
+BUFFERED_STREAM *
+fd_to_buffered_stream (fd)
+ int fd;
+{
+ char *buffer;
+ size_t size;
+ struct stat sb;
+
+ if (fstat (fd, &sb) < 0)
+ {
+ close (fd);
+ return ((BUFFERED_STREAM *)NULL);
+ }
+
+ size = (fd_is_seekable (fd)) ? min (sb.st_size, MAX_INPUT_BUFFER_SIZE) : 1;
+ if (size == 0)
+ size = 1;
+ buffer = (char *)xmalloc (size);
+
+ return (make_buffered_stream (fd, buffer, size));
+}
+
+/* Return a buffered stream corresponding to FILE, a file name.
+ FILE is opened read-only; the result is NULL when open fails. */
+BUFFERED_STREAM *
+open_buffered_stream (file)
+ char *file;
+{
+ int fd;
+
+ fd = open (file, O_RDONLY);
+ return ((fd >= 0) ? fd_to_buffered_stream (fd) : (BUFFERED_STREAM *)NULL);
+}
+
+/* Deallocate a buffered stream and free up its resources. Make sure we
+ zero out the slot in BUFFERS that points to BP.
+ A NULL BP is ignored. The file descriptor itself is not closed here. */
+void
+free_buffered_stream (bp)
+ BUFFERED_STREAM *bp;
+{
+ int n;
+
+ if (!bp)
+ return;
+
+ n = bp->b_fd; /* remembered now because bp is freed before the slot is cleared */
+ if (bp->b_buffer)
+ free (bp->b_buffer);
+ free (bp);
+ buffers[n] = (BUFFERED_STREAM *)NULL;
+}
+
+/* Close the file descriptor associated with BP, a buffered stream, and free
+ up the stream. Return the status of closing BP's file descriptor.
+ A NULL BP returns 0 without closing anything. */
+int
+close_buffered_stream (bp)
+ BUFFERED_STREAM *bp;
+{
+ int fd;
+
+ if (!bp)
+ return (0);
+ fd = bp->b_fd;
+ if (bp->b_flag & B_SHAREDBUF)
+ bp->b_buffer = (char *)NULL; /* keeps free_buffered_stream from freeing a shared buffer */
+ free_buffered_stream (bp);
+ return (close (fd));
+}
+
+/* Deallocate the buffered stream associated with file descriptor FD, and
+ close FD. Return the status of the close on FD.
+ A negative FD sets errno to EBADF and returns -1. An FD that has no
+ buffered stream is simply closed. */
+int
+close_buffered_fd (fd)
+ int fd;
+{
+ if (fd < 0)
+ {
+ errno = EBADF;
+ return -1;
+ }
+ if (fd >= nbuffers || !buffers || !buffers[fd])
+ return (close (fd));
+ return (close_buffered_stream (buffers[fd]));
+}
+
+/* Make the BUFFERED_STREAM associated with buffers[FD] be BP, and return
+ the old BUFFERED_STREAM. FD is used as an index without any range
+ check, and the old stream is not freed. */
+BUFFERED_STREAM *
+set_buffered_stream (fd, bp)
+ int fd;
+ BUFFERED_STREAM *bp;
+{
+ BUFFERED_STREAM *ret;
+
+ ret = buffers[fd];
+ buffers[fd] = bp;
+ return ret;
+}
+
+/* Read a buffer full of characters from BP, a buffered stream.
+ On success the first character read is consumed and returned as a
+ value in 0..255. On end of file or error the buffer is emptied,
+ B_EOF or B_ERROR is set in b_flag, and EOF is returned. */
+static int
+b_fill_buffer (bp)
+ BUFFERED_STREAM *bp;
+{
+ ssize_t nr;
+ off_t o;
+
+ CHECK_TERMSIG;
+ /* In an environment where text and binary files are treated differently,
+ compensate for lseek() on text files returning an offset different from
+ the count of characters read() returns. Text-mode streams have to be
+ treated as unbuffered. */
+ if ((bp->b_flag & (B_TEXT | B_UNBUFF)) == B_TEXT)
+ {
+ o = lseek (bp->b_fd, 0, SEEK_CUR);
+ nr = zread (bp->b_fd, bp->b_buffer, bp->b_size);
+ if (nr > 0 && nr < lseek (bp->b_fd, 0, SEEK_CUR) - o) /* file offset advanced further than the count read */
+ {
+ lseek (bp->b_fd, o, SEEK_SET);
+ bp->b_flag |= B_UNBUFF;
+ bp->b_size = 1;
+ nr = zread (bp->b_fd, bp->b_buffer, bp->b_size);
+ }
+ }
+ else
+ nr = zread (bp->b_fd, bp->b_buffer, bp->b_size);
+ if (nr <= 0)
+ {
+ bp->b_used = bp->b_inputp = 0;
+ bp->b_buffer[0] = 0;
+ if (nr == 0)
+ bp->b_flag |= B_EOF;
+ else
+ bp->b_flag |= B_ERROR;
+ return (EOF);
+ }
+
+ bp->b_used = nr;
+ bp->b_inputp = 0;
+ return (bp->b_buffer[bp->b_inputp++] & 0xFF);
+}
+
+/* Get a character from buffered stream BP. Refills through
+ b_fill_buffer when the buffer is empty or fully consumed.
+ NOTE(review): the expansion has no outer parentheses and evaluates BP
+ more than once, so uses must be parenthesized and BP must be free of
+ side effects; the callers in this file comply. */
+#define bufstream_getc(bp) \
+ (bp->b_inputp == bp->b_used || !bp->b_used) \
+ ? b_fill_buffer (bp) \
+ : bp->b_buffer[bp->b_inputp++] & 0xFF
+
+/* Push C back onto buffered stream BP. Returns C, or EOF when C is EOF,
+ BP is null, or the read position is already at the buffer start. */
+static int
+bufstream_ungetc(c, bp)
+ int c;
+ BUFFERED_STREAM *bp;
+{
+ if (c == EOF || bp == 0 || bp->b_inputp == 0)
+ return (EOF);
+
+ bp->b_buffer[--bp->b_inputp] = c;
+ return (c);
+}
+
+/* Seek backwards on file BFD to synchronize what we've read so far
+ with the underlying file pointer. Returns -1 when BFD has no buffered
+ stream and 0 otherwise; the result of lseek is not checked. */
+int
+sync_buffered_stream (bfd)
+ int bfd;
+{
+ BUFFERED_STREAM *bp;
+ off_t chars_left;
+
+ if (buffers == 0 || (bp = buffers[bfd]) == 0)
+ return (-1);
+
+ chars_left = bp->b_used - bp->b_inputp; /* buffered characters not yet consumed */
+ if (chars_left)
+ lseek (bp->b_fd, -chars_left, SEEK_CUR);
+ bp->b_used = bp->b_inputp = 0;
+ return (0);
+}
+
+int
+buffered_getchar ()
+{ /* Read the next character from the buffered stream of bash_input.location.buffered_fd; EOF when that fd is negative or has no stream. */
+ CHECK_TERMSIG;
+
+ if (bash_input.location.buffered_fd < 0 || buffers[bash_input.location.buffered_fd] == 0)
+ return EOF;
+
+#if !defined (DJGPP)
+ return (bufstream_getc (buffers[bash_input.location.buffered_fd]));
+#else
+ /* On DJGPP, ignore \r. */
+ int ch;
+ while ((ch = bufstream_getc (buffers[bash_input.location.buffered_fd])) == '\r')
+ ;
+ return ch;
+#endif
+}
+
+int
+buffered_ungetchar (c)
+ int c;
+{ /* Push C back onto the stream of bash_input.location.buffered_fd. NOTE(review): unlike buffered_getchar, the fd is not checked for being negative before it indexes buffers. */
+ return (bufstream_ungetc (c, buffers[bash_input.location.buffered_fd]));
+}
+
+/* Make input come from file descriptor BFD through a buffered stream.
+ If no stream can be created for BFD, return_EOF is installed as the
+ getter in place of buffered_getchar. */
+void
+with_input_from_buffered_stream (bfd, name)
+ int bfd;
+ char *name;
+{
+ INPUT_STREAM location;
+ BUFFERED_STREAM *bp;
+
+ location.buffered_fd = bfd;
+ /* Make sure the buffered stream exists. */
+ bp = fd_to_buffered_stream (bfd);
+ init_yy_io (bp == 0 ? return_EOF : buffered_getchar,
+ buffered_ungetchar, st_bstream, name, location);
+}
+
+#if defined (TEST)
+void *
+xmalloc(s)
+int s;
+{ /* TEST-only stand-in: forwards to malloc and performs no failure check. */
+ return (malloc (s));
+}
+
+void *
+xrealloc(s, size)
+char *s;
+int size;
+{ /* TEST-only stand-in: malloc when S is null, realloc otherwise; no failure check. */
+ if (!s)
+ return(malloc (size));
+ else
+ return(realloc (s, size));
+}
+
+void
+init_yy_io ()
+{ /* TEST-only empty stub; with_input_from_buffered_stream calls this name with five arguments. */
+}
+
+process(bp)
+BUFFERED_STREAM *bp;
+{ /* TEST-only: copy every character of BP to standard output until EOF. Old-style implicit-int function that returns no value. */
+ int c;
+
+ while ((c = bufstream_getc(bp)) != EOF)
+ putchar(c);
+}
+
+BASH_INPUT bash_input;
+
+struct stat dsb; /* can be used from gdb */
+
+/* imitate /bin/cat: with no arguments copy fd 0; otherwise copy each
+ argument in turn, where "-" means fd 0. Arguments whose stream cannot
+ be created are skipped silently. */
+main(argc, argv)
+int argc;
+char **argv;
+{
+ register int i;
+ BUFFERED_STREAM *bp;
+
+ if (argc == 1) {
+ bp = fd_to_buffered_stream (0);
+ process(bp); /* NOTE(review): bp is not checked for NULL on this path, unlike in the loop below */
+ exit(0);
+ }
+ for (i = 1; i < argc; i++) {
+ if (argv[i][0] == '-' && argv[i][1] == '\0') {
+ bp = fd_to_buffered_stream (0);
+ if (!bp)
+ continue;
+ process(bp);
+ free_buffered_stream (bp); /* frees the stream but leaves fd 0 open */
+ } else {
+ bp = open_buffered_stream (argv[i]);
+ if (!bp)
+ continue;
+ process(bp);
+ close_buffered_stream (bp);
+ }
+ }
+ exit(0);
+}
+#endif /* TEST */
+#endif /* BUFFERED_INPUT */
diff --git a/extras/ftdetect/earlpy.vim b/extras/ftdetect/earlpy.vim
new file mode 100755
index 0000000..f63a7b4
--- /dev/null
+++ b/extras/ftdetect/earlpy.vim
@@ -0,0 +1 @@
+au BufRead,BufNewFile *.earlpy set filetype=earlpy
diff --git a/extras/syntax/earlpy.vim b/extras/syntax/earlpy.vim
new file mode 100644
index 0000000..429db40
--- /dev/null
+++ b/extras/syntax/earlpy.vim
@@ -0,0 +1,20 @@
+" https://vim.fandom.com/wiki/Creating_your_own_syntax_files
+" Vim syntax file
+" Language: earlpy grammar files (*.earlpy)
+" Maintainer: Kevin Lauder
+" Latest Revision: 26 April 2008
+
+if exists("b:current_syntax")
+ finish
+endif
+
+let b:current_syntax = "earlpy"
+
+" Matches
+syn match basicKeywords "\zs.*\ze$"
+syn match literalTag "\zs .*\ze$"
+syn match comment "\zs#.*\ze$"
+
+hi def link literalTag Type
+hi def link basicKeywords Keyword
+hi def link comment Comment
diff --git a/grammars/c/disambiguate.c b/grammars/c/disambiguate.c
index 403d65f..b6a99e8 100644
--- a/grammars/c/disambiguate.c
+++ b/grammars/c/disambiguate.c
@@ -1,47 +1,41 @@
-struct token *TYPE_NAMES[1024];
-size_t N_TYPE_NAMES;
-
-void alert_parse(struct state *state) {
- if (PRODUCTION_ID_TO_SYMBOL[state->production_id] == SYMBOL_TYPEDEF) {
- for (struct token *t = find_token(state, 2); t->symbol != DONE_SYMBOL; t++) {
- if (t->symbol == SYMBOL_IDENT) {
- TYPE_NAMES[N_TYPE_NAMES++] = t;
- break;
- }
- }
- }
-}
-
-int is_typename(struct token *token) {
- if (!strcmp("int", token->string)) return 1;
- for (size_t i = 0; i < N_TYPE_NAMES; i++)
- if (!strcmp(TYPE_NAMES[i]->string, token->string))
- return 1;
- return 0;
-}
+void alert_parse(struct state *state) { }
int disambiguator(struct state *old, struct state *new) {
- // printf("Old tree:\n");
- // print_parse_tree(old, 4);
- // printf("New tree:\n");
- // print_parse_tree(new, 4);
+ // fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
+ // print_parse_tree(old, 0, stderr);
+ // print_parse_tree(new, 0, stderr);
+ // fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
- if (old->production_id == PRODUCTION_DECL_STMT)
- if (!is_typename(find_token(old->reasons[0], 0)))
- return 1;
- if (new->production_id == PRODUCTION_DECL_STMT)
- if (!is_typename(find_token(new->reasons[0], 0)))
- return 0;
+ if (old->n_poisoned < new->n_poisoned) return 0;
+ if (new->n_poisoned < old->n_poisoned) return 1;
// Prefer the earlier parsings in the grammar when two entirely different
// productions are taken.
if (old->production_id != new->production_id)
- return old->production_id < new->production_id
- ? 0 : 1;
+ return (old->production_id < new->production_id) ? 0 : 1;
// If they're the same production ...
prod_id_t prod = old->production_id;
- if (PRODUCTION_ID_TO_SYMBOL[prod] == SYMBOL_EXPR) {
+
+ if (PRODUCTION_ID_TO_SYMBOL[prod] == START_SYMBOL
+ && PRODUCTION_ID_TO_PRODUCTION[prod][1] != DONE_SYMBOL) {
+ struct token *old_tok = find_token(old, 1),
+ *new_tok = find_token(new, 1);
+ if (old_tok < new_tok) return 0;
+ else if (old_tok > new_tok) return 1;
+ }
+
+ if (PRODUCTION_ID_TO_PRODUCTION[prod][0] == SYMBOL_ERROR && PRODUCTION_ID_TO_PRODUCTION[prod][1] != DONE_SYMBOL) {
+ struct token *old_tok = find_token(old, 1),
+ *new_tok = find_token(new, 1);
+ if (old_tok < new_tok) return 0;
+ else if (old_tok > new_tok) return 1;
+ } else if (PRODUCTION_ID_TO_PRODUCTION[prod][1] == SYMBOL_ERROR) {
+ struct token *old_tok = find_token(old, 1),
+ *new_tok = find_token(new, 1);
+ if (old_tok < new_tok) return 1;
+ else if (old_tok > new_tok) return 0;
+ } else if (PRODUCTION_ID_TO_SYMBOL[prod] == SYMBOL_EXPR) {
if (PRODUCTION_ID_TO_PRODUCTION[prod][1] == SYMBOL_OP) {
struct token *old_tok = find_token(old, 1),
*new_tok = find_token(new, 1);
@@ -49,7 +43,7 @@ int disambiguator(struct state *old, struct state *new) {
const char *precedence[] = {".", "->", "*", "/", "%", "+", "-",
"<<", ">>", "<", "<=", ">", ">=", "==", "!=", "&", "|", "&&",
"||", "=", "+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=",
- "^=", "|=", ",", 0};
+ "^=", "|=", ",", ":", 0};
if (strcmp(old_s, new_s)) {
for (const char **p = precedence; *p; p++) {
if (!strcmp(old_s, *p)) {
@@ -58,16 +52,30 @@ int disambiguator(struct state *old, struct state *new) {
return 0;
}
}
- // BAD!
- return 2;
+ fprintf(stderr, "ERROR: didn't find operator '%s'\n", old_s);
+ exit(1);
} else {
- // Associate RIGHT
if (old_tok < new_tok) return 1;
else if (old_tok > new_tok) return 0;
}
}
}
+ // Generally speaking, we want left associativity to avoid long chains of
+ // completions.
+ struct token *old_tok = find_token(old, 1),
+ *new_tok = find_token(new, 1);
+ if (old_tok < new_tok) return 1;
+ else if (old_tok > new_tok) return 0;
+
fprintf(stderr, "TOTALLY UNKNOWN!\n");
+ fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~\n");
+ pprint_state(old);
+ // print_parse_tree(old, 0, stderr);
+ fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~\n");
+ pprint_state(new);
+ // print_parse_tree(new, 0, stderr);
+ fprintf(stderr, "~~~~~~~~~~~~~~~~~~~~~\n");
+ exit(1);
return 2;
}
diff --git a/grammars/c/grammar.earlpy b/grammars/c/grammar.earlpy
new file mode 100644
index 0000000..99cafe9
--- /dev/null
+++ b/grammars/c/grammar.earlpy
@@ -0,0 +1,265 @@
+#### OPTIMIZATIONS:
+# where possible, we want parse trees that look like:
+# (((A + B) + C) + D), i.e., left-associativity because it avoids long chains
+# of completions. Another explanation for it is that then ambiguity is resolved
+# as early in the left-to-right parse as possible.
+
+KEYWORDS list
+ switch volatile case while do else const for if
+ struct union typedef void return break continue
+ sizeof
+
+IDENT regex
+ [a-zA-Z_][0-9a-zA-Z_]*
+
+# At least one digit is required (after an optional 0x prefix) so that the
+# pattern can never match the empty string, a bare "0x", or a bare suffix.
+INT regex
+ ((0x[0-9a-fA-F]+)|([0-9]+))([uUlL])*
+
+# https://stackoverflow.com/questions/2039795/regular-expression-for-a-string-literal-in-flex-lex
+STRING regex
+ ["]([^\\"]|\\.)*["]
+
+CHAR regex
+ [']([\\][']|[^'][^'])*[^']?[']
+
+OP list
+ ; ,
+ - + ! % * & / << >> ^ |
+ -= += != %= *= &= /= <<= == >>= ^= |=
+ && || ++ --
+ < <= > >= =
+ . ->
+
+TERNARY list
+ : ?
+
+PARENS list
+ ( ) { } [ ]
+
+############### ERROR RECOVERY
+# These rules match either a single token, or a pair of balanced parentheses
+
+NONPAREN nonterm
+ KEYWORDS
+ IDENT
+ INT
+ STRING
+ CHAR
+ TERNARY
+ OP
+
+ERROR_INNER nonterm .poison
+ ERROR
+ ERROR_INNER ERROR
+
+ERROR nonterm .poison
+ ( ERROR_INNER )
+ { ERROR_INNER }
+ [ ERROR_INNER ]
+ ( )
+ { }
+ [ ]
+ NONPAREN
+
+############### TYPE PARSING
+# A PRIMITIVE_TYPE is the core object that takes up space after dereferencing,
+# calling, etc. A normal variable declaration is PRIMITIVE_TYPE (expression)
+PRIMITIVE_TYPE nonterm
+ struct IDENT
+ union IDENT
+ struct IDENT AGGREGATE_DECLARATION
+ union IDENT AGGREGATE_DECLARATION
+ const PRIMITIVE_TYPE
+ volatile PRIMITIVE_TYPE
+ void
+ IDENT
+
+# A TYPE_EXPRESSION is basically an lvalue expression.
+TYPE_EXPRESSION nonterm
+ IDENT
+ TYPE_EXPRESSION [ ]
+ TYPE_EXPRESSION [ EXPR ]
+ * TYPE_EXPRESSION
+ const TYPE_EXPRESSION
+ ( TYPE_EXPRESSION )
+ TYPE_EXPRESSION ( )
+ TYPE_EXPRESSION ( ARGS )
+
+DECLARATION nonterm
+ PRIMITIVE_TYPE TYPE_EXPRESSION
+
+# An ANONYMOUS_TYPE has no name
+ANONYMOUS_TYPE nonterm
+ PRIMITIVE_TYPE
+ ANONYMOUS_TYPE [ ]
+ ANONYMOUS_TYPE [ EXPR ]
+ ANONYMOUS_TYPE *
+ ANONYMOUS_TYPE const *
+ const ANONYMOUS_TYPE
+ ( ANONYMOUS_TYPE )
+ ANONYMOUS_TYPE ( )
+ ANONYMOUS_TYPE ( ARGS )
+
+############### TOP LEVEL
+# A translation unit is a left-recursive sequence of top-level items. UNIONDECL
+# is accepted wherever STRUCTDECL is; without these alternatives the UNIONDECL
+# rule is unreachable from the start symbol.
+TOP_LEVEL nonterm .start
+ TOP_LEVEL TYPEDEF
+ TOP_LEVEL STRUCTDECL
+ TOP_LEVEL UNIONDECL
+ TOP_LEVEL FUNCTION
+ TOP_LEVEL DECLARATION_STATEMENT
+ TYPEDEF
+ STRUCTDECL
+ UNIONDECL
+ FUNCTION
+ DECLARATION_STATEMENT
+ TOP_LEVEL ERROR
+ ERROR
+
+ARGS nonterm
+ ANONYMOUS_TYPE
+ ARGS , ANONYMOUS_TYPE
+ DECLARATION
+ ARGS , DECLARATION
+
+CALL_ARGS nonterm
+ CALL_ARGS , EXPR
+ EXPR
+
+OLD_ARGS nonterm
+ OLD_ARGS , IDENT
+ IDENT
+
+OLD_ARG_DECLS nonterm
+ OLD_ARG_DECLS DECLARATION_STATEMENT
+ DECLARATION_STATEMENT
+
+FUNCTION nonterm
+ DECLARATION ( ) TRUE_BLOCK
+ DECLARATION ( ARGS ) TRUE_BLOCK
+ DECLARATION ( OLD_ARGS ) OLD_ARG_DECLS TRUE_BLOCK
+ IDENT ( OLD_ARGS ) OLD_ARG_DECLS TRUE_BLOCK
+
+AGGREGATE_DECLARATION nonterm
+ { STMTS }
+ { }
+
+TYPEDEF nonterm
+ typedef PRIMITIVE_TYPE TYPE_EXPRESSION ;
+
+STRUCTDECL nonterm
+ struct IDENT AGGREGATE_DECLARATION ;
+
+UNIONDECL nonterm
+ union IDENT AGGREGATE_DECLARATION ;
+
+EXPR nonterm
+ INT
+ STRING
+ CHAR
+ IDENT
+ EXPR --
+ EXPR ++
+ -- EXPR
+ ++ EXPR
+ - EXPR
+ + EXPR
+ & EXPR
+ * EXPR
+ ( ANONYMOUS_TYPE ) EXPR
+ EXPR ( )
+ EXPR ( CALL_ARGS )
+ EXPR OP EXPR
+ EXPR ? EXPR : EXPR
+ EXPR ? : EXPR
+ EXPR [ EXPR ]
+ ! EXPR
+ ( EXPR )
+ sizeof EXPR
+ sizeof ANONYMOUS_TYPE
+ INITIALIZER_LIST
+ EXPR EXPR
+
+INITIALIZER_LIST nonterm
+ { INNER_INITIALIZER_LIST }
+ { }
+
+INNER_INITIALIZER_LIST nonterm
+ EXPR
+ INNER_INITIALIZER_LIST , EXPR
+ INNER_INITIALIZER_LIST ,
+
+IF nonterm
+ if ( EXPR ) BLOCK
+ if ( EXPR ) BLOCK else BLOCK
+
+WHILE nonterm
+ while ( EXPR ) BLOCK
+
+DO nonterm
+ do BLOCK while ( EXPR )
+
+FOR nonterm
+ for ( ; ; ) BLOCK
+ for ( ; ; EXPR ) BLOCK
+ for ( ; EXPR ; ) BLOCK
+ for ( ; EXPR ; EXPR ) BLOCK
+ for ( EXPR ; ; ) BLOCK
+ for ( EXPR ; ; EXPR ) BLOCK
+ for ( EXPR ; EXPR ; ) BLOCK
+ for ( EXPR ; EXPR ; EXPR ) BLOCK
+
+SWITCH nonterm
+ switch ( EXPR ) BLOCK
+
+DECLARATION_CHAIN nonterm
+ DECLARATION_CHAIN , TYPE_EXPRESSION
+ TYPE_EXPRESSION
+ DECLARATION_CHAIN , TYPE_EXPRESSION = EXPR
+ TYPE_EXPRESSION = EXPR
+
+DECLARATION_STATEMENT nonterm
+ PRIMITIVE_TYPE DECLARATION_CHAIN ;
+
+RETURN nonterm
+ return EXPR ;
+ return ;
+
+BREAK nonterm
+ break ;
+
+CONTINUE nonterm
+ continue ;
+
+LABEL nonterm
+ IDENT : STMT
+
+CASE nonterm
+ case EXPR : STMT
+
+STMT nonterm
+ TRUE_BLOCK
+ LABEL
+ CASE
+ BREAK
+ CONTINUE
+ RETURN
+ IF
+ WHILE
+ DO
+ FOR
+ SWITCH
+ DECLARATION_STATEMENT
+ EXPR ;
+ ;
+
+STMTS nonterm
+ STMTS STMT
+ STMT
+ STMTS ERROR
+ ERROR
+
+TRUE_BLOCK nonterm
+ { }
+ { STMTS }
+
+BLOCK nonterm
+ TRUE_BLOCK
+ STMT
diff --git a/grammars/c/grammar.txt b/grammars/c/grammar.txt
deleted file mode 100644
index ffe85c3..0000000
--- a/grammars/c/grammar.txt
+++ /dev/null
@@ -1,130 +0,0 @@
-KEYWORDS list
- switch volatile case while do else const for if
- struct union typedef void
-
-IDENT regex
- [a-zA-Z_][0-9a-zA-Z_]*
-
-INT regex
- [0-9]+
-
-OP list
- ( ) { } [ ]
- ; ,
- - + ! % * & / << >> ^ |
- -= += != %= *= &= /= <<= == >>= ^= |=
- && || ++ --
- < <= > >= =
- . -> ? :
-
-############### TYPE PARSING
-# A PRIMITIVE_TYPE is the core object that takes up space after dereferencing,
-# calling, etc. A normal variable declaration is PRIMITIVE_TYPE (expression)
-PRIMITIVE_TYPE nonterm
- struct IDENT
- union IDENT
- struct IDENT AGGREGATE_DECLARATION
- union IDENT AGGREGATE_DECLARATION
- const PRIMITIVE_TYPE
- volatile PRIMITIVE_TYPE
- void
- IDENT
-
-# A TYPE_EXPRESSION is basically an lvalue expression.
-TYPE_EXPRESSION nonterm
- IDENT
- TYPE_EXPRESSION [ ]
- TYPE_EXPRESSION [ INT ]
- * TYPE_EXPRESSION
- ( TYPE_EXPRESSION )
- TYPE_EXPRESSION ( )
- TYPE_EXPRESSION ( ARGS )
-
-DECLARATION nonterm
- PRIMITIVE_TYPE TYPE_EXPRESSION
-
-# An ANONYMOUS_TYPE has no name
-ANONYMOUS_TYPE nonterm
- PRIMITIVE_TYPE
- ANONYMOUS_TYPE [ ]
- ANONYMOUS_TYPE [ INT ]
- * ANONYMOUS_TYPE
- ( ANONYMOUS_TYPE )
- ANONYMOUS_TYPE ( )
- ANONYMOUS_TYPE ( ARGS )
-
-############### TOP LEVEL
-TOP_LEVEL nonterm .start
- TYPEDEF
- FUNCTION
-
-ARGS nonterm
- ANONYMOUS_TYPE
- ANONYMOUS_TYPE , ARGS
- DECLARATION
- DECLARATION , ARGS
-
-FUNCTION nonterm
- DECLARATION ( ) TRUE_BLOCK
- DECLARATION ( ARGS ) TRUE_BLOCK
-
-AGGREGATE_DECLARATION nonterm
- { STMTS }
-
-TYPEDEF nonterm
- typedef PRIMITIVE_TYPE TYPE_EXPRESSION ;
-
-EXPR nonterm
- INT
- IDENT
- EXPR --
- EXPR ++
- EXPR OP EXPR
- EXPR ? EXPR : EXPR
-
-IF nonterm
- if ( EXPR ) BLOCK
- if ( EXPR ) BLOCK else BLOCK
-
-WHILE nonterm
- while ( EXPR ) BLOCK
-
-DO nonterm
- do BLOCK while ( EXPR )
-
-FOR nonterm
- for ( EXPR ; EXPR ; EXPR ) BLOCK
-
-SWITCH nonterm
- switch ( EXPR ) BLOCK
-
-DECLARATION_CHAIN nonterm
- TYPE_EXPRESSION
- TYPE_EXPRESSION , DECLARATION_CHAIN
- TYPE_EXPRESSION = EXPR
- TYPE_EXPRESSION = EXPR , DECLARATION_CHAIN
-
-DECLARATION_STATEMENT nonterm
- PRIMITIVE_TYPE DECLARATION_CHAIN ;
-
-STMT nonterm
- IF
- WHILE
- DO
- FOR
- SWITCH
- # NOTE: it auto-prefers declarations right now
- DECLARATION_STATEMENT .name DECL_STMT
- EXPR ;
-
-STMTS nonterm
- STMT
- STMT STMTS
-
-TRUE_BLOCK nonterm
- { }
- { STMTS }
-
-BLOCK nonterm
- TRUE_BLOCK
- STMT
diff --git a/grammars/c/preprocess.c b/grammars/c/preprocess.c
new file mode 100644
index 0000000..3ae7406
--- /dev/null
+++ b/grammars/c/preprocess.c
@@ -0,0 +1,45 @@
+// Overwrite, in place, everything in a C source buffer that is not meant to
+// reach the parser: comments and preprocessor directives become spaces. The
+// buffer keeps its length, so byte offsets of the surviving text do not move.
+//
+//   string: source text; exactly `length` bytes are examined, and no
+//           terminating NUL is required or read.
+//   length: number of bytes in `string`.
+//
+// Rules applied:
+//   - A block comment is blanked in full, including newlines inside it. An
+//     unterminated block comment is blanked to the end of the buffer.
+//   - A line comment is blanked up to, but not including, its newline.
+//   - A directive is a '#' that is the first non-blank character on a line.
+//     It is blanked (the '#' included) up to, but not including, the first
+//     newline that is not escaped by a preceding backslash.
+//   - String and character literals are skipped untouched, so comment
+//     markers and '#' characters inside them are preserved.
+void preprocess(char *string, size_t length) {
+    // True while only blanks have been seen since the last newline.
+    int on_newline = 1;
+    size_t i = 0;
+    while (i < length) {
+        switch (string[i]) {
+        case '/': {
+            // Only look ahead when the next byte is inside the buffer.
+            char next = (i + 1 < length) ? string[i + 1] : '\0';
+            if (next == '*') {
+                // Blank the opener first so that its '*' can never double as
+                // the first half of the closing delimiter (the "/*/" case).
+                string[i++] = ' ';
+                string[i++] = ' ';
+                while (i < length) {
+                    if (string[i] == '*' && i + 1 < length
+                            && string[i + 1] == '/') {
+                        string[i++] = ' ';
+                        string[i++] = ' ';
+                        break;
+                    }
+                    string[i++] = ' ';
+                }
+                // A comment counts as whitespace: on_newline is unchanged.
+                continue;
+            }
+            if (next == '/') {
+                // Stop on the newline itself; the '\n' case below must still
+                // see it so a directive on the following line is recognised.
+                while (i < length && string[i] != '\n') string[i++] = ' ';
+                continue;
+            }
+            on_newline = 0;
+            break;
+        }
+        case '"':
+        case '\'': {
+            // Skip over the literal so that "//", "/*" and '#' inside it are
+            // not mistaken for comments or directives.
+            char quote = string[i];
+            on_newline = 0;
+            for (i++; i < length && string[i] != quote; i++) {
+                // An unterminated literal ends at the end of its line.
+                if (string[i] == '\n') break;
+                // A backslash escapes the next byte, whatever it is.
+                if (string[i] == '\\' && i + 1 < length) i++;
+            }
+            // Let the '\n' case handle the newline of an unterminated literal.
+            if (i < length && string[i] == '\n') continue;
+            break;
+        }
+        case '#': {
+            if (on_newline) {
+                int escaped = 0;
+                for (; i < length; i++) {
+                    if (string[i] == '\n' && !escaped) break;
+                    // A '\r' between the backslash and the newline (CRLF
+                    // input) must not cancel the line continuation.
+                    if (string[i] != '\r') escaped = (string[i] == '\\');
+                    string[i] = ' ';
+                }
+                // Re-examine the terminating newline, if there is one.
+                continue;
+            }
+            // A '#' in the middle of a line is ordinary text.
+            on_newline = 0;
+            break;
+        }
+        case '\n':
+            on_newline = 1;
+            break;
+        case ' ':
+        case '\t':
+        case '\r':
+        case '\v':
+        case '\f':
+            // Blanks neither start nor end the "beginning of line" state, so
+            // an indented '#' is still treated as a directive.
+            break;
+        default:
+            on_newline = 0;
+            break;
+        }
+        i++;
+    }
+}
diff --git a/parse.py b/parse.py
new file mode 100644
index 0000000..190c312
--- /dev/null
+++ b/parse.py
@@ -0,0 +1,6 @@
+import earlpy
+import sys
+
+p = earlpy.Parser(sys.argv[1])
+if len(sys.argv) == 3:
+ print(p.parse_file(sys.argv[2]).pprint())
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback