Merging from branches/antlr3 (r246:354)

author: Christopher L. Conway <christopherleeconway@gmail.com> 2010-03-30 20:22:33 +0000
committer: Christopher L. Conway <christopherleeconway@gmail.com> 2010-03-30 20:22:33 +0000
commit: 8730e9320a833a9eb0e65074f9988950b7424c0c (patch)
tree: 1cb09404256743e208fece079ba473595e05edcd /src/parser/smt
parent: 8c87c05ac56a5f29b2ae1e658f2d7d3b7b588163 (diff)
7 files changed, 694 insertions, 591 deletions
diff --git a/src/parser/smt/.gitignore b/src/parser/smt/.gitignore
new file mode 100644
index 000000000..7fd0cf319
--- /dev/null
+++ b/src/parser/smt/.gitignore
@@ -0,0 +1,4 @@
+/.deps
+/stamp-generated
+/generated
+/Makefile.in
diff --git a/src/parser/smt/Makefile.am b/src/parser/smt/Makefile.am
index 7fe235002..3ea6dc940 100644
--- a/src/parser/smt/Makefile.am
+++ b/src/parser/smt/Makefile.am
@@ -1,29 +1,29 @@
 AM_CPPFLAGS = \
 	-D__BUILDING_CVC4PARSERLIB \
-	-I@srcdir@/../../include -I@srcdir@/../..
+	-I@srcdir@/../../include -I@srcdir@/../.. $(ANTLR_INCLUDES)
 AM_CXXFLAGS = -Wall -fvisibility=hidden
+# Compile generated C files using C++ compiler
+CC=$(CXX)
 
 noinst_LTLIBRARIES = libparsersmt.la
 
 ANTLR_TOKEN_STUFF = \
-	@srcdir@/generated/SmtVocabularyTokenTypes.hpp \
-	@srcdir@/generated/SmtVocabularyTokenTypes.txt \
-	@srcdir@/generated/AntlrSmtParserTokenTypes.hpp \
-	@srcdir@/generated/AntlrSmtParserTokenTypes.txt
+	@srcdir@/generated/Smt.tokens
 ANTLR_LEXER_STUFF = \
-	@srcdir@/generated/AntlrSmtLexer.hpp \
-	@srcdir@/generated/AntlrSmtLexer.cpp \
+	@srcdir@/generated/SmtLexer.h \
+	@srcdir@/generated/SmtLexer.c \
 	$(ANTLR_TOKEN_STUFF)
 ANTLR_PARSER_STUFF = \
-	@srcdir@/generated/AntlrSmtParser.hpp \
-	@srcdir@/generated/AntlrSmtParser.cpp
+	@srcdir@/generated/SmtParser.h \
+	@srcdir@/generated/SmtParser.c
 ANTLR_STUFF = \
 	$(ANTLR_LEXER_STUFF) \
 	$(ANTLR_PARSER_STUFF)
 
 libparsersmt_la_SOURCES = \
-	smt_lexer.g \
-	smt_parser.g \
+	Smt.g \
+	smt_input.h \
+	smt_input.cpp \
 	$(ANTLR_STUFF)
 
 BUILT_SOURCES = $(ANTLR_STUFF)
@@ -36,16 +36,14 @@ maintainer-clean-local:
 @srcdir@/stamp-generated:
 	mkdir -p @srcdir@/generated
 	touch @srcdir@/stamp-generated
+
 # antlr doesn't overwrite output files, it just leaves them.  So we have to delete them first.
-@srcdir@/generated/AntlrSmtLexer.hpp: smt_lexer.g @srcdir@/stamp-generated
-	$(AM_V_at)-rm -f $(ANTLR_LEXER_STUFF)
-	$(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/smt_lexer.g"
-@srcdir@/generated/AntlrSmtLexer.cpp $(ANTLR_TOKEN_STUFF): @srcdir@/generated/AntlrSmtLexer.hpp
-# doesn't actually depend on the lexer, but if we're doing parallel
+@srcdir@/generated/SmtLexer.h: Smt.g @srcdir@/stamp-generated
+	-rm -f $(ANTLR_STUFF)
+	$(ANTLR) -o "@srcdir@/generated" "@srcdir@/Smt.g"
+
+# These don't actually depend on SmtLexer.h, but if we're doing parallel
 # make and the lexer needs to be rebuilt, we have to keep the rules
 # from running in parallel (since the token files will be deleted &
 # recreated)
-@srcdir@/generated/AntlrSmtParser.hpp: smt_parser.g smt_lexer.g $(ANTLR_TOKEN_STUFF) @srcdir@/stamp-generated
-	$(AM_V_at)-rm -f $(ANTLR_PARSER_STUFF)
-	$(AM_V_GEN)$(ANTLR) -o "@srcdir@/generated" "@srcdir@/smt_parser.g"
-@srcdir@/generated/AntlrSmtParser.cpp: @srcdir@/generated/AntlrSmtParser.hpp
+@srcdir@/generated/SmtLexer.c @srcdir@/generated/SmtParser.h @srcdir@/generated/SmtParser.c $(ANTLR_TOKEN_STUFF): @srcdir@/generated/SmtLexer.h
diff --git a/src/parser/smt/Smt.g b/src/parser/smt/Smt.g
new file mode 100644
index 000000000..7095c7269
--- /dev/null
+++ b/src/parser/smt/Smt.g
@@ -0,0 +1,552 @@
+/* *******************                                                        */
+/*  Smt.g
+ ** Original author: cconway
+ ** Major contributors: dejan, mdeters
+ ** Minor contributors (to current version): none
+ ** This file is part of the CVC4 prototype.
+ ** Copyright (c) 2009, 2010  The Analysis of Computer Systems Group (ACSys)
+ ** Courant Institute of Mathematical Sciences
+ ** New York University
+ ** See the file COPYING in the top-level source directory for licensing
+ ** information.
+ **
+ ** Parser for SMT-LIB input language.
+ **/
+
+grammar Smt;
+
+options {
+  language = 'C';                  // C output for antlr
+//  defaultErrorHandler = false;      // Skip the default error handling, just break with exceptions
+  k = 2;
+}
+
+@header {
+/**
+ ** This file is part of the CVC4 prototype.
+ ** Copyright (c) 2009, 2010  The Analysis of Computer Systems Group (ACSys)
+ ** Courant Institute of Mathematical Sciences
+ ** New York University
+ ** See the file COPYING in the top-level source directory for licensing
+ ** information.
+ **/
+}
+
+@lexer::includes {
+/* This improves performance by ~10 percent on big inputs.
+ * This option is only valid if we know the input is ASCII (or some 8-bit encoding).
+ * If we know the input is UTF-16, we can use ANTLR3_INLINE_INPUT_UTF16.
+ * Otherwise, we have to let the lexer detect the encoding at runtime.
+ */
+#define ANTLR3_INLINE_INPUT_ASCII
+}
+
+@parser::includes {
+#include "expr/command.h"
+#include "parser/input.h"
+
+namespace CVC4 {
+  class Expr;
+namespace parser {
+  class SmtInput;
+}
+}
+
+extern
+void SetSmtInput(CVC4::parser::SmtInput* smt);
+
+}
+
+@parser::postinclude {
+#include "expr/expr.h"
+#include "expr/kind.h"
+#include "expr/type.h"
+#include "parser/input.h"
+#include "parser/smt/smt_input.h"
+#include "util/output.h"
+#include <vector>
+
+using namespace CVC4;
+using namespace CVC4::parser;
+}
+
+@members {
+static CVC4::parser::SmtInput *input;
+
+extern
+void SetSmtInput(CVC4::parser::SmtInput* _input) {
+  input = _input;
+}
+}
+
+/**
+ * Parses an expression.
+ * @return the parsed expression
+ */
+parseExpr returns [CVC4::Expr expr]
+  : annotatedFormula[expr]
+  | EOF
+  ;
+
+/**
+ * Parses a command (the whole benchmark)
+ * @return the command of the benchmark
+ */
+parseCommand returns [CVC4::Command* cmd]
+  : b = benchmark { $cmd = b; }
+  ;
+
+/**
+ * Matches the whole SMT-LIB benchmark.
+ * @return the sequence command containing the whole problem
+ */
+benchmark returns [CVC4::Command* cmd]
+  : LPAREN_TOK BENCHMARK_TOK IDENTIFIER c = benchAttributes RPAREN_TOK 
+  	{ $cmd = c; }
+  | EOF { $cmd = 0; }
+  ;
+
+/**
+ * Matches a sequence of benchmark attributes and returns a pointer to a
+ * command sequence.
+ * @return the command sequence
+ */
+benchAttributes returns [CVC4::CommandSequence* cmd_seq]
+@init {
+  cmd_seq = new CommandSequence();
+}
+  : (cmd = benchAttribute { if (cmd) cmd_seq->addCommand(cmd); } )+
+  ;
+
+/**
+ * Matches a benchmark attribute, sucha as ':logic', ':formula', and returns
+ * a corresponding command
+ * @return a command corresponding to the attribute
+ */
+benchAttribute returns [CVC4::Command* smt_command]
+@declarations { 
+  std::string name;
+  BenchmarkStatus b_status;
+  Expr expr;
+}
+  : LOGIC_TOK identifier[name,CHECK_NONE,SYM_VARIABLE]
+    { input->setLogic(name);
+      smt_command = new SetBenchmarkLogicCommand(name);   }
+  | ASSUMPTION_TOK annotatedFormula[expr]
+    { smt_command = new AssertCommand(expr);   }
+  | FORMULA_TOK annotatedFormula[expr]
+    { smt_command = new CheckSatCommand(expr); }
+  | STATUS_TOK status[b_status]                   
+    { smt_command = new SetBenchmarkStatusCommand(b_status); }        
+  | EXTRAFUNS_TOK LPAREN_TOK (functionDeclaration)+ RPAREN_TOK  
+  | EXTRAPREDS_TOK LPAREN_TOK (predicateDeclaration)+ RPAREN_TOK  
+  | EXTRASORTS_TOK LPAREN_TOK sortDeclaration+ RPAREN_TOK  
+  | NOTES_TOK STRING_LITERAL        
+  | annotation
+  ;
+
+/**
+ * Matches an annotated formula.
+ * @return the expression representing the formula
+ */
+annotatedFormula[CVC4::Expr& expr]
+@init {
+  Debug("parser") << "annotated formula: " << AntlrInput::tokenText(LT(1)) << std::endl;
+  Kind kind;
+  std::string name;
+  std::vector<Expr> args; /* = getExprVector(); */
+} 
+  : /* a built-in operator application */
+    LPAREN_TOK builtinOp[kind] annotatedFormulas[args,expr] RPAREN_TOK 
+    { input->checkArity(kind, args.size());
+      if((kind == CVC4::kind::AND || kind == CVC4::kind::OR) && args.size() == 1) {
+        /* Unary AND/OR can be replaced with the argument.
+	       It just so happens expr should already by the only argument. */
+        Assert( expr == args[0] );
+      } else {
+        expr = input->mkExpr(kind, args);
+      }
+    }
+
+  | /* A non-built-in function application */
+
+    // Semantic predicate not necessary if parenthesized subexpressions
+    // are disallowed
+    // { isFunction(LT(2)->getText()) }? 
+
+    LPAREN_TOK 
+    functionSymbol[expr]
+    { args.push_back(expr); }
+    annotatedFormulas[args,expr] RPAREN_TOK
+    // TODO: check arity
+    { expr = input->mkExpr(CVC4::kind::APPLY_UF,args); }
+
+  | /* An ite expression */
+    LPAREN_TOK (ITE_TOK | IF_THEN_ELSE_TOK) 
+    annotatedFormula[expr]
+    { args.push_back(expr); } 
+    annotatedFormula[expr]
+    { args.push_back(expr); } 
+    annotatedFormula[expr]
+    { args.push_back(expr); } 
+    RPAREN_TOK
+    { expr = input->mkExpr(CVC4::kind::ITE, args); }
+
+  | /* a let/flet binding */
+    LPAREN_TOK 
+    (LET_TOK LPAREN_TOK var_identifier[name,CHECK_UNDECLARED]
+      | FLET_TOK LPAREN_TOK fun_identifier[name,CHECK_UNDECLARED] )
+    annotatedFormula[expr] RPAREN_TOK
+    { input->defineVar(name,expr); }
+    annotatedFormula[expr]
+    RPAREN_TOK
+    { input->undefineVar(name); }
+
+  | /* a variable */
+    ( identifier[name,CHECK_DECLARED,SYM_VARIABLE]
+      | var_identifier[name,CHECK_DECLARED] 
+      | fun_identifier[name,CHECK_DECLARED] )
+    { expr = input->getVariable(name); }
+
+    /* constants */
+  | TRUE_TOK          { expr = input->getTrueExpr(); }
+  | FALSE_TOK         { expr = input->getFalseExpr(); }
+    /* TODO: let, flet, quantifiers, arithmetic constants */
+  ;
+
+/**
+ * Matches a sequence of annotated formulas and puts them into the formulas
+ * vector.
+ * @param formulas the vector to fill with formulas
+ * @param expr an Expr reference for the elements of the sequence
+ */   
+/* NOTE: We pass an Expr in here just to avoid allocating a fresh Expr every 
+ * time through this rule. */
+annotatedFormulas[std::vector<CVC4::Expr>& formulas, CVC4::Expr& expr]
+  : ( annotatedFormula[expr] { formulas.push_back(expr); } )+
+  ;
+
+/**
+* Matches on of the unary Boolean connectives.
+* @return the kind of the Bollean connective
+*/
+builtinOp[CVC4::Kind& kind]
+@init {
+  Debug("parser") << "builtin: " << AntlrInput::tokenText(LT(1)) << std::endl;
+}
+  : NOT_TOK      { $kind = CVC4::kind::NOT;     }
+  | IMPLIES_TOK  { $kind = CVC4::kind::IMPLIES; }
+  | AND_TOK      { $kind = CVC4::kind::AND;     }
+  | OR_TOK       { $kind = CVC4::kind::OR;      }
+  | XOR_TOK      { $kind = CVC4::kind::XOR;     }
+  | IFF_TOK      { $kind = CVC4::kind::IFF;     }
+  | EQUAL_TOK    { $kind = CVC4::kind::EQUAL;   }
+  | DISTINCT_TOK { $kind = CVC4::kind::DISTINCT; }
+    /* TODO: lt, gt, plus, minus, etc. */
+  ;
+
+/**
+ * Matches a (possibly undeclared) predicate identifier (returning the string). 
+ * @param check what kind of check to do with the symbol
+ */
+predicateName[std::string& name, CVC4::parser::DeclarationCheck check]
+  :  functionName[name,check]
+  ;
+
+/**
+ * Matches a (possibly undeclared) function identifier (returning the string)
+ * @param check what kind of check to do with the symbol
+ */
+functionName[std::string& name, CVC4::parser::DeclarationCheck check]
+  :  identifier[name,check,SYM_FUNCTION] 
+  ;
+
+/**
+ * Matches an previously declared function symbol (returning an Expr)
+ */
+functionSymbol[CVC4::Expr& fun]
+@declarations {
+	std::string name;
+}
+  : functionName[name,CHECK_DECLARED]
+    { input->checkFunction(name);
+      fun = input->getFunction(name); }
+  ;
+  
+/**
+ * Matches an attribute name from the input (:attribute_name).
+ */
+attribute
+  :  ATTR_IDENTIFIER
+  ;
+
+
+
+functionDeclaration
+@declarations {
+  std::string name;
+  std::vector<Type*> sorts;
+}
+  : LPAREN_TOK functionName[name,CHECK_UNDECLARED] 
+      t = sortSymbol // require at least one sort
+    { sorts.push_back(t); }
+      sortList[sorts] RPAREN_TOK
+    { t = input->functionType(sorts);
+      input->mkVar(name, t); } 
+  ;
+              
+/**
+ * Matches the declaration of a predicate and declares it
+ */
+predicateDeclaration
+@declarations {
+  std::string name;
+  std::vector<Type*> p_sorts;
+}
+  : LPAREN_TOK predicateName[name,CHECK_UNDECLARED] sortList[p_sorts] RPAREN_TOK
+    { Type *t = input->predicateType(p_sorts);
+      input->mkVar(name, t); } 
+  ;
+
+sortDeclaration 
+@declarations {
+  std::string name;
+}
+  : sortName[name,CHECK_UNDECLARED]
+    { Debug("parser") << "sort decl: '" << name << "'" << std::endl;
+      input->newSort(name); }
+  ;
+  
+/**
+ * Matches a sequence of sort symbols and fills them into the given vector.
+ */
+sortList[std::vector<CVC4::Type*>& sorts]
+  : ( t = sortSymbol { sorts.push_back(t); })* 
+  ;
+
+/**
+ * Matches the sort symbol, which can be an arbitrary identifier.
+ * @param check the check to perform on the name
+ */
+sortName[std::string& name, CVC4::parser::DeclarationCheck check] 
+  : identifier[name,check,SYM_SORT] 
+  ;
+
+sortSymbol returns [CVC4::Type* t]
+@declarations {
+  std::string name;
+}
+  : sortName[name,CHECK_NONE] 
+  	{ $t = input->getSort(name); }
+  ;
+
+/**
+ * Matches the status of the benchmark, one of 'sat', 'unsat' or 'unknown'.
+ */
+status[ CVC4::BenchmarkStatus& status ]
+  : SAT_TOK       { $status = SMT_SATISFIABLE;    }
+  | UNSAT_TOK     { $status = SMT_UNSATISFIABLE;  }
+  | UNKNOWN_TOK   { $status = SMT_UNKNOWN;        }
+  ;
+
+/**
+ * Matches an annotation, which is an attribute name, with an optional user
+ */
+annotation
+  : attribute (USER_VALUE)?
+  ;
+
+/**
+ * Matches an identifier and sets the string reference parameter id.
+ * @param id string to hold the identifier
+ * @param check what kinds of check to do on the symbol
+ * @param type the intended namespace for the identifier
+ */
+identifier[std::string& id,
+		   CVC4::parser::DeclarationCheck check, 
+           CVC4::parser::SymbolType type] 
+  : IDENTIFIER
+    { id = AntlrInput::tokenText($IDENTIFIER);
+      Debug("parser") << "identifier: " << id
+                      << " check? " << toString(check)
+                      << " type? " << toString(type) << std::endl;
+      input->checkDeclaration(id, check, type); }
+  ;
+
+/**
+ * Matches an variable identifier and sets the string reference parameter id.
+ * @param id string to hold the identifier
+ * @param check what kinds of check to do on the symbol
+ */
+var_identifier[std::string& id,
+    		   CVC4::parser::DeclarationCheck check] 
+  : VAR_IDENTIFIER
+    { id = AntlrInput::tokenText($VAR_IDENTIFIER);
+      Debug("parser") << "var_identifier: " << id
+                      << " check? " << toString(check) << std::endl;
+      input->checkDeclaration(id, check, SYM_VARIABLE); }
+  ;
+
+/**
+ * Matches an function identifier and sets the string reference parameter id.
+ * @param id string to hold the identifier
+ * @param check what kinds of check to do on the symbol
+ */
+fun_identifier[std::string& id,
+    		   CVC4::parser::DeclarationCheck check] 
+  : FUN_IDENTIFIER
+    { id = AntlrInput::tokenText($FUN_IDENTIFIER);
+      Debug("parser") << "fun_identifier: " << id
+                      << " check? " << toString(check) << std::endl;
+      input->checkDeclaration(id, check, SYM_FUNCTION); }
+  ;
+
+
+// Base SMT-LIB tokens
+DISTINCT_TOK      : 'distinct';
+ITE_TOK           : 'ite';
+IF_THEN_ELSE_TOK  : 'if_then_else';
+TRUE_TOK          : 'true';
+FALSE_TOK         : 'false';
+NOT_TOK           : 'not';
+IMPLIES_TOK       : 'implies';
+AND_TOK           : 'and';
+OR_TOK            : 'or';
+XOR_TOK           : 'xor';
+IFF_TOK           : 'iff';
+EXISTS_TOK        : 'exists';
+FORALL_TOK        : 'forall';
+LET_TOK           : 'let';
+FLET_TOK          : 'flet';
+THEORY_TOK        : 'theory';
+SAT_TOK           : 'sat';
+UNSAT_TOK         : 'unsat';
+UNKNOWN_TOK       : 'unknown';
+BENCHMARK_TOK     : 'benchmark';
+
+// The SMT attribute tokens
+LOGIC_TOK       : ':logic';
+ASSUMPTION_TOK  : ':assumption';
+FORMULA_TOK     : ':formula';
+STATUS_TOK      : ':status';
+EXTRASORTS_TOK  : ':extrasorts';
+EXTRAFUNS_TOK   : ':extrafuns';
+EXTRAPREDS_TOK  : ':extrapreds';
+NOTES_TOK       : ':notes';
+
+// arithmetic symbols
+EQUAL_TOK         : '=';
+LESS_THAN_TOK     : '<';
+GREATER_THAN_TOK  : '>';
+AMPERSAND_TOK     : '&';
+AT_TOK            : '@';
+POUND_TOK         : '#';
+PLUS_TOK          : '+';
+MINUS_TOK         : '-';
+STAR_TOK          : '*';
+DIV_TOK           : '/';
+PERCENT_TOK       : '%';
+PIPE_TOK          : '|';
+TILDE_TOK         : '~';
+
+// Language meta-symbols
+//QUESTION_TOK      : '?';
+//DOLLAR_TOK        : '$';
+LPAREN_TOK        : '(';
+RPAREN_TOK        : ')';
+
+/**
+ * Matches an identifier from the input. An identifier is a sequence of letters,
+ * digits and "_", "'", "." symbols, starting with a letter.
+ */
+IDENTIFIER /*options { paraphrase = 'an identifier'; testLiterals = true; }*/
+  :  ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')*
+  ;
+
+/**
+ * Matches an identifier starting with a colon.
+ */
+ATTR_IDENTIFIER /*options { paraphrase = 'an identifier starting with a colon'; testLiterals = true; }*/
+  :  ':' IDENTIFIER
+  ;
+
+/**
+ * Matches an identifier starting with a question mark.
+ */
+VAR_IDENTIFIER
+  : '?' IDENTIFIER
+  ;
+  
+/**
+ * Matches an identifier starting with a dollar sign.
+ */
+FUN_IDENTIFIER
+  : '$' IDENTIFIER
+  ;
+
+/**
+ * Matches the value of user-defined annotations or attributes. The only constraint imposed on a user-defined value is that it start with
+ * an open brace and end with closed brace.
+ */
+USER_VALUE
+  :   '{'
+      ( ~('{' | '}') )*
+    '}'
+  ;
+
+
+/**
+ * Matches and skips whitespace in the input.
+ */
+WHITESPACE /*options { paraphrase = 'whitespace'; }*/
+  :  (' ' | '\t' | '\f' | '\r' | '\n')+             { $channel = HIDDEN;; }
+  ;
+
+/**
+ * Matches a numeral from the input (non-empty sequence of digits).
+ */
+NUMERAL_TOK /*options { paraphrase = 'a numeral'; }*/
+  :  (DIGIT)+
+  ;
+
+/**
+ * Matches a double quoted string literal. Escaping is supported, and escape
+ * character '\' has to be escaped.
+ */
+STRING_LITERAL /*options { paraphrase = 'a string literal'; }*/
+  :  '"' (ESCAPE | ~('"'|'\\'))* '"'
+  ;
+
+/**
+ * Matches the comments and ignores them
+ */
+COMMENT /*options { paraphrase = 'comment'; }*/
+  : ';' (~('\n' | '\r'))*                    { $channel = HIDDEN;; }
+  ;
+
+
+/**
+ * Matches any letter ('a'-'z' and 'A'-'Z').
+ */
+fragment
+ALPHA /*options { paraphrase = 'a letter'; }*/
+  :  'a'..'z'
+  |  'A'..'Z'
+  ;
+
+/**
+ * Matches the digits (0-9)
+ */
+fragment
+DIGIT /*options { paraphrase = 'a digit'; }*/
+  :   '0'..'9'
+  ;
+
+
+/**
+ * Matches an allowed escaped character.
+ */
+fragment ESCAPE
+  : '\\' ('"' | '\\' | 'n' | 't' | 'r')
+  ;
+
diff --git a/src/parser/smt/smt_input.cpp b/src/parser/smt/smt_input.cpp
new file mode 100644
index 000000000..7a28c30f1
--- /dev/null
+++ b/src/parser/smt/smt_input.cpp
@@ -0,0 +1,73 @@
+/*
+ * smt_parser.cpp
+ *
+ *  Created on: Mar 5, 2010
+ *      Author: chris
+ */
+
+#include <antlr3.h>
+
+#include "expr/expr_manager.h"
+#include "parser/parser_exception.h"
+#include "parser/smt/smt_input.h"
+#include "parser/smt/generated/SmtLexer.h"
+#include "parser/smt/generated/SmtParser.h"
+
+namespace CVC4 {
+namespace parser {
+
+/* Use lookahead=2 */
+SmtInput::SmtInput(ExprManager* exprManager, const std::string& filename, bool useMmap) :
+  AntlrInput(exprManager,filename,2,useMmap) {
+  init();
+}
+
+SmtInput::SmtInput(ExprManager* exprManager, const std::string& input, const std::string& name) :
+  AntlrInput(exprManager,input,name,2) {
+  init();
+}
+
+void SmtInput::init() {
+  pANTLR3_INPUT_STREAM input = getInputStream();
+  AlwaysAssert( input != NULL );
+
+  d_pSmtLexer = SmtLexerNew(input);
+  if( d_pSmtLexer == NULL ) {
+    throw ParserException("Failed to create SMT lexer.");
+  }
+
+  setLexer( d_pSmtLexer->pLexer );
+
+  pANTLR3_COMMON_TOKEN_STREAM tokenStream = getTokenStream();
+  AlwaysAssert( tokenStream != NULL );
+
+  d_pSmtParser = SmtParserNew(tokenStream);
+  if( d_pSmtParser == NULL ) {
+    throw ParserException("Failed to create SMT parser.");
+  }
+
+  setParser(d_pSmtParser->pParser);
+  SetSmtInput(this);
+}
+
+
+SmtInput::~SmtInput() {
+  d_pSmtLexer->free(d_pSmtLexer);
+  d_pSmtParser->free(d_pSmtParser);
+}
+
+Command* SmtInput::doParseCommand() throw (ParserException) {
+  return d_pSmtParser->parseCommand(d_pSmtParser);
+}
+
+Expr SmtInput::doParseExpr() throw (ParserException) {
+  return d_pSmtParser->parseExpr(d_pSmtParser);
+}
+
+pANTLR3_LEXER SmtInput::getLexer() {
+  return d_pSmtLexer->pLexer;
+}
+
+} // namespace parser
+
+} // namespace CVC4
diff --git a/src/parser/smt/smt_input.h b/src/parser/smt/smt_input.h
new file mode 100644
index 000000000..b3613d67b
--- /dev/null
+++ b/src/parser/smt/smt_input.h
@@ -0,0 +1,47 @@
+/*
+ * smt_parser.h
+ *
+ *  Created on: Mar 5, 2010
+ *      Author: chris
+ */
+
+#ifndef SMT_PARSER_H_
+#define SMT_PARSER_H_
+
+#include "parser/antlr_input.h"
+#include "parser/smt/generated/SmtLexer.h"
+#include "parser/smt/generated/SmtParser.h"
+
+// extern void SmtParserSetAntlrParser(CVC4::parser::AntlrParser* newAntlrParser);
+
+namespace CVC4 {
+
+class Command;
+class Expr;
+class ExprManager;
+
+namespace parser {
+
+class SmtInput : public AntlrInput {
+public:
+  SmtInput(ExprManager* exprManager, const std::string& filename, bool useMmap);
+  SmtInput(ExprManager* exprManager, const std::string& input, const std::string& name);
+  ~SmtInput();
+
+protected:
+  Command* doParseCommand() throw(ParserException);
+  Expr doParseExpr() throw(ParserException);
+  pANTLR3_LEXER getLexer();
+  pANTLR3_LEXER createLexer(pANTLR3_INPUT_STREAM input);
+  pANTLR3_PARSER createParser(pANTLR3_COMMON_TOKEN_STREAM tokenStream);
+
+private:
+  void init();
+  pSmtLexer d_pSmtLexer;
+  pSmtParser d_pSmtParser;
+}; // class SmtInput
+} // namespace parser
+
+} // namespace CVC4
+
+#endif /* SMT_PARSER_H_ */
diff --git a/src/parser/smt/smt_lexer.g b/src/parser/smt/smt_lexer.g
deleted file mode 100644
index d694cd93f..000000000
--- a/src/parser/smt/smt_lexer.g
+++ /dev/null
@@ -1,220 +0,0 @@
-/* *******************                                                        */
-/*  smt_lexer.g
- ** Original author: dejan
- ** Major contributors: cconway, mdeters
- ** Minor contributors (to current version): none
- ** This file is part of the CVC4 prototype.
- ** Copyright (c) 2009, 2010  The Analysis of Computer Systems Group (ACSys)
- ** Courant Institute of Mathematical Sciences
- ** New York University
- ** See the file COPYING in the top-level source directory for licensing
- ** information.
- **
- ** Lexer for SMT-LIB input language.
- **/
-
-options {
-  language = "Cpp";            // C++ output for antlr
-  namespaceStd  = "std";       // Cosmetic option to get rid of long defines in generated code
-  namespaceAntlr = "antlr";    // Cosmetic option to get rid of long defines in generated code
-  namespace = "CVC4::parser";  // Wrap everything in the smtparser namespace
-}
-
-/**
- * AntlrSmtLexer class is a stream tokenizer (lexer) for the SMT-LIB benchmark
- * language.
- */
-class AntlrSmtLexer extends Lexer;
-
-options {
-  exportVocab = SmtVocabulary;  // Name of the shared token vocabulary
-  testLiterals = false;         // Do not check for literals by default
-  defaultErrorHandler = false;  // Skip the default error handling, just break with exceptions
-  k = 2;
-}
-
-tokens {
-  // Base SMT-LIB tokens
-  DISTINCT      = "distinct";
-  ITE           = "ite";
-  IF_THEN_ELSE  = "if_then_else";
-  TRUE          = "true";
-  FALSE         = "false";
-  NOT           = "not";
-  IMPLIES       = "implies";
-  AND           = "and";
-  OR            = "or";
-  XOR           = "xor";
-  IFF           = "iff";
-  EXISTS        = "exists";
-  FORALL        = "forall";
-  LET           = "let";
-  FLET          = "flet";
-  THEORY        = "theory";
-  LOGIC         = "logic";
-  SAT           = "sat";
-  UNSAT         = "unsat";
-  UNKNOWN       = "unknown";
-  BENCHMARK     = "benchmark";
-  // The SMT attribute tokens
-  LOGIC_ATTR       = ":logic";
-  ASSUMPTION_ATTR  = ":assumption";
-  FORMULA_ATTR     = ":formula";
-  STATUS_ATTR      = ":status";
-  EXTRASORTS_ATTR  = ":extrasorts";
-  EXTRAFUNS_ATTR   = ":extrafuns";
-  EXTRAPREDS_ATTR  = ":extrapreds";
-  C_NOTES       = ":notes";
-  // arithmetic symbols
-  EQUAL         = "=";
-  LESS_THAN     = "<";
-  GREATER_THAN  = ">";
-  AMPERSAND     = "&";
-  AT            = "@";
-  POUND         = "#";
-  PLUS          = "+";
-  MINUS         = "-";
-  STAR          = "*";
-  DIV           = "/";
-  PERCENT       = "%";
-  PIPE          = "|";
-  TILDE         = "~";
-}
-
-/**
- * Matches any letter ('a'-'z' and 'A'-'Z').
- */
-protected
-ALPHA options { paraphrase = "a letter"; }
-  :  'a'..'z'
-  |  'A'..'Z'
-  ;
-
-/**
- * Matches the digits (0-9)
- */
-protected
-DIGIT options { paraphrase = "a digit"; }
-  :   '0'..'9'
-  ;
-
-/**
- * Matches an identifier from the input. An identifier is a sequence of letters,
- * digits and "_", "'", "." symbols, starting with a letter.
- */
-IDENTIFIER options { paraphrase = "an identifier"; testLiterals = true; }
-  :  ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')*
-  ;
-
-/**
- * Matches an identifier starting with a colon. An identifier is a sequence of letters,
- * digits and "_", "'", "." symbols, starting with a colon.
- */
-C_IDENTIFIER options { paraphrase = "an attribute (e.g., ':x')"; testLiterals = true; }
-  :  ':' ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')*
-  ;
-
-VAR_IDENTIFIER options { paraphrase = "a variable (e.g., '?x')"; testLiterals = false; }
-  :  '?' IDENTIFIER
-  ;
-
-FUN_IDENTIFIER options { paraphrase = "a function variable (e.g, '$x')"; testLiterals = false; }
-  :  '$' IDENTIFIER
-  ;
-
-
-/**
- * Matches the value of user-defined annotations or attributes. The only constraint imposed on a user-defined value is that it start with
- * an open brace and end with closed brace.
- */
-USER_VALUE
-  :   '{'
-      ( '\n' { newline(); }
-      | ~('{' | '}' | '\n')
-        )*
-    '}'
-  ;
-
-/**
- * Matches the question mark symbol ('?').
- */
-QUESTION_MARK options { paraphrase = "a question mark '?'"; }
-  :  '?'
-  ;
-
-/**
- * Matches the dollar sign ('$').
- */
-DOLLAR_SIGN options { paraphrase = "a dollar sign '$'"; }
-  :  '$'
-  ;
-
-/**
- * Matches the left bracket ('(').
- */
-LPAREN options { paraphrase = "a left parenthesis '('"; }
-  :   '(';
-
-/**
- * Matches the right bracket ('(').
- */
-RPAREN options { paraphrase = "a right parenthesis ')'"; }
-  :   ')';
-
-/**
- * Matches and skips whitespace in the input.
- */
-WHITESPACE options { paraphrase = "whitespace"; }
-  :  (' ' | '\t' | '\f')              { $setType(antlr::Token::SKIP); }
-  ;
-
-/**
- * Matches and skips the newline symbols in the input.
- */
-NEWLINE options { paraphrase = "a newline"; }
-  :   ('\r' '\n' | '\r' | '\n')       { $setType(antlr::Token::SKIP); newline(); }
-  ;
-
-/**
- * Matches a numeral from the input (non-empty sequence of digits).
- */
-NUMERAL options { paraphrase = "a numeral"; }
-  :  (DIGIT)+
-  ;
-
-/**
- * Matches an allowed escaped character.
- */
-protected ESCAPE
-  : '\\' ('"' | '\\' | 'n' | 't' | 'r')
-  ;
-
-/**
- * Matches a double quoted string literal. Escaping is supported, and escape
- * character '\' has to be escaped.
- */
-STRING_LITERAL options { paraphrase = "a string literal"; }
-  :  '"' (ESCAPE | ~('"'|'\\'))* '"'
-  ;
-
-/**
- * Matches the comments and ignores them
- */
-COMMENT options { paraphrase = "comment"; }
-  : ';' (~('\n' | '\r'))*                    { $setType(antlr::Token::SKIP); }
-  ;
-
-/* Arithmetic symbol tokens */
-EQUAL :   "=";
-LESS_THAN : "<";
-GREATER_THAN : ">";
-AMPERSAND :  "&";
-AT :  "@";
-POUND :  "#";
-PLUS :  "+";
-MINUS :  "-";
-STAR :  "*";
-DIV :  "/";
-PERCENT :  "%";
-PIPE :  "|";
-TILDE : "~";
diff --git a/src/parser/smt/smt_parser.g b/src/parser/smt/smt_parser.g
deleted file mode 100644
index b876e1649..000000000
--- a/src/parser/smt/smt_parser.g
+++ /dev/null
@@ -1,351 +0,0 @@
-/* *******************                                                        */
-/*  smt_parser.g
- ** Original author: dejan
- ** Major contributors: mdeters, cconway
- ** Minor contributors (to current version): none
- ** This file is part of the CVC4 prototype.
- ** Copyright (c) 2009, 2010  The Analysis of Computer Systems Group (ACSys)
- ** Courant Institute of Mathematical Sciences
- ** New York University
- ** See the file COPYING in the top-level source directory for licensing
- ** information.
- **
- ** Parser for SMT-LIB input language.
- **/
-
-header "post_include_hpp" {
-#include "parser/antlr_parser.h"
-#include "expr/command.h"
-}
-
-header "post_include_cpp" {
-#include "expr/type.h"
-#include "util/output.h"
-#include <vector>
-
-using namespace std;
-using namespace CVC4;
-using namespace CVC4::parser;
-}
-
-options {
-  language = "Cpp";                  // C++ output for antlr
-  namespaceStd = "std";              // Cosmetic option to get rid of long defines in generated code
-  namespaceAntlr = "antlr";          // Cosmetic option to get rid of long defines in generated code
-  namespace = "CVC4::parser";        // Wrap everything in the smtparser namespace
-}
-
-/**
- * AntlrSmtParser class is the parser for the SMT-LIB files.
- */
-class AntlrSmtParser extends Parser("AntlrParser");
-options {
-  genHashLines = true;              // Include line number information
-  importVocab = SmtVocabulary;      // Import the common vocabulary
-  defaultErrorHandler = false;      // Skip the default error handling, just break with exceptions
-  k = 2;
-}
-
-/**
- * Parses an expression.
- * @return the parsed expression
- */
-parseExpr returns [CVC4::Expr expr]
-  : expr = annotatedFormula
-  | EOF
-  ;
-
-/**
- * Parses a command (the whole benchmark)
- * @return the command of the benchmark
- */
-parseCommand returns [CVC4::Command* cmd]
-  : cmd = benchmark
-  ;
-
-/**
- * Matches the whole SMT-LIB benchmark.
- * @return the sequence command containing the whole problem
- */
-benchmark returns [Command* cmd]
-  : LPAREN BENCHMARK IDENTIFIER cmd = benchAttributes RPAREN
-  | EOF { cmd = 0; }
-  ;
-
-/**
- * Matches a sequence of benchmark attributes and returns a pointer to a
- * command sequence.
- * @return the command sequence
- */
-benchAttributes returns [CVC4::CommandSequence* cmd_seq = new CommandSequence()]
-{
-  Command* cmd;
-}
-  : (cmd = benchAttribute { if (cmd) cmd_seq->addCommand(cmd); } )+
-  ;
-
-/**
- * Matches a benchmark attribute, sucha as ':logic', ':formula', and returns
- * a corresponding command
- * @retrurn a command corresponding to the attribute
- */
-benchAttribute returns [Command* smt_command = 0]
-{
-  Expr formula;
-  string logic;
-  SetBenchmarkStatusCommand::BenchmarkStatus b_status = SetBenchmarkStatusCommand::SMT_UNKNOWN;
-}
-  : LOGIC_ATTR logic = identifier
-    { setLogic(logic);
-      smt_command = new SetBenchmarkLogicCommand(logic);   }
-  | ASSUMPTION_ATTR formula = annotatedFormula
-    { smt_command = new AssertCommand(formula);   }
-  | FORMULA_ATTR formula = annotatedFormula
-    { smt_command = new CheckSatCommand(formula); }
-  | STATUS_ATTR b_status = status
-    { smt_command = new SetBenchmarkStatusCommand(b_status); }
-  | EXTRAFUNS_ATTR LPAREN (functionDeclaration)+ RPAREN
-  | EXTRAPREDS_ATTR LPAREN (predicateDeclaration)+ RPAREN
-  | EXTRASORTS_ATTR LPAREN (sortDeclaration)+ RPAREN
-  | NOTES_ATTR STRING_LITERAL
-  | annotation
-  ;
-
-/**
- * Matches an annotated formula.
- * @return the expression representing the formula
- */
-annotatedFormula returns [CVC4::Expr formula]
-{
-  Debug("parser") << "annotated formula: " << LT(1)->getText() << endl;
-  Kind kind = CVC4::kind::UNDEFINED_KIND;
-  vector<Expr> args;
-  std::string name;
-  Expr expr1, expr2, expr3;
-}
-  : /* a built-in operator application */
-    LPAREN kind = builtinOp annotatedFormulas[args] RPAREN
-    { checkArity(kind, args.size());
-      if((kind == kind::AND || kind == kind::OR) && args.size() == 1) {
-        formula = args[0];
-      } else {
-        formula = mkExpr(kind, args);
-      }
-    }
-
-  | /* a "distinct" expr */
-    /* TODO: Should there be a DISTINCT kind in the Expr package? */
-    LPAREN DISTINCT annotatedFormulas[args] RPAREN
-    { formula = mkExpr(CVC4::kind::DISTINCT, args); }
-
-  | /* An ite expression */
-    LPAREN (ITE | IF_THEN_ELSE)
-    { Expr test, trueExpr, falseExpr; }
-    test = annotatedFormula
-    trueExpr = annotatedFormula
-    falseExpr = annotatedFormula
-    RPAREN
-    { formula = mkExpr(CVC4::kind::ITE, test, trueExpr, falseExpr); }
-
-  | /* A let/flet binding */
-    LPAREN (LET LPAREN name=variable[CHECK_UNDECLARED] 
-            | FLET LPAREN name=function_var[CHECK_UNDECLARED] )
-            expr1=annotatedFormula RPAREN
-    { defineVar(name,expr1); }
-    formula=annotatedFormula
-    { undefineVar(name); }
-    RPAREN
-
-  | /* A non-built-in function application */
-    { Expr f; }
-    LPAREN f = functionSymbol
-    { args.push_back(f); }
-    annotatedFormulas[args] RPAREN
-    // TODO: check arity
-    { formula = mkExpr(CVC4::kind::APPLY_UF, args); }
-
-  | /* a variable */
-    { std::string name; }
-    ( name = identifier[CHECK_DECLARED]
-      | name = variable[CHECK_DECLARED]
-      | name = function_var[CHECK_DECLARED] )
-    { formula = getVariable(name); }
-
-    /* constants */
-  | TRUE          { formula = getTrueExpr(); }
-  | FALSE         { formula = getFalseExpr(); }
-    /* TODO: quantifiers, arithmetic constants */
-  ;
-
-/**
- * Matches a sequence of annotaed formulas and puts them into the formulas
- * vector.
- * @param formulas the vector to fill with formulas
- */
-annotatedFormulas[std::vector<Expr>& formulas]
-{
-  Expr f;
-}
-  : ( f = annotatedFormula { formulas.push_back(f); } )+
-  ;
-
-/**
-* Matches on of the unary Boolean connectives.
-* @return the kind of the Bollean connective
-*/
-builtinOp returns [CVC4::Kind kind]
-{
-  Debug("parser") << "builtin: " << LT(1)->getText() << endl;
-}
-  : NOT      { kind = CVC4::kind::NOT;     }
-  | IMPLIES  { kind = CVC4::kind::IMPLIES; }
-  | AND      { kind = CVC4::kind::AND;     }
-  | OR       { kind = CVC4::kind::OR;      }
-  | XOR      { kind = CVC4::kind::XOR;     }
-  | IFF      { kind = CVC4::kind::IFF;     }
-  | EQUAL    { kind = CVC4::kind::EQUAL;   }
-    /* TODO: lt, gt, plus, minus, etc. */
-  ;
-
-/**
- * Matches a (possibly undeclared) predicate identifier (returning the string).
- * @param check what kind of check to do with the symbol
- */
-predicateName[DeclarationCheck check = CHECK_NONE] returns [std::string p]
-  :  p = identifier[check]
-  ;
-
-/**
- * Matches a (possibly undeclared) function identifier (returning the string)
- * @param check what kind of check to do with the symbol
- */
-functionName[DeclarationCheck check = CHECK_NONE] returns [std::string name]
-  :  name = identifier[check,SYM_FUNCTION]
-  ;
-
-/**
- * Matches an previously declared function symbol (returning an Expr)
- */
-functionSymbol returns [CVC4::Expr fun]
-{
-  string name;
-}
-  : name = functionName[CHECK_DECLARED]
-    { checkFunction(name);
-      fun = getFunction(name); }
-  ;
-
-/**
- * Matches an attribute name from the input (:attribute_name).
- */
-attribute
-  :  C_IDENTIFIER
-  ;
-
-variable[DeclarationCheck check = CHECK_NONE] returns [std::string name]
-  : x:VAR_IDENTIFIER
-    { name = x->getText();
-      checkDeclaration(name, check, SYM_VARIABLE); }
-  ;
-
-function_var[DeclarationCheck check = CHECK_NONE] returns [std::string name]
-  : x:FUN_IDENTIFIER
-    { name = x->getText();
-      checkDeclaration(name, check, SYM_FUNCTION); }
-  ;
-
-/**
- * Matches the sort symbol, which can be an arbitrary identifier.
- * @param check the check to perform on the name
- */
-sortName[DeclarationCheck check = CHECK_NONE] returns [std::string name]
-  : name = identifier[check,SYM_SORT]
-  ;
-
-sortSymbol returns [CVC4::Type* t]
-{
-  std::string name;
-}
-  : name = sortName { t = getSort(name); }
-  ;
-
-functionDeclaration
-{
-  string name;
-  Type* t;
-  std::vector<Type*> sorts;
-}
-  : LPAREN name = functionName[CHECK_UNDECLARED]
-      t = sortSymbol // require at least one sort
-    { sorts.push_back(t); }
-      sortList[sorts] RPAREN
-    { t = functionType(sorts);
-      mkVar(name, t); }
-  ;
-
-/**
- * Matches the declaration of a predicate and declares it
- */
-predicateDeclaration
-{
-  string p_name;
-  std::vector<Type*> p_sorts;
-  Type *t;
-}
-  : LPAREN p_name = predicateName[CHECK_UNDECLARED] sortList[p_sorts] RPAREN
-    { t = predicateType(p_sorts);
-      mkVar(p_name, t); }
-  ;
-
-sortDeclaration
-{
-  string name;
-}
-  : name = sortName[CHECK_UNDECLARED]
-    { newSort(name); }
-  ;
-
-/**
- * Matches a sequence of sort symbols and fills them into the given vector.
- */
-sortList[std::vector<Type*>& sorts]
-{
-  Type* t;
-}
-  : ( t = sortSymbol { sorts.push_back(t); })*
-  ;
-
-/**
- * Matches the status of the benchmark, one of 'sat', 'unsat' or 'unknown'.
- */
-status returns [ SetBenchmarkStatusCommand::BenchmarkStatus status ]
-  : SAT       { status = SetBenchmarkStatusCommand::SMT_SATISFIABLE;    }
-  | UNSAT     { status = SetBenchmarkStatusCommand::SMT_UNSATISFIABLE;  }
-  | UNKNOWN   { status = SetBenchmarkStatusCommand::SMT_UNKNOWN;        }
-  ;
-
-/**
- * Matches an annotation, which is an attribute name, with an optional user
- */
-annotation
-  : attribute (USER_VALUE)?
-  ;
-
-/**
- * Matches an identifier and returns a string.
- * @param check what kinds of check to do on the symbol
- * @return the id string
- */
-identifier[DeclarationCheck check = CHECK_NONE,
-           SymbolType type = SYM_VARIABLE]
-returns [std::string id]
-{
-  Debug("parser") << "identifier: " << LT(1)->getText()
-                  << " check? " << toString(check)
-                  << " type? " << toString(type) << endl;
-}
-  : x:IDENTIFIER
-    { id = x->getText();
-      checkDeclaration(id, check, type); }
-  ;
-
author	Christopher L. Conway <christopherleeconway@gmail.com>	2010-03-30 20:22:33 +0000
committer	Christopher L. Conway <christopherleeconway@gmail.com>	2010-03-30 20:22:33 +0000
commit	8730e9320a833a9eb0e65074f9988950b7424c0c (patch)
tree	1cb09404256743e208fece079ba473595e05edcd /src/parser/smt
parent	8c87c05ac56a5f29b2ae1e658f2d7d3b7b588163 (diff)