diff options
author | Morgan Deters <mdeters@gmail.com> | 2011-04-20 05:37:38 +0000 |
---|---|---|
committer | Morgan Deters <mdeters@gmail.com> | 2011-04-20 05:37:38 +0000 |
commit | 12c1e41862e4b12c3953272416a1edc103d299ee (patch) | |
tree | 9c7d3a044c33ffc3be177e6ca692eb4149add27c /src/parser | |
parent | 08df44e6b61999a14dd24a7a134146694dcb3596 (diff) |
Tuesday end-of-day commit.
Expected performance impact outside of datatypes/CVC parser is
negligible.
* CVC language LAMBDA, functional LET, type LET, precedence fixes,
bitvectors, and arrays, with partial parsing support also for
quantifiers, tuples, subranges, subtypes, and records
* support for complex recursive DATATYPE selectors, e.g.
tree = node(children:ARRAY INT OF tree) | leaf(data:INT)
these are complicated because they have to be left unresolved
at parse time and dealt with in a second pass.
* bugfix for Exprs/Types that occurred when setting them to null
(not Nodes/TypeNodes, just Exprs/Types).
* Cleanup/code review items
Diffstat (limited to 'src/parser')
-rw-r--r-- | src/parser/antlr_input.h | 5 | ||||
-rw-r--r-- | src/parser/cvc/Cvc.g | 719 | ||||
-rw-r--r-- | src/parser/cvc/cvc_input.h | 5 | ||||
-rw-r--r-- | src/parser/input.h | 3 | ||||
-rw-r--r-- | src/parser/parser.cpp | 81 | ||||
-rw-r--r-- | src/parser/parser.h | 105 | ||||
-rw-r--r-- | src/parser/parser_builder.cpp | 22 | ||||
-rw-r--r-- | src/parser/parser_builder.h | 50 | ||||
-rw-r--r-- | src/parser/smt/Smt.g | 8 | ||||
-rw-r--r-- | src/parser/smt/smt.cpp | 8 | ||||
-rw-r--r-- | src/parser/smt/smt.h | 4 | ||||
-rw-r--r-- | src/parser/smt/smt_input.h | 9 | ||||
-rw-r--r-- | src/parser/smt2/Smt2.g | 4 | ||||
-rw-r--r-- | src/parser/smt2/smt2.cpp | 10 | ||||
-rw-r--r-- | src/parser/smt2/smt2.h | 8 | ||||
-rw-r--r-- | src/parser/smt2/smt2_input.h | 5 |
16 files changed, 737 insertions, 309 deletions
diff --git a/src/parser/antlr_input.h b/src/parser/antlr_input.h index c48185f58..4ae063266 100644 --- a/src/parser/antlr_input.h +++ b/src/parser/antlr_input.h @@ -54,10 +54,10 @@ class AntlrInputStream : public InputStream { bool fileIsTemporary = false); /* This is private and unimplemented, because you should never use it. */ - AntlrInputStream(const AntlrInputStream& inputStream); + AntlrInputStream(const AntlrInputStream& inputStream) CVC4_UNUSED; /* This is private and unimplemented, because you should never use it. */ - AntlrInputStream& operator=(const AntlrInputStream& inputStream); + AntlrInputStream& operator=(const AntlrInputStream& inputStream) CVC4_UNUSED; public: @@ -178,6 +178,7 @@ public: /** Get a bitvector constant from the text of the number and the size token */ static BitVector tokenToBitvector(pANTLR3_COMMON_TOKEN number, pANTLR3_COMMON_TOKEN size); + /** Retrieve the remaining text in this input. */ std::string getUnparsedText(); protected: diff --git a/src/parser/cvc/Cvc.g b/src/parser/cvc/Cvc.g index 30866eddb..792f53605 100644 --- a/src/parser/cvc/Cvc.g +++ b/src/parser/cvc/Cvc.g @@ -19,8 +19,13 @@ grammar Cvc; options { - language = 'C'; // C output for antlr -// defaultErrorHandler = false; // Skip the default error handling, just break with exceptions + // C output for antlr + language = 'C'; + + // Skip the default error handling, just break with exceptions + // defaultErrorHandler = false; + + // Only lookahead of <= k requested (disable for LL* parsing) k = 2; } @@ -61,8 +66,13 @@ tokens { OF_TOK = 'OF'; WITH_TOK = 'WITH'; + SUBTYPE_TOK = 'SUBTYPE'; + FORALL_TOK = 'FORALL'; EXISTS_TOK = 'EXISTS'; + PATTERN_TOK = 'PATTERN'; + + LAMBDA = 'LAMBDA'; // Symbols @@ -77,6 +87,7 @@ tokens { SEMICOLON = ';'; BAR = '|'; DOT = '.'; + DOTDOT = '..'; SQHASH = '[#'; HASHSQ = '#]'; @@ -104,8 +115,6 @@ tokens { INTDIV_TOK = 'DIV'; FLOOR_TOK = 'FLOOR'; - //IS_INTEGER_TOK = 'IS_INTEGER'; - // Bitvectors BITVECTOR_TOK = 'BITVECTOR'; @@ -131,9 +140,10 @@ tokens { BVCOMP_TOK = 'BVCOMP'; BVXNOR_TOK = 'BVXNOR'; CONCAT_TOK = '@'; - BVTOINT_TOK = 'BVTOINT'; - INTTOBV_TOK = 'INTTOBV'; - BOOLEXTRACT_TOK = 'BOOLEXTRACT'; + //BVTOINT_TOK = 'BVTOINT'; + //INTTOBV_TOK = 'INTTOBV'; + //BOOLEXTRACT_TOK = 'BOOLEXTRACT'; + //IS_INTEGER_TOK = 'IS_INTEGER'; BVLT_TOK = 'BVLT'; BVGT_TOK = 'BVGT'; BVLE_TOK = 'BVLE'; @@ -461,11 +471,15 @@ command returns [CVC4::Command* cmd = 0] // Datatypes can be mututally-recursive if they're in the same // definition block, separated by a comma. So we parse everything // and then ask the ExprManager to resolve everything in one go. - | DATATYPE_TOK datatypeDef[dts] + | DATATYPE_TOK + { /* open a scope to keep the UnresolvedTypes contained */ + PARSER_STATE->pushScope(); } + datatypeDef[dts] ( COMMA datatypeDef[dts] )* END_TOK SEMICOLON - { cmd = new DatatypeDeclarationCommand(PARSER_STATE->mkMutualDatatypeTypes(dts)); } - | declaration[cmd] + { PARSER_STATE->popScope(); + cmd = new DatatypeDeclarationCommand(PARSER_STATE->mkMutualDatatypeTypes(dts)); } + | toplevelDeclaration[cmd] | EOF ; @@ -486,46 +500,154 @@ symbolicExpr[CVC4::SExpr& sexpr] ; /** - * Match a declaration + * Match a top-level declaration. */ -declaration[CVC4::Command*& cmd] +toplevelDeclaration[CVC4::Command*& cmd] @init { std::vector<std::string> ids; Type t; Debug("parser-extra") << "declaration: " << AntlrInput::tokenText(LT(1)) << std::endl; } - : // FIXME: These could be type or function declarations, if that matters. - identifierList[ids, CHECK_UNDECLARED, SYM_VARIABLE] COLON declType[t, ids] SEMICOLON - { cmd = new DeclarationCommand(ids, t); } + : identifierList[ids,CHECK_NONE,SYM_VARIABLE] COLON + ( declareVariables[t,ids,true] { cmd = new DeclarationCommand(ids, t); } + | declareTypes[ids] { cmd = new DeclarationCommand(ids, EXPR_MANAGER->kindType()); } ) SEMICOLON + ; + +/** + * A bound variable declaration. + */ +boundVarDecl[std::vector<std::string>& ids, CVC4::Type& t] + : identifierList[ids,CHECK_NONE,SYM_VARIABLE] COLON declareVariables[t,ids,false] ; -/** Match the right-hand side of a declaration. Returns the type. */ -declType[CVC4::Type& t, std::vector<std::string>& idList] +/** + * A series of bound variable declarations. + */ +boundVarDecls @init { - Expr f; - Debug("parser-extra") << "declType: " << AntlrInput::tokenText(LT(1)) << std::endl; + std::vector<std::string> ids; + Type t; } - : /* A sort declaration (e.g., "T : TYPE") */ - TYPE_TOK - { PARSER_STATE->mkSorts(idList); - t = EXPR_MANAGER->kindType(); } - | /* A type alias */ - TYPE_TOK EQUAL_TOK type[t] + : boundVarDecl[ids,t] ( COMMA boundVarDecl[ids,t] )* + ; + +boundVarDeclsReturn[std::vector<CVC4::Expr>& terms, + std::vector<CVC4::Type>& types] +@init { + std::vector<std::string> ids; + Type t; + terms.clear(); + types.clear(); +} + : boundVarDeclReturn[terms,types] ( COMMA boundVarDeclReturn[terms,types] )* + ; + +boundVarDeclReturn[std::vector<CVC4::Expr>& terms, + std::vector<CVC4::Type>& types] +@init { + std::vector<std::string> ids; + Type t; + // NOTE: do not clear the vectors here! +} + : identifierList[ids,CHECK_NONE,SYM_VARIABLE] COLON type[t,CHECK_DECLARED] + { const std::vector<Expr>& vars = PARSER_STATE->mkVars(ids, t); + terms.insert(terms.end(), vars.begin(), vars.end()); + for(unsigned i = 0; i < vars.size(); ++i) { + types.push_back(t); + } + } + ; + +/** + * Match the right-hand-side of a type declaration. Unlike + * declareVariables[] below, don't need to return the Type, since it's + * always the KindType (the type of types). Also don't need toplevel + * because type declarations are always top-level, except for + * type-lets, which don't use this rule. + */ +declareTypes[const std::vector<std::string>& idList] +@init { + Type t; +} + /* A sort declaration (e.g., "T : TYPE") */ + : TYPE_TOK + { for(std::vector<std::string>::const_iterator i = idList.begin(); + i != idList.end(); + ++i) { + // Don't allow a type variable to clash with a previously + // declared type variable, however a type variable and a + // non-type variable can clash unambiguously. Break from CVC3 + // behavior here. + PARSER_STATE->checkDeclaration(*i, CHECK_UNDECLARED, SYM_SORT); + } + PARSER_STATE->mkSorts(idList); + } + + /* A type alias "T : TYPE = foo..." */ + | TYPE_TOK EQUAL_TOK type[t,CHECK_DECLARED] { for(std::vector<std::string>::const_iterator i = idList.begin(); i != idList.end(); ++i) { + PARSER_STATE->checkDeclaration(*i, CHECK_UNDECLARED, SYM_SORT); PARSER_STATE->defineType(*i, t); } - t = EXPR_MANAGER->kindType(); } - | /* A variable declaration */ - type[t] ( EQUAL_TOK formula[f] )? + } + ; + +/** + * Match the right-hand side of a variable declaration. Returns the + * type. Some IDs might have been declared previously, and are not + * re-declared if topLevel is true (CVC allows re-declaration if the + * types are compatible---if they aren't compatible, an error is + * thrown). Also if topLevel is true, variable definitions are + * permitted. + */ +declareVariables[CVC4::Type& t, const std::vector<std::string>& idList, bool topLevel] +@init { + Expr f; + Debug("parser-extra") << "declType: " << AntlrInput::tokenText(LT(1)) << std::endl; +} + /* A variable declaration (or definition) */ + : type[t,CHECK_DECLARED] ( EQUAL_TOK formula[f] )? { if(f.isNull()) { - PARSER_STATE->mkVars(idList, t); + Debug("parser") << "working on " << idList.front() << " : " << t << std::endl; + // CVC language allows redeclaration of variables if types are the same + for(std::vector<std::string>::const_iterator i = idList.begin(), + i_end = idList.end(); + i != i_end; + ++i) { + if(PARSER_STATE->isDeclared(*i, SYM_VARIABLE)) { + Type oldType = PARSER_STATE->getType(*i); + Debug("parser") << " " << *i << " was declared previously " + << "with type " << oldType << std::endl; + if(oldType != t) { + std::stringstream ss; + ss << Expr::setlanguage(language::output::LANG_CVC4) + << "incompatible type for `" << *i << "':" << std::endl + << " old type: " << oldType << std::endl + << " new type: " << t << std::endl; + PARSER_STATE->parseError(ss.str()); + } else { + Debug("parser") << " types " << t << " and " << oldType + << " are compatible" << std::endl; + } + } else { + Debug("parser") << " " << *i << " not declared" << std::endl; + PARSER_STATE->mkVar(*i, t); + } + } } else { + if(!topLevel) { + // must be top-level; doesn't make sense to write something + // like e.g. FORALL(x:INT = 4): [...] + PARSER_STATE->parseError("cannot construct a definition here; maybe you want a LET"); + } + Debug("parser") << "made " << idList.front() << " = " << f << std::endl; for(std::vector<std::string>::const_iterator i = idList.begin(), i_end = idList.end(); i != i_end; ++i) { + PARSER_STATE->checkDeclaration(*i, CHECK_UNDECLARED, SYM_VARIABLE); PARSER_STATE->defineFunction(*i, f); } } @@ -533,29 +655,6 @@ declType[CVC4::Type& t, std::vector<std::string>& idList] ; /** - * Match the type in a declaration and do the declaration binding. - * TODO: Actually parse sorts into Type objects. - */ -type[CVC4::Type& t] -@init { - Type t2; - std::vector<Type> typeList; - Debug("parser-extra") << "type: " << AntlrInput::tokenText(LT(1)) << std::endl; -} - : /* Simple type */ - bitvectorType[t] - | /* Single-parameter function type */ - baseType[t] ARROW_TOK baseType[t2] - { t = EXPR_MANAGER->mkFunctionType(t, t2); } - | /* Multi-parameter function type */ - LPAREN baseType[t] - { typeList.push_back(t); } - ( COMMA baseType[t] { typeList.push_back(t); } )* - RPAREN ARROW_TOK baseType[t] - { t = EXPR_MANAGER->mkFunctionType(typeList, t); } - ; - -/** * Matches a list of identifiers separated by a comma and puts them in the * given list. * @param idList the list to fill with identifiers. @@ -566,9 +665,10 @@ identifierList[std::vector<std::string>& idList, CVC4::parser::SymbolType type] @init { std::string id; + idList.clear(); } : identifier[id,check,type] { idList.push_back(id); } - (COMMA identifier[id,check,type] { idList.push_back(id); })* + ( COMMA identifier[id,check,type] { idList.push_back(id); } )* ; @@ -584,53 +684,145 @@ identifier[std::string& id, ; /** - * Matches a bitvector type. + * Match the type in a declaration and do the declaration binding. */ -bitvectorType[CVC4::Type& t] - : arrayType[t] - | BITVECTOR_TOK LPAREN INTEGER_LITERAL RPAREN - { t = EXPR_MANAGER->mkBitVectorType(AntlrInput::tokenToUnsigned($INTEGER_LITERAL)); } - ; - -arrayType[CVC4::Type& t] +type[CVC4::Type& t, + CVC4::parser::DeclarationCheck check] @init { Type t2; + bool lhs; + std::vector<Type> args; } - : baseType[t] - | ARRAY_TOK bitvectorType[t] OF_TOK bitvectorType[t2] - { t = EXPR_MANAGER->mkArrayType(t, t2); } + /* a type, possibly a function */ + : restrictedTypePossiblyFunctionLHS[t,check,lhs] + { if(lhs) { + Assert(t.isTuple()); + args = TupleType(t).getTypes(); + } else { + args.push_back(t); + } + Debug("parser") << "type: " << t << std::endl; + } + ( ARROW_TOK type[t2,check] { args.push_back(t2); } )? + { if(t2.isNull()) { + if(lhs) { + PARSER_STATE->parseError("improperly-placed type list; expected `->' after to define a function; or else maybe these parentheses were meant to be square brackets, to define a tuple type?"); + } + } else { + t = EXPR_MANAGER->mkFunctionType(args); + } + } + + /* type-lets: typeLetDecl defines the types, we just manage the + * scopes here. NOTE that LET in the CVC language is always + * sequential! */ + | LET_TOK { PARSER_STATE->pushScope(); } + typeLetDecl[check] ( COMMA typeLetDecl[check] )* IN_TOK type[t,check] + { PARSER_STATE->popScope(); } ; -/** - * Matches a type (which MUST be already declared). - * - * @return the type identifier - */ -baseType[CVC4::Type& t] - : maybeUndefinedBaseType[t,CHECK_DECLARED] +// A restrictedType is one that is not a "bare" function type (it can +// be enclosed in parentheses) and is not a type list. To support the +// syntax +// +// f : (nat, nat) -> nat; +// +// we have to permit restrictedTypes to be type lists (as on the LHS +// there). The "type" rule above uses restictedTypePossiblyFunctionLHS +// directly in order to implement that; this rule allows a type list to +// parse but then issues an error. +restrictedType[CVC4::Type& t, + CVC4::parser::DeclarationCheck check] +@init { + bool lhs; +} + : restrictedTypePossiblyFunctionLHS[t,check,lhs] + { if(lhs) { PARSER_STATE->parseError("improperly-placed type list; maybe these parentheses were meant to be square brackets, to define a tuple type?"); } } ; /** - * Matches a type (which may not be declared yet). Returns the - * identifier. If the type is declared, returns the Type in the 't' - * parameter; otherwise a null Type is returned in 't'. If 'check' is - * CHECK_DECLARED and the type is not declared, an exception is - * thrown. - * - * @return the type identifier in 't', and the id (where applicable) in 'id' + * lhs is set to "true" on output if we have a list of types, so an + * ARROW must follow. An ARROW can always follow; lhs means it MUST. */ -maybeUndefinedBaseType[CVC4::Type& t, - CVC4::parser::DeclarationCheck check] returns [CVC4::parser::cvc::mystring id] +restrictedTypePossiblyFunctionLHS[CVC4::Type& t, + CVC4::parser::DeclarationCheck check, + bool& lhs] @init { - Debug("parser-extra") << "base type: " << AntlrInput::tokenText(LT(1)) << std::endl; - AssertArgument(check == CHECK_DECLARED || check == CHECK_NONE, - check, "CVC parser: can't use CHECK_UNDECLARED with maybeUndefinedBaseType[]"); + Type t2; + Expr f; + std::string id; + std::vector<Type> types; + lhs = false; } - : BOOLEAN_TOK { t = EXPR_MANAGER->booleanType(); id = AntlrInput::tokenText($BOOLEAN_TOK); } - | INT_TOK { t = EXPR_MANAGER->integerType(); id = AntlrInput::tokenText($INT_TOK); } - | REAL_TOK { t = EXPR_MANAGER->realType(); id = AntlrInput::tokenText($REAL_TOK); } - | typeSymbol[t,check] - { id = $typeSymbol.id; } + /* named types */ + : identifier[id,check,SYM_SORT] + { if(check == CHECK_DECLARED || + PARSER_STATE->isDeclared(id, SYM_SORT)) { + t = PARSER_STATE->getSort(id); + } else { + t = PARSER_STATE->mkUnresolvedType(id); + } + } + + /* array types */ + | ARRAY_TOK restrictedType[t,check] OF_TOK restrictedType[t2,check] + { t = EXPR_MANAGER->mkArrayType(t, t2); } + + /* subtypes */ + | SUBTYPE_TOK LPAREN formula[f] ( COMMA formula[f] )? RPAREN + { PARSER_STATE->unimplementedFeature("subtypes not supported yet"); + t = Type(); } + + /* subrange types */ + | LBRACKET bound DOTDOT bound RBRACKET + { PARSER_STATE->unimplementedFeature("subranges not supported yet"); + t = Type(); } + + /* tuple types / old-style function types */ + | LBRACKET type[t,check] { types.push_back(t); } + ( COMMA type[t,check] { types.push_back(t); } )* RBRACKET + { if(types.size() == 1) { + if(types.front().isFunction()) { + // old style function syntax [ T -> U ] + PARSER_STATE->parseError("old-style function type syntax not supported anymore; please use the new syntax"); + } else { + PARSER_STATE->parseError("I'm not sure what you're trying to do here; tuples must have > 1 type"); + } + } else { + // tuple type [ T, U, V... ] + t = EXPR_MANAGER->mkTupleType(types); + } + } + + /* record types */ + | SQHASH identifier[id,CHECK_NONE,SYM_SORT] COLON type[t,check] + ( COMMA identifier[id,CHECK_NONE,SYM_SORT] COLON type[t,check] )* HASHSQ + { PARSER_STATE->unimplementedFeature("records not supported yet"); + t = Type(); } + + /* bitvector types */ + | BITVECTOR_TOK LPAREN k=numeral RPAREN + { t = EXPR_MANAGER->mkBitVectorType(k); } + + /* basic types */ + | BOOLEAN_TOK { t = EXPR_MANAGER->booleanType(); } + | REAL_TOK { t = EXPR_MANAGER->realType(); } + | INT_TOK { t = EXPR_MANAGER->integerType(); } + + /* Parenthesized general type, or the lhs of an ARROW (a list of + * types). These two things are combined to avoid conflicts in + * parsing. */ + | LPAREN type[t,check] { types.push_back(t); } + ( COMMA type[t,check] { lhs = true; types.push_back(t); } )* RPAREN + { if(lhs) { t = EXPR_MANAGER->mkTupleType(types); } + // if !lhs, t is already set up correctly, nothing to do.. + } + ; + +bound + : '_' + | numeral + | MINUS_TOK numeral ; /** @@ -653,51 +845,82 @@ typeSymbol[CVC4::Type& t, } ; -/** - * Matches a CVC4 formula. - * - * @return the expression representing the formula - */ -formula[CVC4::Expr& formula] +typeLetDecl[CVC4::parser::DeclarationCheck check] @init { - Debug("parser-extra") << "formula: " << AntlrInput::tokenText(LT(1)) << std::endl; + Type t; + std::string id; } - : letBinding[formula] + : identifier[id,CHECK_NONE,SYM_SORT] (COLON TYPE_TOK)? EQUAL_TOK restrictedType[t,check] + { PARSER_STATE->defineType(id, t); } ; /** - * Matches a comma-separated list of formulas + * Matches a CVC4 expression. Formulas and terms are not really + * distinguished during parsing; please ignore the naming of the + * grammar rules. + * + * @return the expression representing the formula/term */ -formulaList[std::vector<CVC4::Expr>& formulas] +formula[CVC4::Expr& f] @init { - Expr f; + Debug("parser-extra") << "formula: " << AntlrInput::tokenText(LT(1)) << std::endl; + std::vector<std::string> ids; + std::vector<Expr> terms; + std::vector<Type> types; + Type t; } - : formula[f] { formulas.push_back(f); } - ( COMMA formula[f] - { formulas.push_back(f); } - )* - ; + /* quantifiers */ + : FORALL_TOK { PARSER_STATE->pushScope(); } LPAREN + boundVarDecl[ids,t] (COMMA boundVarDecl[ids,t])* RPAREN + COLON instantiationPatterns? formula[f] + { PARSER_STATE->popScope(); + PARSER_STATE->unimplementedFeature("quantifiers not supported yet"); + f = EXPR_MANAGER->mkVar(EXPR_MANAGER->booleanType()); + } + | EXISTS_TOK { PARSER_STATE->pushScope(); } LPAREN + boundVarDecl[ids,t] (COMMA boundVarDecl[ids,t])* RPAREN + COLON instantiationPatterns? formula[f] + { PARSER_STATE->popScope(); + PARSER_STATE->unimplementedFeature("quantifiers not supported yet"); + f = EXPR_MANAGER->mkVar(EXPR_MANAGER->booleanType()); + } -/** - * Matches a LET binding - */ -letBinding[CVC4::Expr& f] -@init { -} - : LET_TOK - { PARSER_STATE->pushScope(); } - letDecls - IN_TOK letBinding[f] - { PARSER_STATE->popScope(); } + /* lets: letDecl defines the variables and functionss, we just + * manage the scopes here. NOTE that LET in the CVC language is + * always sequential! */ + | LET_TOK { PARSER_STATE->pushScope(); } + letDecl ( COMMA letDecl )* + IN_TOK formula[f] { PARSER_STATE->popScope(); } + + /* lambda */ + | LAMBDA { PARSER_STATE->pushScope(); } LPAREN + boundVarDeclsReturn[terms,types] + RPAREN COLON formula[f] + { PARSER_STATE->popScope(); + Type t = EXPR_MANAGER->mkFunctionType(types, f.getType()); + Expr func = PARSER_STATE->mkFunction("anonymous", t); + Command* cmd = new DefineFunctionCommand(func, terms, f); + PARSER_STATE->preemptCommand(cmd); + f = func; + } + + /* array literals */ + | ARRAY_TOK { PARSER_STATE->pushScope(); } LPAREN + boundVarDecl[ids,t] RPAREN COLON formula[f] + { PARSER_STATE->popScope(); + PARSER_STATE->unimplementedFeature("array literals not supported yet"); + f = EXPR_MANAGER->mkVar(EXPR_MANAGER->mkArrayType(t, f.getType())); + } + + /* everything else */ | booleanFormula[f] ; -/** - * Matches (and defines) LET declarations in order. Note this implements - * sequential-let (think "let*") rather than simultaneous-let. - */ -letDecls - : letDecl (COMMA letDecl)* +instantiationPatterns +@init { + Expr f; +} + : ( PATTERN_TOK LPAREN formula[f] (COMMA formula[f])* RPAREN COLON )+ ; /** @@ -709,7 +932,12 @@ letDecl std::string name; } : identifier[name,CHECK_NONE,SYM_VARIABLE] EQUAL_TOK formula[e] - { PARSER_STATE->defineVar(name, e); } + { Debug("parser") << Expr::setlanguage(language::output::LANG_CVC4) << e.getType() << std::endl; + PARSER_STATE->defineVar(name, e); + Debug("parser") << "LET[" << PARSER_STATE->getDeclarationLevel() << "]: " + << name << std::endl + << " ==>" << " " << e << std::endl; + } ; booleanFormula[CVC4::Expr& f] @@ -724,17 +952,22 @@ booleanFormula[CVC4::Expr& f] --notCount; f = EXPR_MANAGER->mkExpr(kind::NOT, f); } - expressions.push_back(f); } + expressions.push_back(f); + Assert(notCount == 0); + } ( booleanBinop[op] ( NOT_TOK { ++notCount; } )* comparison[f] - { operators.push_back(op); - while(notCount > 0) { + { while(notCount > 0) { --notCount; f = EXPR_MANAGER->mkExpr(kind::NOT, f); } + operators.push_back(op); +# warning cannot do NOT FORALL or TRUE AND FORALL, or.. expressions.push_back(f); + Assert(notCount == 0); } )* - { f = createPrecedenceTree(EXPR_MANAGER, expressions, operators); } + { Assert(notCount == 0); + f = createPrecedenceTree(EXPR_MANAGER, expressions, operators); } ; booleanBinop[unsigned& op] @@ -812,9 +1045,8 @@ uminusTerm[CVC4::Expr& f] @init { unsigned minusCount = 0; } - : /* Unary minus */ -// (MINUS_TOK { ++minusCount; })* concatBitwiseTerm[f] - (MINUS_TOK { ++minusCount; })+ concatBitwiseTerm[f] + /* Unary minus */ + : (MINUS_TOK { ++minusCount; })+ concatBitwiseTerm[f] { while(minusCount > 0) { --minusCount; f = MK_EXPR(CVC4::kind::UMINUS, f); } } | concatBitwiseTerm[f] ; @@ -844,8 +1076,8 @@ bitwiseXorTerm[CVC4::Expr& f] @init { Expr f2; } - : /* BV xor */ - BVXOR_TOK LPAREN formula[f] COMMA formula[f] RPAREN + /* BV xor */ + : BVXOR_TOK LPAREN formula[f] COMMA formula[f] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_XOR, f, f2); } | BVNAND_TOK LPAREN formula[f] COMMA formula[f] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_NAND, f, f2); } @@ -858,8 +1090,8 @@ bitwiseXorTerm[CVC4::Expr& f] | bvNegTerm[f] ; bvNegTerm[CVC4::Expr& f] - : /* BV neg */ - BVNEG_TOK bvNegTerm[f] + /* BV neg */ + : BVNEG_TOK bvNegTerm[f] { f = MK_EXPR(CVC4::kind::BITVECTOR_NOT, f); } | bvUminusTerm[f] ; @@ -867,14 +1099,15 @@ bvUminusTerm[CVC4::Expr& f] @init { Expr f2; } - : /* BV unary minus */ - BVUMINUS_TOK LPAREN formula[f] RPAREN + /* BV unary minus */ + : BVUMINUS_TOK LPAREN formula[f] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_NEG, f); } - | BVPLUS_TOK LPAREN INTEGER_LITERAL COMMA formula[f] + /* BV addition */ + | BVPLUS_TOK LPAREN k=numeral COMMA formula[f] ( COMMA formula[f2] { f = MK_EXPR(CVC4::kind::BITVECTOR_PLUS, f, f2); } )+ RPAREN { unsigned size = BitVectorType(f.getType()).getSize(); - unsigned k = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); if(k == 0) { +# warning cannot do BVPLUS(...)[i:j] PARSER_STATE->parseError("BVPLUS(k,_,_,...) must have k > 0"); } if(k > size) { @@ -883,9 +1116,9 @@ bvUminusTerm[CVC4::Expr& f] f = MK_EXPR(MK_CONST(BitVectorExtract(0, k - 1)), f); } } - | BVSUB_TOK LPAREN INTEGER_LITERAL COMMA formula[f] COMMA formula[f2] RPAREN + /* BV subtraction */ + | BVSUB_TOK LPAREN k=numeral COMMA formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_SUB, f, f2); - unsigned k = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); if(k == 0) { PARSER_STATE->parseError("BVSUB(k,_,_) must have k > 0"); } @@ -896,9 +1129,9 @@ bvUminusTerm[CVC4::Expr& f] f = MK_EXPR(MK_CONST(BitVectorExtract(0, k - 1)), f); } } - | BVMULT_TOK LPAREN INTEGER_LITERAL COMMA formula[f] COMMA formula[f2] RPAREN + /* BV multiplication */ + | BVMULT_TOK LPAREN k=numeral COMMA formula[f] COMMA formula[f2] RPAREN { MK_EXPR(CVC4::kind::BITVECTOR_MULT, f, f2); - unsigned k = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); if(k == 0) { PARSER_STATE->parseError("BVMULT(k,_,_) must have k > 0"); } @@ -909,41 +1142,56 @@ bvUminusTerm[CVC4::Expr& f] f = MK_EXPR(MK_CONST(BitVectorExtract(0, k - 1)), f); } } + /* BV unsigned division */ | BVUDIV_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_UDIV, f, f2); } + /* BV signed division */ | BVSDIV_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_SDIV, f, f2); } + /* BV unsigned remainder */ | BVUREM_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_UREM, f, f2); } + /* BV signed remainder */ | BVSREM_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_SREM, f, f2); } + /* BV signed modulo */ | BVSMOD_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_SMOD, f, f2); } + /* BV left shift */ | BVSHL_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_SHL, f, f2); } + /* BV arithmetic right shift */ | BVASHR_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_ASHR, f, f2); } + /* BV logical left shift */ | BVLSHR_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_LSHR, f, f2); } - | SX_TOK LPAREN formula[f] COMMA INTEGER_LITERAL RPAREN - { unsigned k = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); - unsigned n = BitVectorType(f.getType()).getSize(); - // sign extension in TheoryBitVector is defined as in SMT-LIBv2 + /* BV sign extension */ + | SX_TOK LPAREN formula[f] COMMA k=numeral RPAREN + { unsigned n = BitVectorType(f.getType()).getSize(); + // Sign extension in TheoryBitVector is defined as in SMT-LIBv2 + // which is different than in the CVC language + // SX(BITVECTOR(k), n) in CVC language extends to n bits + // In SMT-LIBv2, such a thing expands to k + n bits f = MK_EXPR(MK_CONST(BitVectorSignExtend(k - n)), f); } - | BVZEROEXTEND_TOK LPAREN formula[f] COMMA INTEGER_LITERAL RPAREN - { unsigned k = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); - unsigned n = BitVectorType(f.getType()).getSize(); - // also zero extension - f = MK_EXPR(MK_CONST(BitVectorZeroExtend(k - n)), f); } - | BVREPEAT_TOK LPAREN formula[f] COMMA INTEGER_LITERAL RPAREN - { unsigned k = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); - f = MK_EXPR(MK_CONST(BitVectorRepeat(k)), f); } - | BVROTR_TOK LPAREN formula[f] COMMA INTEGER_LITERAL RPAREN - { unsigned k = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); - f = MK_EXPR(MK_CONST(BitVectorSignExtend(k)), f); } - | BVROTL_TOK LPAREN formula[f] COMMA INTEGER_LITERAL RPAREN - { unsigned k = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); - f = MK_EXPR(MK_CONST(BitVectorRotateLeft(k)), f); } + /* BV zero extension */ + | BVZEROEXTEND_TOK LPAREN formula[f] COMMA k=numeral RPAREN + { unsigned n = BitVectorType(f.getType()).getSize(); + // Zero extension in TheoryBitVector is defined as in SMT-LIBv2 + // which is the same as in CVC3, but different than SX! + // SX(BITVECTOR(k), n) in CVC language extends to n bits + // BVZEROEXTEND(BITVECTOR(k), n) in CVC language extends to k + n bits + f = MK_EXPR(MK_CONST(BitVectorZeroExtend(k)), f); } + /* BV repeat operation */ + | BVREPEAT_TOK LPAREN formula[f] COMMA k=numeral RPAREN + { f = MK_EXPR(MK_CONST(BitVectorRepeat(k)), f); } + /* BV rotate right */ + | BVROTR_TOK LPAREN formula[f] COMMA k=numeral RPAREN + { f = MK_EXPR(MK_CONST(BitVectorSignExtend(k)), f); } + /* BV rotate left */ + | BVROTL_TOK LPAREN formula[f] COMMA k=numeral RPAREN + { f = MK_EXPR(MK_CONST(BitVectorRotateLeft(k)), f); } + /* left and right shifting with << and >>, or something else */ | bvShiftTerm[f] ; @@ -952,8 +1200,9 @@ bvShiftTerm[CVC4::Expr& f] bool left = false; } : bvComparison[f] - ( (LEFTSHIFT_TOK { left = true; } | RIGHTSHIFT_TOK) INTEGER_LITERAL - { unsigned k = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); + ( (LEFTSHIFT_TOK { left = true; } | RIGHTSHIFT_TOK) k=numeral + { // Defined in: + // http://www.cs.nyu.edu/acsys/cvc3/doc/user_doc.html#user_doc_pres_lang_expr_bit if(left) { f = MK_EXPR(kind::BITVECTOR_CONCAT, f, MK_CONST(BitVector(k))); } else { @@ -969,8 +1218,8 @@ bvComparison[CVC4::Expr& f] @init { Expr f2; } - : /* BV comparison */ - BVLT_TOK LPAREN formula[f] COMMA formula[f2] RPAREN + /* BV comparisons */ + : BVLT_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_ULT, f, f2); } | BVLE_TOK LPAREN formula[f] COMMA formula[f2] RPAREN { f = MK_EXPR(CVC4::kind::BITVECTOR_ULE, f, f2); } @@ -999,16 +1248,15 @@ selectExtractTerm[CVC4::Expr& f] Expr f2; bool extract; } - : /* array select / bitvector extract */ - simpleTerm[f] + /* array select / bitvector extract */ + : simpleTerm[f] ( { extract = false; } - LBRACKET formula[f2] ( COLON INTEGER_LITERAL { extract = true; } )? RBRACKET + LBRACKET formula[f2] ( COLON k2=numeral { extract = true; } )? RBRACKET { if(extract) { if(f2.getKind() != kind::CONST_INTEGER) { - PARSER_STATE->parseError("bitvector extraction requires [INTEGER_LITERAL:INTEGER_LITERAL] range"); + PARSER_STATE->parseError("bitvector extraction requires [numeral:numeral] range"); } unsigned k1 = f2.getConst<Integer>().getLong(); - unsigned k2 = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); f = MK_EXPR(MK_CONST(BitVectorExtract(k1, k2)), f); } else { f = MK_EXPR(CVC4::kind::SELECT, f, f2); @@ -1025,37 +1273,63 @@ simpleTerm[CVC4::Expr& f] std::vector<Expr> args; Debug("parser-extra") << "term: " << AntlrInput::tokenText(LT(1)) << std::endl; } - : /* function/constructor application */ - functionSymbol[f] - { args.push_back( f ); } - LPAREN formulaList[args] RPAREN + /* function/constructor application */ + : identifier[name,CHECK_DECLARED,SYM_VARIABLE] + { Debug("parser") << " at level " << PARSER_STATE->getDeclarationLevel() << std::endl; + Debug("parser") << " isFunction " << PARSER_STATE->isFunctionLike(name) << std::endl; + Debug("parser") << " isDefinedFunction " << PARSER_STATE->isDefinedFunction(name) << std::endl; + Debug("parser") << " name " << name << std::endl; + Debug("parser") << " isDeclared " << PARSER_STATE->isDeclared(name) << std::endl; + Debug("parser") << " getType " << PARSER_STATE->getType(name) << std::endl; + PARSER_STATE->checkFunctionLike(name); + f = PARSER_STATE->getVariable(name); + args.push_back(f); + } + LPAREN formula[f] { args.push_back(f); } + ( COMMA formula[f] { args.push_back(f); } )* RPAREN // TODO: check arity - { Type t = f.getType(); - if( t.isFunction() ) { + { Type t = args.front().getType(); + Debug("parser") << "type is " << t << std::endl; + Debug("parser") << "expr is " << args.front() << std::endl; + if(PARSER_STATE->isDefinedFunction(name)) { + f = MK_EXPR(CVC4::kind::APPLY, args); + } else if(t.isFunction()) { f = MK_EXPR(CVC4::kind::APPLY_UF, args); - } else if( t.isConstructor() ) { + } else if(t.isConstructor()) { Debug("parser-idt") << "apply constructor: " << name.c_str() << " " << args.size() << std::endl; f = MK_EXPR(CVC4::kind::APPLY_CONSTRUCTOR, args); - } else if( t.isSelector() ) { + } else if(t.isSelector()) { Debug("parser-idt") << "apply selector: " << name.c_str() << " " << args.size() << std::endl; f = MK_EXPR(CVC4::kind::APPLY_SELECTOR, args); - } else if( t.isTester() ) { + } else if(t.isTester()) { Debug("parser-idt") << "apply tester: " << name.c_str() << " " << args.size() << std::endl; f = MK_EXPR(CVC4::kind::APPLY_TESTER, args); + } else { + Unhandled(t); } } - | /* if-then-else */ - iteTerm[f] - - | /* parenthesized sub-formula */ - LPAREN formula[f] RPAREN + /* if-then-else */ + | iteTerm[f] + + /* parenthesized sub-formula / tuple literals */ + | LPAREN formula[f] { args.push_back(f); } + ( COMMA formula[f] { args.push_back(f); } )* RPAREN + { if(args.size() > 1) { + /* If args has elements, we must be a tuple literal. + * Otherwise, f is already the sub-formula, and + * there's nothing to do */ + f = EXPR_MANAGER->mkExpr(kind::TUPLE, args); + } + } - /* constants */ + /* boolean literals */ | TRUE_TOK { f = MK_CONST(true); } | FALSE_TOK { f = MK_CONST(false); } + /* arithmetic literals */ | INTEGER_LITERAL { f = MK_CONST( AntlrInput::tokenToInteger($INTEGER_LITERAL) ); } | DECIMAL_LITERAL { f = MK_CONST( AntlrInput::tokenToRational($DECIMAL_LITERAL) ); } + /* bitvector literals */ | HEX_LITERAL { Assert( AntlrInput::tokenText($HEX_LITERAL).find("0hex") == 0 ); std::string hexString = AntlrInput::tokenTextSubstr($HEX_LITERAL, 4); @@ -1064,19 +1338,34 @@ simpleTerm[CVC4::Expr& f] { Assert( AntlrInput::tokenText($BINARY_LITERAL).find("0bin") == 0 ); std::string binString = AntlrInput::tokenTextSubstr($BINARY_LITERAL, 4); f = MK_CONST( BitVector(binString, 2) ); } + /* record literals */ + | PARENHASH recordEntry (COMMA recordEntry)+ HASHPAREN + { PARSER_STATE->unimplementedFeature("records not implemented yet"); + f = Expr(); } - | /* variable */ - identifier[name,CHECK_DECLARED,SYM_VARIABLE] + /* variable / zero-ary constructor application */ + | identifier[name,CHECK_DECLARED,SYM_VARIABLE] { f = PARSER_STATE->getVariable(name); - // datatypes: 0-ary constructors - if( PARSER_STATE->getType(name).isConstructor() ){ - args.push_back( f ); + // datatypes: zero-ary constructors + if(PARSER_STATE->getType(name).isConstructor()) { + args.push_back(f); f = MK_EXPR(CVC4::kind::APPLY_CONSTRUCTOR, args); } } ; /** + * Matches an entry in a record literal. + */ +recordEntry +@init { + std::string id; + Expr f; +} + : identifier[id,CHECK_DECLARED,SYM_VARIABLE] ASSIGN_TOK formula[f] + ; + +/** * Parses an ITE term. */ iteTerm[CVC4::Expr& f] @@ -1107,29 +1396,19 @@ iteElseTerm[CVC4::Expr& f] ; /** - * Parses a function symbol (an identifier). - * @param what kind of check to perform on the id - * @return the predicate symol - */ -functionSymbol[CVC4::Expr& f] -@init { - Debug("parser-extra") << "function symbol: " << AntlrInput::tokenText(LT(1)) << std::endl; - std::string name; -} - : identifier[name,CHECK_DECLARED,SYM_VARIABLE] - { PARSER_STATE->checkFunctionLike(name); - f = PARSER_STATE->getVariable(name); } - ; - -/** * Parses a datatype definition */ datatypeDef[std::vector<CVC4::Datatype>& datatypes] @init { std::string id; } - : identifier[id,CHECK_UNDECLARED,SYM_SORT] - { datatypes.push_back(Datatype(id)); } + : identifier[id,CHECK_NONE,SYM_SORT] + { datatypes.push_back(Datatype(id)); + if(!PARSER_STATE->isUnresolvedType(id)) { + // if not unresolved, must be undeclared + PARSER_STATE->checkDeclaration(id, CHECK_UNDECLARED, SYM_SORT); + } + } EQUAL_TOK constructorDef[datatypes.back()] ( BAR constructorDef[datatypes.back()] )* ; @@ -1165,19 +1444,15 @@ constructorDef[CVC4::Datatype& type] selector[CVC4::Datatype::Constructor& ctor] @init { std::string id; - Type type; + Type t; } - : identifier[id,CHECK_UNDECLARED,SYM_SORT] COLON - maybeUndefinedBaseType[type,CHECK_NONE] - // TODO: this doesn't permit datatype fields of ARRAY or BITVECTOR - // type. This will be problematic, since, after all, you could - // have something nasty like this: - // - // DATATYPE list = cons(car:ARRAY list OF list, cdr:list) | nil END; - { if(type.isNull()) { - ctor.addArg(id, Datatype::UnresolvedType($maybeUndefinedBaseType.id)); + : identifier[id,CHECK_UNDECLARED,SYM_SORT] COLON type[t,CHECK_NONE] + { if(t.isNull()) { +# warning FIXME datatypes + //std::pair<Type, std::string> unresolved = PARSER_STATE->newUnresolvedType(); + //ctor.addArg(id, Datatype::UnresolvedType(unresolved.second); } else { - ctor.addArg(id, type); + ctor.addArg(id, t); } Debug("parser-idt") << "selector: " << id.c_str() << std::endl; } @@ -1195,6 +1470,17 @@ IDENTIFIER : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')*; INTEGER_LITERAL: DIGIT+; /** + * Same as an integer literal converted to an unsigned int, but + * slightly more convenient AND works around a strange ANTLR bug (?) + * in the BVPLUS/BVMINUS/BVMULT rules where $INTEGER_LITERAL was + * returning a reference to the wrong token?! + */ +numeral returns [unsigned k] + : INTEGER_LITERAL + { $k = AntlrInput::tokenToUnsigned($INTEGER_LITERAL); } + ; + +/** * Matches a decimal literal. */ DECIMAL_LITERAL: DIGIT+ '.' DIGIT+; @@ -1245,4 +1531,3 @@ COMMENT : '%' (~('\n' | '\r'))* { SKIP();; }; * Matches an allowed escaped character. */ fragment ESCAPE : '\\' ('"' | '\\' | 'n' | 't' | 'r'); - diff --git a/src/parser/cvc/cvc_input.h b/src/parser/cvc/cvc_input.h index a3de3a61f..45a0edf95 100644 --- a/src/parser/cvc/cvc_input.h +++ b/src/parser/cvc/cvc_input.h @@ -53,6 +53,11 @@ public: /** Destructor. Frees the lexer and the parser. */ ~CvcInput(); + /** Get the language that this Input is reading. */ + InputLanguage getLanguage() const throw() { + return language::input::LANG_CVC4; + } + protected: /** Parse a command from the input. Returns <code>NULL</code> if there is diff --git a/src/parser/input.h b/src/parser/input.h index 71b2faeae..25023e1a8 100644 --- a/src/parser/input.h +++ b/src/parser/input.h @@ -142,6 +142,9 @@ public: /** Retrieve the remaining text in this input. */ virtual std::string getUnparsedText() = 0; + /** Get the language that this Input is reading. */ + virtual InputLanguage getLanguage() const throw() = 0; + protected: /** Create an input. diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp index c8a9876d5..19d1bbcb8 100644 --- a/src/parser/parser.cpp +++ b/src/parser/parser.cpp @@ -29,6 +29,7 @@ #include "expr/kind.h" #include "expr/type.h" #include "util/output.h" +#include "util/options.h" #include "util/Assert.h" #include "parser/cvc/cvc_input.h" #include "parser/smt/smt_input.h" @@ -39,14 +40,15 @@ using namespace CVC4::kind; namespace CVC4 { namespace parser { -Parser::Parser(ExprManager* exprManager, Input* input, bool strictMode) : +Parser::Parser(ExprManager* exprManager, Input* input, bool strictMode, bool parseOnly) : d_exprManager(exprManager), d_input(input), d_declScopeAllocated(), d_declScope(&d_declScopeAllocated), d_done(false), d_checksEnabled(true), - d_strictMode(strictMode) { + d_strictMode(strictMode), + d_parseOnly(parseOnly) { d_input->setParser(*this); } @@ -136,7 +138,7 @@ Parser::mkFunction(const std::string& name, const Type& type) { return expr; } -const std::vector<Expr> +std::vector<Expr> Parser::mkVars(const std::vector<std::string> names, const Type& type) { std::vector<Expr> vars; @@ -188,7 +190,7 @@ Parser::defineParameterizedType(const std::string& name, defineType(name, params, type); } -Type +SortType Parser::mkSort(const std::string& name) { Debug("parser") << "newSort(" << name << ")" << std::endl; Type type = d_exprManager->mkSort(name); @@ -196,34 +198,54 @@ Parser::mkSort(const std::string& name) { return type; } -Type +SortConstructorType Parser::mkSortConstructor(const std::string& name, size_t arity) { Debug("parser") << "newSortConstructor(" << name << ", " << arity << ")" << std::endl; - Type type = d_exprManager->mkSortConstructor(name, arity); + SortConstructorType type = d_exprManager->mkSortConstructor(name, arity); defineType(name, vector<Type>(arity), type); return type; } -std::vector<Type> +std::vector<SortType> Parser::mkSorts(const std::vector<std::string>& names) { - std::vector<Type> types; + std::vector<SortType> types; for(unsigned i = 0; i < names.size(); ++i) { types.push_back(mkSort(names[i])); } return types; } +SortType Parser::mkUnresolvedType(const std::string& name) { + SortType unresolved = mkSort(name); + d_unresolved.insert(unresolved); + return unresolved; +} + +bool Parser::isUnresolvedType(const std::string& name) { + if(!isDeclared(name, SYM_SORT)) { + return false; + } + return d_unresolved.find(getSort(name)) != d_unresolved.end(); +} + std::vector<DatatypeType> Parser::mkMutualDatatypeTypes(const std::vector<Datatype>& datatypes) { + std::vector<DatatypeType> types = - d_exprManager->mkMutualDatatypeTypes(datatypes); + d_exprManager->mkMutualDatatypeTypes(datatypes, d_unresolved); + Assert(datatypes.size() == types.size()); + for(unsigned i = 0; i < datatypes.size(); ++i) { DatatypeType t = types[i]; const Datatype& dt = t.getDatatype(); - Debug("parser-idt") << "define " << dt.getName() << " as " << t << std::endl; - defineType(dt.getName(), t); + const std::string& name = dt.getName(); + Debug("parser-idt") << "define " << name << " as " << t << std::endl; + if(isDeclared(name, SYM_SORT)) { + throw ParserException(name + " already declared"); + } + defineType(name, t); for(Datatype::const_iterator j = dt.begin(), j_end = dt.end(); j != j_end; @@ -258,6 +280,12 @@ Parser::mkMutualDatatypeTypes(const std::vector<Datatype>& datatypes) { } } } + + // These are no longer used, and the ExprManager would have + // complained of a bad substitution if anything is left unresolved. + // Clear out the set. + d_unresolved.clear(); + return types; } @@ -273,9 +301,9 @@ bool Parser::isDeclared(const std::string& name, SymbolType type) { } void Parser::checkDeclaration(const std::string& varName, - DeclarationCheck check, - SymbolType type) - throw (ParserException) { + DeclarationCheck check, + SymbolType type) + throw(ParserException) { if(!d_checksEnabled) { return; } @@ -283,13 +311,13 @@ void Parser::checkDeclaration(const std::string& varName, switch(check) { case CHECK_DECLARED: if( !isDeclared(varName, type) ) { - parseError("Symbol " + varName + " not declared"); + parseError("Symbol " + varName + " not declared as a " + (type == SYM_VARIABLE ? "variable" : "type")); } break; case CHECK_UNDECLARED: if( isDeclared(varName, type) ) { - parseError("Symbol " + varName + " previously declared"); + parseError("Symbol " + varName + " previously declared as a " + (type == SYM_VARIABLE ? "variable" : "type")); } break; @@ -301,21 +329,20 @@ void Parser::checkDeclaration(const std::string& varName, } } -void Parser::checkFunctionLike(const std::string& name) - throw (ParserException) { - if( d_checksEnabled && !isFunctionLike(name) ) { +void Parser::checkFunctionLike(const std::string& name) throw(ParserException) { + if(d_checksEnabled && !isFunctionLike(name)) { parseError("Expecting function-like symbol, found '" + name + "'"); } } -void Parser::checkArity(Kind kind, unsigned int numArgs) - throw (ParserException) { +void Parser::checkArity(Kind kind, unsigned numArgs) + throw(ParserException) { if(!d_checksEnabled) { return; } - unsigned int min = d_exprManager->minArity(kind); - unsigned int max = d_exprManager->maxArity(kind); + unsigned min = d_exprManager->minArity(kind); + unsigned max = d_exprManager->maxArity(kind); if( numArgs < min || numArgs > max ) { stringstream ss; @@ -331,7 +358,7 @@ void Parser::checkArity(Kind kind, unsigned int numArgs) } } -void Parser::checkOperator(Kind kind, unsigned int numArgs) throw (ParserException) { +void Parser::checkOperator(Kind kind, unsigned numArgs) throw(ParserException) { if( d_strictMode && d_logicOperators.find(kind) == d_logicOperators.end() ) { parseError( "Operator is not defined in the current logic: " + kindToString(kind) ); } @@ -371,7 +398,11 @@ Command* Parser::nextCommand() throw(ParserException) { } catch(Exception& e) { setDone(); stringstream ss; - ss << e; + // set the language of the stream, otherwise if it contains + // Exprs or Types it prints in the AST language + OutputLanguage outlang = + language::toOutputLanguage(d_input->getLanguage()); + ss << Expr::setlanguage(outlang) << e; parseError( ss.str() ); } } diff --git a/src/parser/parser.h b/src/parser/parser.h index 718b862ab..25fb30be6 100644 --- a/src/parser/parser.h +++ b/src/parser/parser.h @@ -55,18 +55,20 @@ enum DeclarationCheck { CHECK_NONE }; -/** Returns a string representation of the given object (for - debugging). */ -inline std::string toString(DeclarationCheck check) { +/** + * Returns a string representation of the given object (for + * debugging). + */ +inline std::ostream& operator<<(std::ostream& out, DeclarationCheck check) { switch(check) { case CHECK_NONE: - return "CHECK_NONE"; + return out << "CHECK_NONE"; case CHECK_DECLARED: - return "CHECK_DECLARED"; + return out << "CHECK_DECLARED"; case CHECK_UNDECLARED: - return "CHECK_UNDECLARED"; + return out << "CHECK_UNDECLARED"; default: - Unreachable(); + return out << "DeclarationCheck!UNKNOWN"; } } @@ -80,16 +82,18 @@ enum SymbolType { SYM_SORT }; -/** Returns a string representation of the given object (for - debugging). */ -inline std::string toString(SymbolType type) { +/** + * Returns a string representation of the given object (for + * debugging). + */ +inline std::ostream& operator<<(std::ostream& out, SymbolType type) { switch(type) { case SYM_VARIABLE: - return "SYM_VARIABLE"; + return out << "SYM_VARIABLE"; case SYM_SORT: - return "SYM_SORT"; + return out << "SYM_SORT"; default: - Unreachable(); + return out << "SymbolType!UNKNOWN"; } } @@ -133,10 +137,22 @@ class CVC4_PUBLIC Parser { /** Are we parsing in strict mode? */ bool d_strictMode; + /** Are we only parsing? */ + bool d_parseOnly; + /** The set of operators available in the current logic. */ std::set<Kind> d_logicOperators; /** + * The current set of unresolved types. We can get by with this NOT + * being on the scope, because we can only have one DATATYPE + * definition going on at one time. This is a bit hackish; we + * depend on mkMutualDatatypeTypes() to check everything and clear + * this out. + */ + std::set<SortType> d_unresolved; + + /** * "Preemption commands": extra commands implied by subterms that * should be issued before the currently-being-parsed command is * issued. Used to support SMT-LIBv2 ":named" attribute on terms. @@ -155,7 +171,7 @@ protected: * @param input the parser input * @param strictMode whether to incorporate strict(er) compliance checks */ - Parser(ExprManager* exprManager, Input* input, bool strictMode = false); + Parser(ExprManager* exprManager, Input* input, bool strictMode = false, bool parseOnly = false); public: @@ -255,7 +271,7 @@ public: * @throws ParserException if checks are enabled and the check fails */ void checkDeclaration(const std::string& name, DeclarationCheck check, - SymbolType type = SYM_VARIABLE) throw (ParserException); + SymbolType type = SYM_VARIABLE) throw(ParserException); /** * Checks whether the given name is bound to a function. @@ -263,7 +279,7 @@ public: * @throws ParserException if checks are enabled and name is not * bound to a function */ - void checkFunctionLike(const std::string& name) throw (ParserException); + void checkFunctionLike(const std::string& name) throw(ParserException); /** * Check that <code>kind</code> can accept <code>numArgs</code> arguments. @@ -273,7 +289,7 @@ public: * <code>kind</code> cannot be applied to <code>numArgs</code> * arguments. */ - void checkArity(Kind kind, unsigned int numArgs) throw (ParserException); + void checkArity(Kind kind, unsigned numArgs) throw(ParserException); /** * Check that <code>kind</code> is a legal operator in the current @@ -284,7 +300,7 @@ public: * @throws ParserException if the parser mode is strict and the * operator <code>kind</code> has not been enabled */ - void checkOperator(Kind kind, unsigned int numArgs) throw (ParserException); + void checkOperator(Kind kind, unsigned numArgs) throw(ParserException); /** * Returns the type for the variable with the given name. @@ -300,7 +316,7 @@ public: /** * Create a set of new CVC4 variable expressions of the given type. */ - const std::vector<Expr> + std::vector<Expr> mkVars(const std::vector<std::string> names, const Type& type); /** Create a new CVC4 function expression of the given type. */ @@ -327,17 +343,27 @@ public: /** * Creates a new sort with the given name. */ - Type mkSort(const std::string& name); + SortType mkSort(const std::string& name); /** * Creates a new sort constructor with the given name and arity. */ - Type mkSortConstructor(const std::string& name, size_t arity); + SortConstructorType mkSortConstructor(const std::string& name, size_t arity); /** * Creates new sorts with the given names (all of arity 0). */ - std::vector<Type> mkSorts(const std::vector<std::string>& names); + std::vector<SortType> mkSorts(const std::vector<std::string>& names); + + /** + * Creates a new "unresolved type," used only during parsing. + */ + SortType mkUnresolvedType(const std::string& name); + + /** + * Returns true IFF name is an unresolved type. + */ + bool isUnresolvedType(const std::string& name); /** * Create sorts of mutually-recursive datatypes. @@ -371,13 +397,37 @@ public: /** Is the symbol bound to a predicate? */ bool isPredicate(const std::string& name); + /** Parse and return the next command. */ Command* nextCommand() throw(ParserException); + + /** Parse and return the next expression. */ Expr nextExpression() throw(ParserException); - inline void parseError(const std::string& msg) throw (ParserException) { + /** Raise a parse error with the given message. */ + inline void parseError(const std::string& msg) throw(ParserException) { d_input->parseError(msg); } + /** + * If we are parsing only, don't raise an exception; if we are not, + * raise a parse error with the given message. There is no actual + * parse error, everything is as expected, but we cannot create the + * Expr, Type, or other requested thing yet due to internal + * limitations. Even though it's not a parse error, we throw a + * parse error so that the input line and column information is + * available. + * + * Think quantifiers. We don't have a TheoryQuantifiers yet, so we + * have no kind::FORALL or kind::EXISTS. But we might want to + * support parsing quantifiers (just not doing anything with them). + * So this mechanism gives you a way to do it with --parse-only. + */ + inline void unimplementedFeature(const std::string& msg) throw(ParserException) { + if(!d_parseOnly) { + parseError(msg); + } + } + inline void pushScope() { d_declScope->pushScope(); } inline void popScope() { d_declScope->popScope(); } @@ -405,7 +455,7 @@ public: * * In short, caveat emptor. */ - void useDeclarationsFrom(Parser* parser) { + inline void useDeclarationsFrom(Parser* parser) { if(parser == NULL) { d_declScope = &d_declScopeAllocated; } else { @@ -414,6 +464,13 @@ public: } /** + * Gets the current declaration level. + */ + inline size_t getDeclarationLevel() const throw() { + return d_declScope->getLevel(); + } + + /** * An expression stream interface for a parser. This stream simply * pulls expressions from the given Parser object. * diff --git a/src/parser/parser_builder.cpp b/src/parser/parser_builder.cpp index 38f41f47a..9773445ed 100644 --- a/src/parser/parser_builder.cpp +++ b/src/parser/parser_builder.cpp @@ -2,10 +2,10 @@ /*! \file parser_builder.cpp ** \verbatim ** Original author: cconway - ** Major contributors: none - ** Minor contributors (to current version): mdeters + ** Major contributors: mdeters + ** Minor contributors (to current version): none ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) ** Courant Institute of Mathematical Sciences ** New York University ** See the file COPYING in the top-level source directory for licensing @@ -57,6 +57,7 @@ void ParserBuilder::init(ExprManager* exprManager, d_checksEnabled = true; d_strictMode = false; d_mmap = false; + d_parseOnly = false; } Parser *ParserBuilder::build() @@ -81,13 +82,13 @@ Parser *ParserBuilder::build() Parser *parser = NULL; switch(d_lang) { case language::input::LANG_SMTLIB: - parser = new Smt(d_exprManager, input, d_strictMode); + parser = new Smt(d_exprManager, input, d_strictMode, d_parseOnly); break; case language::input::LANG_SMTLIB_V2: - parser = new Smt2(d_exprManager, input, d_strictMode); + parser = new Smt2(d_exprManager, input, d_strictMode, d_parseOnly); break; default: - parser = new Parser(d_exprManager, input, d_strictMode); + parser = new Parser(d_exprManager, input, d_strictMode, d_parseOnly); } if( d_checksEnabled ) { @@ -124,18 +125,23 @@ ParserBuilder& ParserBuilder::withInputLanguage(InputLanguage lang) { return *this; } - ParserBuilder& ParserBuilder::withMmap(bool flag) { d_mmap = flag; return *this; } +ParserBuilder& ParserBuilder::withParseOnly(bool flag) { + d_parseOnly = flag; + return *this; +} + ParserBuilder& ParserBuilder::withOptions(const Options& options) { return withInputLanguage(options.inputLanguage) .withMmap(options.memoryMap) .withChecks(options.semanticChecks) - .withStrictMode(options.strictParsing); + .withStrictMode(options.strictParsing) + .withParseOnly(options.parseOnly); } ParserBuilder& ParserBuilder::withStrictMode(bool flag) { diff --git a/src/parser/parser_builder.h b/src/parser/parser_builder.h index 0d01104eb..6f4f051ec 100644 --- a/src/parser/parser_builder.h +++ b/src/parser/parser_builder.h @@ -2,10 +2,10 @@ /*! \file parser_builder.h ** \verbatim ** Original author: cconway - ** Major contributors: none - ** Minor contributors (to current version): mdeters + ** Major contributors: mdeters + ** Minor contributors (to current version): none ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) ** Courant Institute of Mathematical Sciences ** New York University ** See the file COPYING in the top-level source directory for licensing @@ -75,6 +75,10 @@ class CVC4_PUBLIC ParserBuilder { /** Should we memory-map a file input? */ bool d_mmap; + /** Are we parsing only? */ + bool d_parseOnly; + + /** Initialize this parser builder */ void init(ExprManager* exprManager, const std::string& filename); public: @@ -97,24 +101,50 @@ public: /** Set the parser to read a file for its input. (Default) */ ParserBuilder& withFileInput(); - /** Set the filename for use by the parser. If file input is used, + /** + * Set the filename for use by the parser. If file input is used, * this file will be opened and read by the parser. Otherwise, the * filename string (possibly a non-existent path) will only be used - * in error messages. */ + * in error messages. + */ ParserBuilder& withFilename(const std::string& filename); - /** Set the input language to be used by the parser. (Default: - LANG_AUTO). */ + /** + * Set the input language to be used by the parser. + * + * (Default: LANG_AUTO) + */ ParserBuilder& withInputLanguage(InputLanguage lang); - /** Should the parser memory-map its input? This is only relevant if - * the parser will have a file input. (Default: no) */ + /** + * Should the parser memory-map its input? This is only relevant if + * the parser will have a file input. + * + * (Default: no) + */ ParserBuilder& withMmap(bool flag = true); + /** + * Are we only parsing, or doing something with the resulting + * commands and expressions? This setting affects whether the + * parser will raise certain errors about unimplemented features, + * even if there isn't a parsing error, because the result of the + * parse would otherwise be an incorrect parse tree and the error + * would go undetected. This is specifically for circumstances + * where the parser is ahead of the functionality present elsewhere + * in CVC4 (such as quantifiers, subtypes, records, etc. in the CVC + * language parser). + */ + ParserBuilder& withParseOnly(bool flag = true); + /** Derive settings from the given options. */ ParserBuilder& withOptions(const Options& options); - /** Should the parser use strict mode? (Default: no) */ + /** + * Should the parser use strict mode? + * + * (Default: no) + */ ParserBuilder& withStrictMode(bool flag = true); /** Set the parser to use the given stream for its input. */ diff --git a/src/parser/smt/Smt.g b/src/parser/smt/Smt.g index 96ac46bf1..86682f9a9 100644 --- a/src/parser/smt/Smt.g +++ b/src/parser/smt/Smt.g @@ -474,8 +474,8 @@ identifier[std::string& id, : IDENTIFIER { id = AntlrInput::tokenText($IDENTIFIER); Debug("parser") << "identifier: " << id - << " check? " << toString(check) - << " type? " << toString(type) << std::endl; + << " check? " << check + << " type? " << type << std::endl; PARSER_STATE->checkDeclaration(id, check, type); } ; @@ -489,7 +489,7 @@ let_identifier[std::string& id, : LET_IDENTIFIER { id = AntlrInput::tokenText($LET_IDENTIFIER); Debug("parser") << "let_identifier: " << id - << " check? " << toString(check) << std::endl; + << " check? " << check << std::endl; PARSER_STATE->checkDeclaration(id, check, SYM_VARIABLE); } ; @@ -503,7 +503,7 @@ flet_identifier[std::string& id, : FLET_IDENTIFIER { id = AntlrInput::tokenText($FLET_IDENTIFIER); Debug("parser") << "flet_identifier: " << id - << " check? " << toString(check) << std::endl; + << " check? " << check << std::endl; PARSER_STATE->checkDeclaration(id, check); } ; diff --git a/src/parser/smt/smt.cpp b/src/parser/smt/smt.cpp index bfac4f300..a8dabeffe 100644 --- a/src/parser/smt/smt.cpp +++ b/src/parser/smt/smt.cpp @@ -3,9 +3,9 @@ ** \verbatim ** Original author: cconway ** Major contributors: none - ** Minor contributors (to current version): mdeters, dejan + ** Minor contributors (to current version): dejan, mdeters ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) ** Courant Institute of Mathematical Sciences ** New York University ** See the file COPYING in the top-level source directory for licensing @@ -47,8 +47,8 @@ Smt::Logic Smt::toLogic(const std::string& name) { return logicMap[name]; } -Smt::Smt(ExprManager* exprManager, Input* input, bool strictMode) : - Parser(exprManager,input,strictMode), +Smt::Smt(ExprManager* exprManager, Input* input, bool strictMode, bool parseOnly) : + Parser(exprManager,input,strictMode,parseOnly), d_logicSet(false) { // Boolean symbols are always defined diff --git a/src/parser/smt/smt.h b/src/parser/smt/smt.h index 388b4cd6d..98ebf6410 100644 --- a/src/parser/smt/smt.h +++ b/src/parser/smt/smt.h @@ -5,7 +5,7 @@ ** Major contributors: none ** Minor contributors (to current version): mdeters ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) ** Courant Institute of Mathematical Sciences ** New York University ** See the file COPYING in the top-level source directory for licensing @@ -78,7 +78,7 @@ private: Logic d_logic; protected: - Smt(ExprManager* exprManager, Input* input, bool strictMode = false); + Smt(ExprManager* exprManager, Input* input, bool strictMode = false, bool parseOnly = false); public: /** diff --git a/src/parser/smt/smt_input.h b/src/parser/smt/smt_input.h index c5b147b71..0ab382c73 100644 --- a/src/parser/smt/smt_input.h +++ b/src/parser/smt/smt_input.h @@ -55,6 +55,11 @@ public: /** Destructor. Frees the lexer and the parser. */ ~SmtInput(); + /** Get the language that this Input is reading. */ + InputLanguage getLanguage() const throw() { + return language::input::LANG_SMTLIB; + } + protected: /** @@ -63,7 +68,7 @@ protected: * * @throws ParserException if an error is encountered during parsing. */ - Command* parseCommand() + Command* parseCommand() throw(ParserException, TypeCheckingException, AssertionException); /** @@ -72,7 +77,7 @@ protected: * * @throws ParserException if an error is encountered during parsing. */ - Expr parseExpr() + Expr parseExpr() throw(ParserException, TypeCheckingException, AssertionException); private: diff --git a/src/parser/smt2/Smt2.g b/src/parser/smt2/Smt2.g index d27abc735..4f141891b 100644 --- a/src/parser/smt2/Smt2.g +++ b/src/parser/smt2/Smt2.g @@ -673,8 +673,8 @@ symbol[std::string& id, : SYMBOL { id = AntlrInput::tokenText($SYMBOL); Debug("parser") << "symbol: " << id - << " check? " << toString(check) - << " type? " << toString(type) << std::endl; + << " check? " << check + << " type? " << type << std::endl; PARSER_STATE->checkDeclaration(id, check, type); } ; diff --git a/src/parser/smt2/smt2.cpp b/src/parser/smt2/smt2.cpp index 03b901164..4bc90ec8f 100644 --- a/src/parser/smt2/smt2.cpp +++ b/src/parser/smt2/smt2.cpp @@ -2,10 +2,10 @@ /*! \file smt2.cpp ** \verbatim ** Original author: cconway - ** Major contributors: none - ** Minor contributors (to current version): mdeters + ** Major contributors: mdeters + ** Minor contributors (to current version): none ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) ** Courant Institute of Mathematical Sciences ** New York University ** See the file COPYING in the top-level source directory for licensing @@ -24,8 +24,8 @@ namespace CVC4 { namespace parser { -Smt2::Smt2(ExprManager* exprManager, Input* input, bool strictMode) : - Parser(exprManager,input,strictMode), +Smt2::Smt2(ExprManager* exprManager, Input* input, bool strictMode, bool parseOnly) : + Parser(exprManager,input,strictMode,parseOnly), d_logicSet(false) { if( !strictModeEnabled() ) { addTheory(Smt2::THEORY_CORE); diff --git a/src/parser/smt2/smt2.h b/src/parser/smt2/smt2.h index ef5f5e729..e9363b970 100644 --- a/src/parser/smt2/smt2.h +++ b/src/parser/smt2/smt2.h @@ -2,10 +2,10 @@ /*! \file smt2.h ** \verbatim ** Original author: cconway - ** Major contributors: none - ** Minor contributors (to current version): mdeters + ** Major contributors: mdeters + ** Minor contributors (to current version): none ** This file is part of the CVC4 prototype. - ** Copyright (c) 2009, 2010 The Analysis of Computer Systems Group (ACSys) + ** Copyright (c) 2009, 2010, 2011 The Analysis of Computer Systems Group (ACSys) ** Courant Institute of Mathematical Sciences ** New York University ** See the file COPYING in the top-level source directory for licensing @@ -48,7 +48,7 @@ private: Smt::Logic d_logic; protected: - Smt2(ExprManager* exprManager, Input* input, bool strictMode = false); + Smt2(ExprManager* exprManager, Input* input, bool strictMode = false, bool parseOnly = false); public: /** diff --git a/src/parser/smt2/smt2_input.h b/src/parser/smt2/smt2_input.h index 836472107..04fe48fe1 100644 --- a/src/parser/smt2/smt2_input.h +++ b/src/parser/smt2/smt2_input.h @@ -64,6 +64,11 @@ public: /** Destructor. Frees the lexer and the parser. */ virtual ~Smt2Input(); + /** Get the language that this Input is reading. */ + InputLanguage getLanguage() const throw() { + return language::input::LANG_SMTLIB_V2; + } + protected: /** |