src/parser/cvc/CvcLexer.g


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

options {
  language = "Cpp";            // C++ output for antlr
  namespaceStd  = "std";       // Cosmetic option to get rid of long defines in generated code
  namespaceAntlr = "antlr";    // Cosmetic option to get rid of long defines in generated code
  namespace = "CVC4::parser";  // Wrap everything in the smtparser namespace
}
   
/**
 * AntlrCvcLexer class is a stream tokenizer (lexer) for the CVC language. 
 */
class AntlrCvcLexer extends Lexer;

options {
  exportVocab = CvcVocabulary;  // Name of the shared token vocabulary
  testLiterals = false;         // Do not check for literals by default
  defaultErrorHandler = false;  // Skip the defaul error handling, just break with exceptions
  k = 2;  
}
 
tokens {
  // Types 
  BOOLEAN       = "BOOLEAN";
  // Boolean oparators
  AND           = "AND";
  IF            = "IF";
  THEN          = "THEN";
  ELSE          = "ELSE";
  ELSEIF        = "ELSIF";
  ENDIF         = "ENDIF";
  NOT           = "NOT";
  OR            = "OR";
  TRUE          = "TRUE";
  FALSE         = "FALSE";
  XOR           = "XOR";
  IMPLIES       = "=>";
  IFF           = "<=>";
  // Commands
  ASSERT        = "ASSERT";
  QUERY         = "QUERY";
  CHECKSAT      = "CHECKSAT";
  PRINT         = "PRINT";
  EXHO          = "ECHO";
  
  PUSH          = "PUSH";
  POP           = "POP";
  POPTO         = "POPTO";
}

/**
 * Matches any letter ('a'-'z' and 'A'-'Z').
 */
protected 
ALPHA options{ paraphrase = "a letter"; } 
  : 'a'..'z' 
  | 'A'..'Z'
  ;
  
/**
 * Matches the digits (0-9)
 */
protected 
DIGIT options{ paraphrase = "a digit"; } 
  : '0'..'9'
  ;

/**
 * Matches the ':'
 */
COLON options{ paraphrase = "a comma"; } 
  : ':'
  ;

/**
 * Matches the ','
 */
COMMA options{ paraphrase = "a comma"; } 
  : ','
  ;

/**
 * Matches an identifier from the input. An identifier is a sequence of letters,
 * digits and "_", "'", "." symbols, starting with a letter. 
 */
IDENTIFIER options { paraphrase = "an identifier"; testLiterals = true; }
  : ALPHA (ALPHA | DIGIT | '_' | '\'' | '.')*
  ;
      
/**
 * Matches the left bracket ('(').
 */
LPAREN options { paraphrase = "a left parenthesis '('"; }      
  : '(';
  
/**
 * Matches the right bracket ('(').
 */
RPAREN options { paraphrase = "a right parenthesis ')'"; }  
  : ')';

/**
 * Matches and skips whitespace in the input and ignores it. 
 */
WHITESPACE options { paraphrase = "whitespace"; }
  : (' ' | '\t' | '\f')              { $setType(antlr::Token::SKIP); }
  ;

/**
 * Mathces and skips the newline symbols in the input.
 */
NEWLINE options { paraphrase = "a newline"; }  
  : ('\r' '\n' | '\r' | '\n')       { $setType(antlr::Token::SKIP); newline(); }
  ;

/**
 * Mathces the comments and ignores them
 */
COMMENT options { paraphrase = "comment"; }
  : ';' (~('\n' | '\r'))*                    { $setType(antlr::Token::SKIP); }
  ;
      
/**
 * Matches a numeral from the input (non-empty sequence of digits).
 */
NUMERAL options { paraphrase = "a numeral"; }
  : (DIGIT)+
  ;