summaryrefslogtreecommitdiff
path: root/src/parser/input.h
blob: 21c5c4869f118af83c3be36da1788347928f6708 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
/*********************                                                        */
/** input.h
 ** Original author: cconway
 ** Major contributors: none
 ** Minor contributors (to current version): none
 ** This file is part of the CVC4 prototype.
 ** Copyright (c) 2009, 2010  The Analysis of Computer Systems Group (ACSys)
 ** Courant Institute of Mathematical Sciences
 ** New York University
 ** See the file COPYING in the top-level source directory for licensing
 ** information.
 **
 ** Base for ANTLR parser classes.
 **/

#include "cvc4parser_private.h"

#ifndef __CVC4__PARSER__ANTLR_INPUT_H
#define __CVC4__PARSER__ANTLR_INPUT_H

#include <antlr3.h>
#include <iostream>
#include <string>
#include <vector>

#include "expr/expr.h"
#include "expr/expr_manager.h"
#include "parser/parser_exception.h"
#include "parser/parser_options.h"
#include "util/Assert.h"

namespace CVC4 {

class Command;
class Type;
class FunctionType;

namespace parser {

/**
 * Wrapper around an ANTLR3 input stream. Pairs a raw ANTLR3 input stream
 * handle with a name. The constructor is private, so instances are created
 * through the static factory methods below. Copy construction and assignment
 * are private and fail at run time if ever invoked.
 */
class AntlrInputStream {
  /** The name associated with this stream (see <code>getName</code>). */
  std::string d_name;
  /** The wrapped ANTLR3 input stream handle. */
  pANTLR3_INPUT_STREAM d_input;

  /** Private constructor; use the static factory methods instead.
   *
   * @param name the name to associate with the stream
   * @param input the ANTLR3 input stream to wrap
   */
  AntlrInputStream(std::string name,pANTLR3_INPUT_STREAM input);
  /* This is private and throws an exception, because you should never use it. */
  AntlrInputStream(const AntlrInputStream& inputStream) {
    Unimplemented("copy constructor for AntlrInputStream");
  }
  /* This is private and throws an exception, because you should never use it. */
  AntlrInputStream& operator=(const AntlrInputStream& inputStream) {
    Unimplemented("operator= for AntlrInputStream");
  }

public:

  /** Destructor. Defined out of line.
   *  NOTE(review): presumably releases the wrapped ANTLR3 stream -- confirm
   *  against the implementation file. */
  virtual ~AntlrInputStream();

  /** Retrieve the ANTLR3 input stream handle (presumably <code>d_input</code>;
   *  the definition is not in this header). */
  pANTLR3_INPUT_STREAM getAntlr3InputStream() const;
  /** Retrieve the name of this stream. The string is returned by value. */
  const std::string getName() const;

  /** Create a file input.
   *
   * @param name the path of the file to read
   * @param useMmap <code>true</code> if the input should use memory-mapped I/O; otherwise, the
   * input will use the standard ANTLR3 I/O implementation.
   */
  static AntlrInputStream* newFileInputStream(const std::string& name, bool useMmap = false);

  /* Create an input from an istream. Currently disabled: the declaration
   * below is commented out. */
  // AntlrInputStream newInputStream(std::istream& input, const std::string& name);

  /** Create a string input.
   *
   * @param input the string to read
   * @param name the "filename" to use when reporting errors
   */
  static AntlrInputStream* newStringInputStream(const std::string& input, const std::string& name);
};

class Parser;

/**
 * An input to be parsed. The static factory methods in this class (e.g.,
 * <code>newFileInput</code>, <code>newStringInput</code>) create an
 * <code>Input</code> for the given input language and attach it to an input
 * source of the appropriate type.
 *
 * This class is abstract: subclasses must implement <code>parseCommand</code>
 * and <code>parseExpr</code>. Copy construction and assignment are private
 * and fail at run time if ever invoked.
 */
class CVC4_PUBLIC Input {
  friend class Parser; // for parseError, parseCommand, parseExpr

  /** The display name of the input (e.g., the filename). */
  std::string d_name;

  /** The token lookahead used to lex and parse the input. This should usually be equal to
   * <code>K</code> for an LL(k) grammar. */
  unsigned int d_lookahead;

  /** The ANTLR3 lexer associated with this input. This will be <code>NULL</code> initially. It
   *  must be set by a call to <code>setAntlr3Lexer</code>, preferably in the subclass constructor. */
  pANTLR3_LEXER d_lexer;

  /** The ANTLR3 parser associated with this input. This will be <code>NULL</code> initially. It
   *  must be set by a call to <code>setAntlr3Parser</code>, preferably in the subclass constructor.
   *  The <code>super</code> field of <code>d_parser</code> will be set to <code>this</code> and
   *  <code>reportError</code> will be set to <code>Input::reportError</code>. */
  pANTLR3_PARSER d_parser;

  /** The ANTLR3 token stream associated with this input. We only need this so we can free it on exit.
   *  This is set by <code>setAntlr3Lexer</code>.
   *  NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
  pANTLR3_COMMON_TOKEN_STREAM d_tokenStream;

  /** The ANTLR3 input stream associated with this input. We only need this so we can free it on exit.
   *  NOTE: We assume that we <em>can</em> free it on exit. No sharing! */
  AntlrInputStream *d_inputStream;

  /** Turns an ANTLR3 exception into a message for the user and calls <code>parseError</code>. */
  static void reportError(pANTLR3_BASE_RECOGNIZER recognizer);

  /** Builds a message for a lexer error and calls <code>parseError</code>. */
  static void lexerError(pANTLR3_BASE_RECOGNIZER recognizer);

  /* Since we own d_tokenStream and it needs to be freed, we need to prevent
   * copy construction and assignment. Both are private and report
   * "unimplemented" if they are ever reached.
   */
  Input(const Input& input) { Unimplemented("Copy constructor for Input."); }
  Input& operator=(const Input& input) { Unimplemented("operator= for Input."); }

public:

  /** Destructor. Frees the token stream and closes the input. */
  virtual ~Input();

  /** Create an input for the given file.
    *
    * @param lang the input language
    * @param filename the input filename
    * @param useMmap true if the parser should use memory-mapped I/O (default: false)
    */
  static Input* newFileInput(InputLanguage lang, const std::string& filename, bool useMmap=false);

  /** Create an input for the given AntlrInputStream. NOTE: the new Input
   * will take ownership of the input stream and delete it at destruction time.
   *
   * @param lang the input language
   * @param inputStream the input stream
   *
   * */
  static Input* newInput(InputLanguage lang, AntlrInputStream *inputStream);

  /* Create an input for the given stream. Currently disabled: the
   * declaration below is commented out.
   *
   * @param lang the input language
   * @param input the input stream
   * @param name the name of the stream, for use in error messages
   */
  //static Parser* newStreamInput(InputLanguage lang, std::istream& input, const std::string& name);

  /** Create an input for the given string
   *
   * @param lang the input language
   * @param input the input string
   * @param name the name of the stream, for use in error messages
   */
  static Input* newStringInput(InputLanguage lang, const std::string& input, const std::string& name);

  /** Retrieve the text associated with a token.
   *
   * @param token the ANTLR3 token whose text is wanted
   * @return a string holding a copy of the token's text
   */
  inline static std::string tokenText(pANTLR3_COMMON_TOKEN token);


protected:
  /** Create an input. This input takes ownership of the given input stream,
   * and will delete it at destruction time.
   *
   * @param inputStream the input stream to use
   * @param lookahead the lookahead needed to parse the input (i.e., k for
   * an LL(k) grammar)
   */
  Input(AntlrInputStream *inputStream, unsigned int lookahead);


  /** Retrieve the input stream for this parser. */
  AntlrInputStream *getInputStream();

  /** Retrieve the token stream for this parser. Must not be called before
   * <code>setAntlr3Lexer()</code>. */
  pANTLR3_COMMON_TOKEN_STREAM getTokenStream();

  /** Parse a command from
   * the input by invoking the implementation-specific parsing method.  Returns
   * <code>NULL</code> if there is no command there to parse.
   *
   * @throws ParserException if an error is encountered during parsing.
   */
  virtual Command* parseCommand() throw(ParserException) = 0;

  /**
   * Throws a <code>ParserException</code> with the given message.
   *
   * @param msg the error message to report
   */
  void parseError(const std::string& msg) throw (ParserException);

  /** Parse an
   * expression from the input by invoking the implementation-specific
   * parsing method. Returns a null <code>Expr</code> if there is no
   * expression there to parse.
   *
   * @throws ParserException if an error is encountered during parsing.
   */
  virtual Expr parseExpr() throw(ParserException) = 0;

  /** Set the ANTLR3 lexer for this input. */
  void setAntlr3Lexer(pANTLR3_LEXER pLexer);

  /** Set the ANTLR3 parser implementation for this input. */
  void setAntlr3Parser(pANTLR3_PARSER pParser);

  /** Set the Parser object for this input. This is distinct from
   * <code>setAntlr3Parser</code>, which takes the ANTLR3 parser handle. */
  void setParser(Parser *parser);
};

/**
 * Copy out the text covered by an ANTLR3 token.
 *
 * The token's start and stop indices are used as the addresses of its first
 * and last bytes, so the text is the (stop - start + 1) bytes that begin at
 * the start address. The extracted text is also written to the
 * "parser-extra" debug channel.
 *
 * @param token the token whose text is wanted
 * @return a string holding a copy of the token's bytes
 */
std::string Input::tokenText(pANTLR3_COMMON_TOKEN token) {
  ANTLR3_MARKER first = token->getStartIndex(token);
  ANTLR3_MARKER last = token->getStopIndex(token);

  // Both markers are inclusive boundaries, hence the "+ 1" on the length.
  const char* const bytes = reinterpret_cast<const char*>(first);
  std::string text(bytes, last - first + 1);

  Debug("parser-extra") << "tokenText: start=" << first << std::endl
                        << "end=" << last << std::endl
                        << "txt='" << text << "'" << std::endl;
  return text;
}

}/* CVC4::parser namespace */
}/* CVC4 namespace */

#endif /* __CVC4__PARSER__ANTLR_INPUT_H */
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback