from pygments.lexer import RegexLexer from pygments import token class SmtLibLexer(RegexLexer): """This class implements a lexer for SMT-LIBv2. It tries to be very close to the SMT-LIBv2 standard while providing proper highlighting for everything cvc5 supports. The lexer implements the SMT-LIBv2 lexicon (section 3.1 of the standard) directly in the root state, as well as the commands, sorts and operators. Note that commands, sorts and operators are used to build regular expressions and, thus, can contain character classes (e.g. "[0-9]+"), but also need to escape certain characters (e.g. "\." or "\+"). """ name = 'smtlib' COMMANDS = [ 'assert', 'check-sat', 'check-sat-assuming', 'declare-const', 'declare-datatype', 'declare-datatypes', 'declare-codatatypes', 'declare-fun', 'declare-sort', 'define-fun', 'define-fun-rec', 'define-funs-rec', 'define-sort', 'echo', 'exit', 'get-assertions', 'get-assignment', 'get-info', 'get-model', 'get-option', 'get-proof', 'get-unsat-assumptions', 'get-unsat-core', 'get-value', 'pop', 'push', 'reset', 'reset-assertions', 'set-info', 'set-logic', 'set-option', ] SORTS = [ 'Array', 'BitVec', 'Bool', 'FloatingPoint', 'Float[0-9]+', 'Int', 'Real', 'RegLan', 'RoundingMode', 'Set', 'String', 'Tuple', ] OPERATORS = [ # array 'select', 'store', # bv 'concat', 'extract', 'repeat', 'zero_extend', 'sign_extend', 'rotate_left', 'rotate_right', 'bvnot', 'bvand', 'bvor', 'bvneg', 'bvadd', 'bvmul', 'bvudiv', 'bvurem', 'bvshl', 'bvlshr', 'bvult', 'bvnand', 'bvnor', 'bvxor', 'bvxnor', 'bvcomp', 'bvsub', 'bvsdiv', 'bvsrem', 'bvsmod', 'bvashr', 'bvule', 'bvugt', 'bvuge', 'bvslt', 'bvsle', 'bvsgt', 'bvsge', # core '=>', '=', 'true', 'false', 'not', 'and', 'or', 'xor', 'distinct', 'ite', # datatypes 'mkTuple', 'tupSel', # fp 'RNE', 'RNA', 'RTP', 'RTN', 'RTZ', 'fp', 'NaN', 'fp\.abs', 'fp\.neg', 'fp\.add', 'fp\.sub', 'fp\.mul', 'fp\.div', 'fp\.fma', 'fp\.sqrt', 'fp\.rem', 'fp\.roundToIntegral', 'fp\.min', 'fp\.max', 'fp\.leq', 'fp\.lt', 'fp\.geq', 'fp\.gt', 'fp\.eq', 'fp\.isNormal', 'fp\.isSubnormal', 'fp\.isZero', 'fp\.isInfinite', 'fp\.isNaN', 'fp\.isNegative', 'fp\.isPositive', 'to_fp', 'to_fp_unsigned', 'fp\.to_ubv', 'fp\.to_sbv', 'fp\.to_real', '\+oo', '-oo', '\+zero', '-zero', # int / real '<', '>', '<=', '>=', '!=', '\+', '-', '\*', '/', 'div', 'mod', 'abs', 'divisible', 'to_real', 'to_int', 'is_int', # separation logic 'emp', 'pto', 'sep', 'wand', 'nil', # sets / relations 'union', 'setminus', 'member', 'subset', 'emptyset', 'singleton', 'card', 'insert', 'complement', 'univset', 'transpose', 'tclosure', 'join', 'product', 'intersection', # string 'char', 'str\.\+\+', 'str\.len', 'str\.<', 'str\.to_re', 'str\.in_re', 're\.none', 're\.all', 're\.allchar', 're\.\+\+', 're\.union', 're\.inter', 're\.*', 'str\.<=', 'str\.at', 'str\.substr', 'str\.prefixof', 'str\.suffixof', 'str\.contains', 'str\.indexof', 'str\.replace', 'str\.replace_all', 'str\.replace_re', 'str\.replace_re_all', 're\.comp', 're\.diff', 're\.\+', 're\.opt', 're\.range', 're\.^', 're\.loop', 'str\.is_digit', 'str\.to_code', 'str\.from_code', 'str\.to_int', 'str\.from_int', ] tokens = { 'root': [ # comment (see lexicon) (r';.*$', token.Comment), # whitespace (r'\s+', token.Text), # numeral (decimal, hexadecimal, binary, see lexicon) (r'[0-9]+', token.Number), (r'#x[0-9a-fA-F]+', token.Number), (r'#b[01]+', token.Number), # bv constant (see BV theory specification) (r'bv[0-9]+', token.Number), # string constant (including escaped "", see lexicon) (r'".*?"', token.String), # reserved words (non-word and regular, see lexicon) (r'[!_](?=\s)', token.Name.Attribute), ('(as|let|exists|forall|match|per)\\b', token.Keyword), # Keywords (:foo, see lexicon) (r':[a-zA-Z~!@$%^&*_+=<>.?/-][a-zA-Z0-9~!@$%^&*_+=<>.?/-]*', token.Name.Attribute), # parentheses (r'\(', token.Text), (r'\)', token.Text), # commands (terminated by whitespace or ")") ('(' + '|'.join(COMMANDS) + ')(?=(\s|\)))', token.Keyword), # sorts (terminated by whitespace or ")") ('(' + '|'.join(SORTS) + ')(?=(\s|\)))', token.Name.Attribute), # operators (terminated by whitespace or ")") ('(' + '|'.join(OPERATORS) + ')(?=(\s|\)))', token.Operator), # symbols (regular and quoted, see lexicon) (r'[a-zA-Z~!@$%^&*_+=<>.?/-][a-zA-Z0-9~!@$%^&*_+=<>.?/-]*', token.Name), (r'\|[^|\\]*\|', token.Name), ], }