diff --git a/.gitignore b/.gitignore index 924ca1d..1ea3801 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,8 @@ build/ src/pyrex/bison_.c src/pyrex/bison_.h +bison.egg-info +src/bison_.cpython-35m-x86_64-linux-gnu.so +examples/java/javaparser.py +dist/ +*.so diff --git a/Makefile b/Makefile index 97c1923..91a462f 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,31 @@ +.PHONY: all module install clean develop java_test + +PYTHON=python + +JAVAS=javaparser.y javaparser.l + all: module module: - python2 setup.py build + $(PYTHON) setup.py build install: - python2 setup.py install + $(PYTHON) setup.py install clean: + $(PYTHON) setup.py clean rm -rf *~ *.output tokens.h *.tab.* *.yy.c java-grammar new.* *.o *.so dummy build *.pxi *-lexer.c rm -rf *-parser.y *-parser.c *-parser.h pybison.c pybison.h rm -rf bison.c bison.h rm -rf *.pyc rm -rf tmp.* rm -f src/pyrex/bison_.pxi src/pyrex/bison_.c src/pyrex/bison_.h + #rm -rf *.cpython-*.so + +develop: + $(PYTHON) setup.py develop + + +java_test: develop + cd examples/java && bison2py $(JAVAS) javaparser.py && $(PYTHON) run.py + \ No newline at end of file diff --git a/__init__.py b/__init__.py index 49a2717..ff32576 100644 --- a/__init__.py +++ b/__init__.py @@ -1,4 +1,5 @@ -from src.python.node import BisonNode -from src.python.bison import BisonParser, BisonSyntaxError +from __future__ import absolute_import +from .src.python.node import BisonNode +from .src.python.bison import BisonParser, BisonSyntaxError __all__ = ['BisonNode', 'BisonParser', 'BisonSyntaxError'] diff --git a/doc/calc.py b/doc/calc.py index ee9d564..bc8ce42 100755 --- a/doc/calc.py +++ b/doc/calc.py @@ -2,7 +2,10 @@ """ A simple pybison parser program implementing a calculator """ +from __future__ import absolute_import +from __future__ import print_function from bison import BisonParser +from six.moves import input class Parser(BisonParser): @@ -33,7 +36,7 @@ class Parser(BisonParser): # ------------------------------------------------------------------ def read(self, nbytes): try: - return raw_input("> ") + "\n" + return input("> ") + "\n" except EOFError: return '' @@ -63,7 +66,7 @@ def on_line(self, target, option, names, values): | exp NEWLINE """ if option == 1: - print values[0] + print(values[0]) def on_exp(self, target, option, names, values): """ @@ -109,7 +112,7 @@ def on_exp(self, target, option, names, values): extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); #define returntoken(tok) \ - yylval = PyString_FromString(strdup(yytext)); return (tok); + yylval = PyUnicode_FromString(strdup(yytext)); return (tok); #define YY_INPUT(buf,result,max_size) { \ (*py_input)(py_parser, buf, &result, max_size); \ } diff --git a/doc/walkthrough.html b/doc/walkthrough.html index fb0f606..0555eb1 100644 --- a/doc/walkthrough.html +++ b/doc/walkthrough.html @@ -208,7 +208,7 @@

1.3. Preparing Your Tokeniser File

#include "tokens.h" extern void *py_parser; extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); - #define returntoken(tok) yylval = PyString_FromString(strdup(yytext)); return (tok); + #define returntoken(tok) yylval = PyUnicode_FromString(strdup(yytext)); return (tok); #define YY_INPUT(buf,result,max_size) {(*py_input)(py_parser, buf, &result, max_size);} }% @@ -257,7 +257,7 @@

1.3. Preparing Your Tokeniser File

-    #define returntoken(tok) yylval = PyString_FromString(strdup(yytext)); return (tok);
+ #define returntoken(tok) yylval = PyUnicode_FromString(strdup(yytext)); return (tok); A macro which wraps all tokens values as Python strings, so your parser target handlers can uplift the original input text which constitutes that token. @@ -685,7 +685,7 @@

2.8. Add Flex Script

     %{
-    int yylineno = 0;
+    int yylineno = 0; // Remove if engine fails to build.
     #include <stdio.h>
     #include <string.h>
     #include "Python.h"
@@ -693,7 +693,7 @@ 

2.8. Add Flex Script

#include "tokens.h" extern void *py_parser; extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); - #define returntoken(tok) yylval = PyString_FromString(strdup(yytext)); return (tok); + #define returntoken(tok) yylval = PyUnicode_FromString(strdup(yytext)); return (tok); #define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } %}
@@ -770,7 +770,7 @@

2.8. Add Flex Script

lexscript = r""" %{ - int yylineno = 0; + int yylineno = 0; // Remove if engine fails to build. #include <stdio.h> #include <string.h> #include "Python.h" @@ -778,7 +778,7 @@

2.8. Add Flex Script

#include "tokens.h" extern void *py_parser; extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); - #define returntoken(tok) yylval = PyString_FromString(strdup(yytext)); return (tok); + #define returntoken(tok) yylval = PyUnicode_FromString(strdup(yytext)); return (tok); #define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } %} diff --git a/examples/C/c.l b/examples/C/c.l index 29310f4..0c7cf6d 100644 --- a/examples/C/c.l +++ b/examples/C/c.l @@ -11,7 +11,7 @@ IS (u|U|l|L)* /* this scanner sourced from: http://www.lysator.liu.se/c/ANSI-C-grammar-l.html */ void count(); -int yylineno = 0; +//int yylineno = 0; #include #include #include "Python.h" @@ -19,7 +19,7 @@ int yylineno = 0; #include "tokens.h" extern void *py_parser; extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); -#define returntoken(tok) /*printf("%d=%s\n", tok, yytext);*/ yylval = PyString_FromString(strdup(yytext)); return (tok); +#define returntoken(tok) /*printf("%d=%s\n", tok, yytext);*/ yylval = PyUnicode_FromString(strdup(yytext)); return (tok); #define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } %} diff --git a/examples/C/c.py b/examples/C/c.py new file mode 100644 index 0000000..d53cc5b --- /dev/null +++ b/examples/C/c.py @@ -0,0 +1,1818 @@ +#!/usr/bin/env python + +""" +PyBison file automatically generated from grammar file c.y +You can edit this module, or import it and subclass the Parser class +""" + +import sys + +from bison import BisonParser, BisonNode, BisonSyntaxError + +bisonFile = 'c.y' # original bison file +lexFile = 'c.l' # original flex file + +class ParseNode(BisonNode): + """ + This is the base class from which all your + parse nodes are derived. + Add methods to this class as you need them + """ + def __init__(self, **kw): + BisonNode.__init__(self, **kw) + + def __str__(self): + """Customise as needed""" + return '<%s instance at 0x%x>' % (self.__class__.__name__, hash(self)) + + def __repr__(self): + """Customise as needed""" + return str(self) + + def dump(self, indent=0): + """ + Dump out human-readable, indented parse tree + Customise as needed - here, or in the node-specific subclasses + """ + BisonNode.dump(self, indent) # alter as needed + + +# ------------------------------------------------------ +# Define a node class for each grammar target +# ------------------------------------------------------ + +class primary_expression_Node(ParseNode): + """ + Holds a "primary_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class postfix_expression_Node(ParseNode): + """ + Holds a "postfix_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class argument_expression_list_Node(ParseNode): + """ + Holds an "argument_expression_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class unary_expression_Node(ParseNode): + """ + Holds an "unary_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class unary_operator_Node(ParseNode): + """ + Holds an "unary_operator" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class cast_expression_Node(ParseNode): + """ + Holds a "cast_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class multiplicative_expression_Node(ParseNode): + """ + Holds a "multiplicative_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class additive_expression_Node(ParseNode): + """ + Holds an "additive_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class shift_expression_Node(ParseNode): + """ + Holds a "shift_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class relational_expression_Node(ParseNode): + """ + Holds a "relational_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class equality_expression_Node(ParseNode): + """ + Holds an "equality_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class and_expression_Node(ParseNode): + """ + Holds an "and_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class exclusive_or_expression_Node(ParseNode): + """ + Holds an "exclusive_or_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class inclusive_or_expression_Node(ParseNode): + """ + Holds an "inclusive_or_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class logical_and_expression_Node(ParseNode): + """ + Holds a "logical_and_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class logical_or_expression_Node(ParseNode): + """ + Holds a "logical_or_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class conditional_expression_Node(ParseNode): + """ + Holds a "conditional_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class assignment_expression_Node(ParseNode): + """ + Holds an "assignment_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class assignment_operator_Node(ParseNode): + """ + Holds an "assignment_operator" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class expression_Node(ParseNode): + """ + Holds an "expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class constant_expression_Node(ParseNode): + """ + Holds a "constant_expression" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class declaration_Node(ParseNode): + """ + Holds a "declaration" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class declaration_specifiers_Node(ParseNode): + """ + Holds a "declaration_specifiers" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class init_declarator_list_Node(ParseNode): + """ + Holds an "init_declarator_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class init_declarator_Node(ParseNode): + """ + Holds an "init_declarator" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class storage_class_specifier_Node(ParseNode): + """ + Holds a "storage_class_specifier" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class type_specifier_Node(ParseNode): + """ + Holds a "type_specifier" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class struct_or_union_specifier_Node(ParseNode): + """ + Holds a "struct_or_union_specifier" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class struct_or_union_Node(ParseNode): + """ + Holds a "struct_or_union" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class struct_declaration_list_Node(ParseNode): + """ + Holds a "struct_declaration_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class struct_declaration_Node(ParseNode): + """ + Holds a "struct_declaration" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class specifier_qualifier_list_Node(ParseNode): + """ + Holds a "specifier_qualifier_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class struct_declarator_list_Node(ParseNode): + """ + Holds a "struct_declarator_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class struct_declarator_Node(ParseNode): + """ + Holds a "struct_declarator" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class enum_specifier_Node(ParseNode): + """ + Holds an "enum_specifier" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class enumerator_list_Node(ParseNode): + """ + Holds an "enumerator_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class enumerator_Node(ParseNode): + """ + Holds an "enumerator" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class type_qualifier_Node(ParseNode): + """ + Holds a "type_qualifier" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class declarator_Node(ParseNode): + """ + Holds a "declarator" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class direct_declarator_Node(ParseNode): + """ + Holds a "direct_declarator" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class pointer_Node(ParseNode): + """ + Holds a "pointer" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class type_qualifier_list_Node(ParseNode): + """ + Holds a "type_qualifier_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class parameter_type_list_Node(ParseNode): + """ + Holds a "parameter_type_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class parameter_list_Node(ParseNode): + """ + Holds a "parameter_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class parameter_declaration_Node(ParseNode): + """ + Holds a "parameter_declaration" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class identifier_list_Node(ParseNode): + """ + Holds an "identifier_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class type_name_Node(ParseNode): + """ + Holds a "type_name" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class abstract_declarator_Node(ParseNode): + """ + Holds an "abstract_declarator" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class direct_abstract_declarator_Node(ParseNode): + """ + Holds a "direct_abstract_declarator" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class initializer_Node(ParseNode): + """ + Holds an "initializer" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class initializer_list_Node(ParseNode): + """ + Holds an "initializer_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class statement_Node(ParseNode): + """ + Holds a "statement" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class labeled_statement_Node(ParseNode): + """ + Holds a "labeled_statement" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class compound_statement_Node(ParseNode): + """ + Holds a "compound_statement" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class declaration_list_Node(ParseNode): + """ + Holds a "declaration_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class statement_list_Node(ParseNode): + """ + Holds a "statement_list" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class expression_statement_Node(ParseNode): + """ + Holds an "expression_statement" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class selection_statement_Node(ParseNode): + """ + Holds a "selection_statement" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class iteration_statement_Node(ParseNode): + """ + Holds an "iteration_statement" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class jump_statement_Node(ParseNode): + """ + Holds a "jump_statement" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class translation_unit_Node(ParseNode): + """ + Holds a "translation_unit" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class external_declaration_Node(ParseNode): + """ + Holds an "external_declaration" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class function_definition_Node(ParseNode): + """ + Holds a "function_definition" parse target and its components. + """ + def __init__(self, **kw): + ParseNode.__init__(self, **kw) + + def dump(self, indent=0): + ParseNode.dump(self, indent) + +class Parser(BisonParser): + """ + bison Parser class generated automatically by bison2py from the + grammar file "c.y" and lex file "c.l" + + You may (and probably should) edit the methods in this class. + You can freely edit the rules (in the method docstrings), the + tokens list, the start symbol, and the precedences. + + Each time this class is instantiated, a hashing technique in the + base class detects if you have altered any of the rules. If any + changes are detected, a new dynamic lib for the parser engine + will be generated automatically. + """ + + # -------------------------------------------- + # basename of binary parser engine dynamic lib + # -------------------------------------------- + bisonEngineLibName = 'c-engine' + + # ---------------------------------------------------------------- + # lexer tokens - these must match those in your lex script (below) + # ---------------------------------------------------------------- + tokens = ['IDENTIFIER', 'CONSTANT', 'STRING_LITERAL', 'SIZEOF', 'PTR_OP', 'INC_OP', 'DEC_OP', 'LEFT_OP', 'RIGHT_OP', 'LE_OP', 'GE_OP', 'EQ_OP', 'NE_OP', 'BOOL_AND_OP', 'BOOL_OR_OP', 'MUL_ASSIGN', 'DIV_ASSIGN', 'MOD_ASSIGN', 'ADD_ASSIGN', 'SUB_ASSIGN', 'LEFT_ASSIGN', 'RIGHT_ASSIGN', 'AND_ASSIGN', 'XOR_ASSIGN', 'OR_ASSIGN', 'TYPE_NAME', 'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', 'LBRACE', 'RBRACE', 'PERIOD', 'COMMA', 'COLON', 'SEMICOLON', 'QUESTIONMARK', 'PLUS', 'MINUS', 'STAR', 'SLASH', 'ASSIGN', 'AND_OP', 'OR_OP', 'BANG', 'TILDE', 'PERCENT', 'CIRCUMFLEX', 'GT_OP', 'LT_OP', 'TYPEDEF', 'EXTERN', 'STATIC', 'AUTO', 'REGISTER', 'CHAR', 'SHORT', 'INT', 'LONG', 'SIGNED', 'UNSIGNED', 'FLOAT', 'DOUBLE', 'CONST', 'VOLATILE', 'VOID', 'STRUCT', 'UNION', 'ENUM', 'ELLIPSIS', 'CASE', 'DEFAULT', 'IF', 'ELSE', 'SWITCH', 'WHILE', 'DO', 'FOR', 'GOTO', 'CONTINUE', 'BREAK', 'RETURN'] + + # ------------------------------ + # precedences + # ------------------------------ + precedences = ( + ('left', ['COMMA'],), + ('right', ['ASSIGN', 'ADD_ASSIGN', 'SUB_ASSIGN', 'MUL_ASSIGN', 'DIV_ASSIGN', 'MOD_ASSIGN', 'LEFT_ASSIGN', 'RIGHT_ASSIGN', 'AND_ASSIGN', 'XOR_ASSIGN', 'OR_ASSIGN'],), + ('right', ['QUESTIONMARK', 'COLON'],), + ('left', ['BOOL_OR_OP'],), + ('left', ['BOOL_AND_OP'],), + ('left', ['OR_OP'],), + ('left', ['CIRCUMFLEX'],), + ('left', ['AND_OP'],), + ('left', ['EQ_OP', 'NE_OP'],), + ('left', ['LT_OP', 'GT_OP', 'LE_OP', 'GE_OP'],), + ('left', ['LEFT_OP', 'RIGHT_OP'],), + ('left', ['PLUS', 'MINUS'],), + ('left', ['STAR', 'SLASH', 'PERCENT'],), + ('right', ['NOT', 'NEG'],), + ('right', ['INC_OP', 'SIZEOF', 'DEC_OP'],), + ('left', ['LBRACKET', 'LPAREN', 'PERIOD', 'PTR_OP'],), + ) + + # --------------------------------------------------------------- + # Declare the start target here (by name) + # --------------------------------------------------------------- + start = 'translation_unit' + + # --------------------------------------------------------------- + # These methods are the python handlers for the bison targets. + # (which get called by the bison code each time the corresponding + # parse target is unambiguously reached) + # + # WARNING - don't touch the method docstrings unless you know what + # you are doing - they are in bison rule syntax, and are passed + # verbatim to bison to build the parser engine library. + # --------------------------------------------------------------- + + def on_primary_expression(self, target, option, names, values): + """ + primary_expression + : IDENTIFIER + | CONSTANT + | STRING_LITERAL + | LPAREN expression RPAREN + """ + return primary_expression_Node( + target='primary_expression', + option=option, + names=names, + values=values) + + def on_postfix_expression(self, target, option, names, values): + """ + postfix_expression + : primary_expression + | postfix_expression LBRACKET expression RBRACKET + | postfix_expression LPAREN RPAREN + | postfix_expression LPAREN argument_expression_list RPAREN + | postfix_expression PERIOD IDENTIFIER + | postfix_expression PTR_OP IDENTIFIER + | postfix_expression INC_OP + | postfix_expression DEC_OP + """ + return postfix_expression_Node( + target='postfix_expression', + option=option, + names=names, + values=values) + + def on_argument_expression_list(self, target, option, names, values): + """ + argument_expression_list + : assignment_expression + | argument_expression_list COMMA assignment_expression + """ + return argument_expression_list_Node( + target='argument_expression_list', + option=option, + names=names, + values=values) + + def on_unary_expression(self, target, option, names, values): + """ + unary_expression + : postfix_expression + | INC_OP unary_expression + | DEC_OP unary_expression + | unary_operator cast_expression + | SIZEOF unary_expression + | SIZEOF LPAREN type_name RPAREN + """ + return unary_expression_Node( + target='unary_expression', + option=option, + names=names, + values=values) + + def on_unary_operator(self, target, option, names, values): + """ + unary_operator + : AND_OP + | STAR + | PLUS + | MINUS + | TILDE + | BANG + """ + return unary_operator_Node( + target='unary_operator', + option=option, + names=names, + values=values) + + def on_cast_expression(self, target, option, names, values): + """ + cast_expression + : unary_expression + | LPAREN type_name RPAREN cast_expression + """ + return cast_expression_Node( + target='cast_expression', + option=option, + names=names, + values=values) + + def on_multiplicative_expression(self, target, option, names, values): + """ + multiplicative_expression + : cast_expression + | multiplicative_expression STAR cast_expression + | multiplicative_expression SLASH cast_expression + | multiplicative_expression PERCENT cast_expression + """ + return multiplicative_expression_Node( + target='multiplicative_expression', + option=option, + names=names, + values=values) + + def on_additive_expression(self, target, option, names, values): + """ + additive_expression + : multiplicative_expression + | additive_expression PLUS multiplicative_expression + | additive_expression MINUS multiplicative_expression + """ + return additive_expression_Node( + target='additive_expression', + option=option, + names=names, + values=values) + + def on_shift_expression(self, target, option, names, values): + """ + shift_expression + : additive_expression + | shift_expression LEFT_OP additive_expression + | shift_expression RIGHT_OP additive_expression + """ + return shift_expression_Node( + target='shift_expression', + option=option, + names=names, + values=values) + + def on_relational_expression(self, target, option, names, values): + """ + relational_expression + : shift_expression + | relational_expression LT_OP shift_expression + | relational_expression GT_OP shift_expression + | relational_expression LE_OP shift_expression + | relational_expression GE_OP shift_expression + """ + return relational_expression_Node( + target='relational_expression', + option=option, + names=names, + values=values) + + def on_equality_expression(self, target, option, names, values): + """ + equality_expression + : relational_expression + | equality_expression EQ_OP relational_expression + | equality_expression NE_OP relational_expression + """ + return equality_expression_Node( + target='equality_expression', + option=option, + names=names, + values=values) + + def on_and_expression(self, target, option, names, values): + """ + and_expression + : equality_expression + | and_expression AND_OP equality_expression + """ + return and_expression_Node( + target='and_expression', + option=option, + names=names, + values=values) + + def on_exclusive_or_expression(self, target, option, names, values): + """ + exclusive_or_expression + : and_expression + | exclusive_or_expression CIRCUMFLEX and_expression + """ + return exclusive_or_expression_Node( + target='exclusive_or_expression', + option=option, + names=names, + values=values) + + def on_inclusive_or_expression(self, target, option, names, values): + """ + inclusive_or_expression + : exclusive_or_expression + | inclusive_or_expression OR_OP exclusive_or_expression + """ + return inclusive_or_expression_Node( + target='inclusive_or_expression', + option=option, + names=names, + values=values) + + def on_logical_and_expression(self, target, option, names, values): + """ + logical_and_expression + : inclusive_or_expression + | logical_and_expression BOOL_AND_OP inclusive_or_expression + """ + return logical_and_expression_Node( + target='logical_and_expression', + option=option, + names=names, + values=values) + + def on_logical_or_expression(self, target, option, names, values): + """ + logical_or_expression + : logical_and_expression + | logical_or_expression BOOL_OR_OP logical_and_expression + """ + return logical_or_expression_Node( + target='logical_or_expression', + option=option, + names=names, + values=values) + + def on_conditional_expression(self, target, option, names, values): + """ + conditional_expression + : logical_or_expression + | logical_or_expression QUESTIONMARK expression COLON conditional_expression + """ + return conditional_expression_Node( + target='conditional_expression', + option=option, + names=names, + values=values) + + def on_assignment_expression(self, target, option, names, values): + """ + assignment_expression + : conditional_expression + | unary_expression assignment_operator assignment_expression + """ + return assignment_expression_Node( + target='assignment_expression', + option=option, + names=names, + values=values) + + def on_assignment_operator(self, target, option, names, values): + """ + assignment_operator + : ASSIGN + | MUL_ASSIGN + | DIV_ASSIGN + | MOD_ASSIGN + | ADD_ASSIGN + | SUB_ASSIGN + | LEFT_ASSIGN + | RIGHT_ASSIGN + | AND_ASSIGN + | XOR_ASSIGN + | OR_ASSIGN + """ + return assignment_operator_Node( + target='assignment_operator', + option=option, + names=names, + values=values) + + def on_expression(self, target, option, names, values): + """ + expression + : assignment_expression + | expression COMMA assignment_expression + """ + return expression_Node( + target='expression', + option=option, + names=names, + values=values) + + def on_constant_expression(self, target, option, names, values): + """ + constant_expression + : conditional_expression + """ + return constant_expression_Node( + target='constant_expression', + option=option, + names=names, + values=values) + + def on_declaration(self, target, option, names, values): + """ + declaration + : declaration_specifiers SEMICOLON + | declaration_specifiers init_declarator_list SEMICOLON + """ + return declaration_Node( + target='declaration', + option=option, + names=names, + values=values) + + def on_declaration_specifiers(self, target, option, names, values): + """ + declaration_specifiers + : storage_class_specifier + | storage_class_specifier declaration_specifiers + | type_specifier + | type_specifier declaration_specifiers + | type_qualifier + | type_qualifier declaration_specifiers + """ + return declaration_specifiers_Node( + target='declaration_specifiers', + option=option, + names=names, + values=values) + + def on_init_declarator_list(self, target, option, names, values): + """ + init_declarator_list + : init_declarator + | init_declarator_list COMMA init_declarator + """ + return init_declarator_list_Node( + target='init_declarator_list', + option=option, + names=names, + values=values) + + def on_init_declarator(self, target, option, names, values): + """ + init_declarator + : declarator + | declarator ASSIGN initializer + """ + return init_declarator_Node( + target='init_declarator', + option=option, + names=names, + values=values) + + def on_storage_class_specifier(self, target, option, names, values): + """ + storage_class_specifier + : TYPEDEF + | EXTERN + | STATIC + | AUTO + | REGISTER + """ + return storage_class_specifier_Node( + target='storage_class_specifier', + option=option, + names=names, + values=values) + + def on_type_specifier(self, target, option, names, values): + """ + type_specifier + : VOID + | CHAR + | SHORT + | INT + | LONG + | FLOAT + | DOUBLE + | SIGNED + | UNSIGNED + | struct_or_union_specifier + | enum_specifier + | TYPE_NAME + """ + return type_specifier_Node( + target='type_specifier', + option=option, + names=names, + values=values) + + def on_struct_or_union_specifier(self, target, option, names, values): + """ + struct_or_union_specifier + : struct_or_union IDENTIFIER LBRACE struct_declaration_list RBRACE + | struct_or_union LBRACE struct_declaration_list RBRACE + | struct_or_union IDENTIFIER + """ + return struct_or_union_specifier_Node( + target='struct_or_union_specifier', + option=option, + names=names, + values=values) + + def on_struct_or_union(self, target, option, names, values): + """ + struct_or_union + : STRUCT + | UNION + """ + return struct_or_union_Node( + target='struct_or_union', + option=option, + names=names, + values=values) + + def on_struct_declaration_list(self, target, option, names, values): + """ + struct_declaration_list + : struct_declaration + | struct_declaration_list struct_declaration + """ + return struct_declaration_list_Node( + target='struct_declaration_list', + option=option, + names=names, + values=values) + + def on_struct_declaration(self, target, option, names, values): + """ + struct_declaration + : specifier_qualifier_list struct_declarator_list SEMICOLON + """ + return struct_declaration_Node( + target='struct_declaration', + option=option, + names=names, + values=values) + + def on_specifier_qualifier_list(self, target, option, names, values): + """ + specifier_qualifier_list + : type_specifier specifier_qualifier_list + | type_specifier + | type_qualifier specifier_qualifier_list + | type_qualifier + """ + return specifier_qualifier_list_Node( + target='specifier_qualifier_list', + option=option, + names=names, + values=values) + + def on_struct_declarator_list(self, target, option, names, values): + """ + struct_declarator_list + : struct_declarator + | struct_declarator_list COMMA struct_declarator + """ + return struct_declarator_list_Node( + target='struct_declarator_list', + option=option, + names=names, + values=values) + + def on_struct_declarator(self, target, option, names, values): + """ + struct_declarator + : declarator + | COLON constant_expression + | declarator COLON constant_expression + """ + return struct_declarator_Node( + target='struct_declarator', + option=option, + names=names, + values=values) + + def on_enum_specifier(self, target, option, names, values): + """ + enum_specifier + : ENUM LBRACE enumerator_list RBRACE + | ENUM IDENTIFIER LBRACE enumerator_list RBRACE + | ENUM IDENTIFIER + """ + return enum_specifier_Node( + target='enum_specifier', + option=option, + names=names, + values=values) + + def on_enumerator_list(self, target, option, names, values): + """ + enumerator_list + : enumerator + | enumerator_list COMMA enumerator + """ + return enumerator_list_Node( + target='enumerator_list', + option=option, + names=names, + values=values) + + def on_enumerator(self, target, option, names, values): + """ + enumerator + : IDENTIFIER + | IDENTIFIER ASSIGN constant_expression + """ + return enumerator_Node( + target='enumerator', + option=option, + names=names, + values=values) + + def on_type_qualifier(self, target, option, names, values): + """ + type_qualifier + : CONST + | VOLATILE + """ + return type_qualifier_Node( + target='type_qualifier', + option=option, + names=names, + values=values) + + def on_declarator(self, target, option, names, values): + """ + declarator + : pointer direct_declarator + | direct_declarator + """ + return declarator_Node( + target='declarator', + option=option, + names=names, + values=values) + + def on_direct_declarator(self, target, option, names, values): + """ + direct_declarator + : IDENTIFIER + | LPAREN declarator RPAREN + | direct_declarator LBRACKET constant_expression RBRACKET + | direct_declarator LBRACKET RBRACKET + | direct_declarator LPAREN parameter_type_list RPAREN + | direct_declarator LPAREN identifier_list RPAREN + | direct_declarator LPAREN RPAREN + """ + return direct_declarator_Node( + target='direct_declarator', + option=option, + names=names, + values=values) + + def on_pointer(self, target, option, names, values): + """ + pointer + : STAR + | STAR type_qualifier_list + | STAR pointer + | STAR type_qualifier_list pointer + """ + return pointer_Node( + target='pointer', + option=option, + names=names, + values=values) + + def on_type_qualifier_list(self, target, option, names, values): + """ + type_qualifier_list + : type_qualifier + | type_qualifier_list type_qualifier + """ + return type_qualifier_list_Node( + target='type_qualifier_list', + option=option, + names=names, + values=values) + + def on_parameter_type_list(self, target, option, names, values): + """ + parameter_type_list + : parameter_list + | parameter_list COMMA ELLIPSIS + """ + return parameter_type_list_Node( + target='parameter_type_list', + option=option, + names=names, + values=values) + + def on_parameter_list(self, target, option, names, values): + """ + parameter_list + : parameter_declaration + | parameter_list COMMA parameter_declaration + """ + return parameter_list_Node( + target='parameter_list', + option=option, + names=names, + values=values) + + def on_parameter_declaration(self, target, option, names, values): + """ + parameter_declaration + : declaration_specifiers declarator + | declaration_specifiers abstract_declarator + | declaration_specifiers + """ + return parameter_declaration_Node( + target='parameter_declaration', + option=option, + names=names, + values=values) + + def on_identifier_list(self, target, option, names, values): + """ + identifier_list + : IDENTIFIER + | identifier_list COMMA IDENTIFIER + """ + return identifier_list_Node( + target='identifier_list', + option=option, + names=names, + values=values) + + def on_type_name(self, target, option, names, values): + """ + type_name + : specifier_qualifier_list + | specifier_qualifier_list abstract_declarator + """ + return type_name_Node( + target='type_name', + option=option, + names=names, + values=values) + + def on_abstract_declarator(self, target, option, names, values): + """ + abstract_declarator + : pointer + | direct_abstract_declarator + | pointer direct_abstract_declarator + """ + return abstract_declarator_Node( + target='abstract_declarator', + option=option, + names=names, + values=values) + + def on_direct_abstract_declarator(self, target, option, names, values): + """ + direct_abstract_declarator + : LPAREN abstract_declarator RPAREN + | LBRACKET RBRACKET + | LBRACKET constant_expression RBRACKET + | direct_abstract_declarator LBRACKET RBRACKET + | direct_abstract_declarator LBRACKET constant_expression RBRACKET + | LPAREN RPAREN + | LPAREN parameter_type_list RPAREN + | direct_abstract_declarator LPAREN RPAREN + | direct_abstract_declarator LPAREN parameter_type_list RPAREN + """ + return direct_abstract_declarator_Node( + target='direct_abstract_declarator', + option=option, + names=names, + values=values) + + def on_initializer(self, target, option, names, values): + """ + initializer + : assignment_expression + | LBRACE initializer_list RBRACE + | LBRACE initializer_list COMMA RBRACE + """ + return initializer_Node( + target='initializer', + option=option, + names=names, + values=values) + + def on_initializer_list(self, target, option, names, values): + """ + initializer_list + : initializer + | initializer_list COMMA initializer + """ + return initializer_list_Node( + target='initializer_list', + option=option, + names=names, + values=values) + + def on_statement(self, target, option, names, values): + """ + statement + : labeled_statement + | compound_statement + | expression_statement + | selection_statement + | iteration_statement + | jump_statement + """ + return statement_Node( + target='statement', + option=option, + names=names, + values=values) + + def on_labeled_statement(self, target, option, names, values): + """ + labeled_statement + : IDENTIFIER COLON statement + | CASE constant_expression COLON statement + | DEFAULT COLON statement + """ + return labeled_statement_Node( + target='labeled_statement', + option=option, + names=names, + values=values) + + def on_compound_statement(self, target, option, names, values): + """ + compound_statement + : LBRACE RBRACE + | LBRACE statement_list RBRACE + | LBRACE declaration_list RBRACE + | LBRACE declaration_list statement_list RBRACE + """ + return compound_statement_Node( + target='compound_statement', + option=option, + names=names, + values=values) + + def on_declaration_list(self, target, option, names, values): + """ + declaration_list + : declaration + | declaration_list declaration + """ + return declaration_list_Node( + target='declaration_list', + option=option, + names=names, + values=values) + + def on_statement_list(self, target, option, names, values): + """ + statement_list + : statement + | statement_list statement + """ + return statement_list_Node( + target='statement_list', + option=option, + names=names, + values=values) + + def on_expression_statement(self, target, option, names, values): + """ + expression_statement + : SEMICOLON + | expression SEMICOLON + """ + return expression_statement_Node( + target='expression_statement', + option=option, + names=names, + values=values) + + def on_selection_statement(self, target, option, names, values): + """ + selection_statement + : IF LPAREN expression RPAREN statement + | IF LPAREN expression RPAREN statement ELSE statement + | SWITCH LPAREN expression RPAREN statement + """ + return selection_statement_Node( + target='selection_statement', + option=option, + names=names, + values=values) + + def on_iteration_statement(self, target, option, names, values): + """ + iteration_statement + : WHILE LPAREN expression RPAREN statement + | DO statement WHILE LPAREN expression RPAREN SEMICOLON + | FOR LPAREN expression_statement expression_statement RPAREN statement + | FOR LPAREN expression_statement expression_statement expression RPAREN statement + """ + return iteration_statement_Node( + target='iteration_statement', + option=option, + names=names, + values=values) + + def on_jump_statement(self, target, option, names, values): + """ + jump_statement + : GOTO IDENTIFIER SEMICOLON + | CONTINUE SEMICOLON + | BREAK SEMICOLON + | RETURN SEMICOLON + | RETURN expression SEMICOLON + """ + return jump_statement_Node( + target='jump_statement', + option=option, + names=names, + values=values) + + def on_translation_unit(self, target, option, names, values): + """ + translation_unit + : external_declaration + | translation_unit external_declaration + """ + return translation_unit_Node( + target='translation_unit', + option=option, + names=names, + values=values) + + def on_external_declaration(self, target, option, names, values): + """ + external_declaration + : function_definition + | declaration + """ + return external_declaration_Node( + target='external_declaration', + option=option, + names=names, + values=values) + + def on_function_definition(self, target, option, names, values): + """ + function_definition + : declaration_specifiers declarator declaration_list compound_statement + | declaration_specifiers declarator compound_statement + | declarator declaration_list compound_statement + | declarator compound_statement + """ + return function_definition_Node( + target='function_definition', + option=option, + names=names, + values=values) + + # ----------------------------------------- + # raw lex script, verbatim here + # ----------------------------------------- + lexscript = r""" +D [0-9] +L [a-zA-Z_] +H [a-fA-F0-9] +E [Ee][+-]?{D}+ +FS (f|F|l|L) +IS (u|U|l|L)* + + +%{ + +/* this scanner sourced from: http://www.lysator.liu.se/c/ANSI-C-grammar-l.html */ + +void count(); +//int yylineno = 0; +#include +#include +#include "Python.h" +#define YYSTYPE void * +#include "tokens.h" +extern void *py_parser; +extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); +#define returntoken(tok) /*printf("%d=%s\n", tok, yytext);*/ yylval = PyUnicode_FromString(strdup(yytext)); return (tok); +#define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } + +%} + + +%% +"/*" { comment(); } + +"auto" { count(); returntoken(AUTO); } +"break" { count(); returntoken(BREAK); } +"case" { count(); returntoken(CASE); } +"char" { count(); returntoken(CHAR); } +"const" { count(); returntoken(CONST); } +"continue" { count(); returntoken(CONTINUE); } +"default" { count(); returntoken(DEFAULT); } +"do" { count(); returntoken(DO); } +"double" { count(); returntoken(DOUBLE); } +"else" { count(); returntoken(ELSE); } +"enum" { count(); returntoken(ENUM); } +"extern" { count(); returntoken(EXTERN); } +"float" { count(); returntoken(FLOAT); } +"for" { count(); returntoken(FOR); } +"goto" { count(); returntoken(GOTO); } +"if" { count(); returntoken(IF); } +"int" { count(); returntoken(INT); } +"long" { count(); returntoken(LONG); } +"register" { count(); returntoken(REGISTER); } +"return" { count(); returntoken(RETURN); } +"short" { count(); returntoken(SHORT); } +"signed" { count(); returntoken(SIGNED); } +"sizeof" { count(); returntoken(SIZEOF); } +"static" { count(); returntoken(STATIC); } +"struct" { count(); returntoken(STRUCT); } +"switch" { count(); returntoken(SWITCH); } +"typedef" { count(); returntoken(TYPEDEF); } +"union" { count(); returntoken(UNION); } +"unsigned" { count(); returntoken(UNSIGNED); } +"void" { count(); returntoken(VOID); } +"volatile" { count(); returntoken(VOLATILE); } +"while" { count(); returntoken(WHILE); } + +{L}({L}|{D})* { count(); returntoken(check_type()); } + +0[xX]{H}+{IS}? { count(); returntoken(CONSTANT); } +0{D}+{IS}? { count(); returntoken(CONSTANT); } +{D}+{IS}? { count(); returntoken(CONSTANT); } +L?'(\\.|[^\\'])+' { count(); returntoken(CONSTANT); } + +{D}+{E}{FS}? { count(); returntoken(CONSTANT); } +{D}*"."{D}+({E})?{FS}? { count(); returntoken(CONSTANT); } +{D}+"."{D}*({E})?{FS}? { count(); returntoken(CONSTANT); } + +L?\"(\\.|[^\\"])*\" { count(); returntoken(STRING_LITERAL); } + +"..." { count(); returntoken(ELLIPSIS); } +">>=" { count(); returntoken(RIGHT_ASSIGN); } +"<<=" { count(); returntoken(LEFT_ASSIGN); } +"+=" { count(); returntoken(ADD_ASSIGN); } +"-=" { count(); returntoken(SUB_ASSIGN); } +"*=" { count(); returntoken(MUL_ASSIGN); } +"/=" { count(); returntoken(DIV_ASSIGN); } +"%=" { count(); returntoken(MOD_ASSIGN); } +"&=" { count(); returntoken(AND_ASSIGN); } +"^=" { count(); returntoken(XOR_ASSIGN); } +"|=" { count(); returntoken(OR_ASSIGN); } +">>" { count(); returntoken(RIGHT_OP); } +"<<" { count(); returntoken(LEFT_OP); } +"++" { count(); returntoken(INC_OP); } +"--" { count(); returntoken(DEC_OP); } +"->" { count(); returntoken(PTR_OP); } +"&&" { count(); returntoken(BOOL_AND_OP); } +"||" { count(); returntoken(BOOL_OR_OP); } +"<=" { count(); returntoken(LE_OP); } +">=" { count(); returntoken(GE_OP); } +"==" { count(); returntoken(EQ_OP); } +"!=" { count(); returntoken(NE_OP); } +";" { count(); returntoken(SEMICOLON); } +("{"|"<%") { count(); returntoken(LBRACE); } +("}"|"%>") { count(); returntoken(RBRACE); } +"," { count(); returntoken(COMMA); } +":" { count(); returntoken(COLON); } +"=" { count(); returntoken(ASSIGN); } +"(" { count(); returntoken(LPAREN); } +")" { count(); returntoken(RPAREN); } +("["|"<:") { count(); returntoken(LBRACKET); } +("]"|":>") { count(); returntoken(RBRACKET); } +"." { count(); returntoken(PERIOD); } +"&" { count(); returntoken(AND_OP); } +"!" { count(); returntoken(BANG); } +"~" { count(); returntoken(TILDE); } +"-" { count(); returntoken(MINUS); } +"+" { count(); returntoken(PLUS); } +"*" { count(); returntoken(STAR); } +"/" { count(); returntoken(SLASH); } +"%" { count(); returntoken(PERCENT); } +"<" { count(); returntoken(LT_OP); } +">" { count(); returntoken(GT_OP); } +"^" { count(); returntoken(CIRCUMFLEX); } +"|" { count(); returntoken(OR_OP); } +"?" { count(); returntoken(QUESTIONMARK); } + +[ \t\v\n\f] { count(); } +. { /* ignore bad characters */ } + +%% + +yywrap() +{ + return(1); +} + + +comment() +{ + char c, c1; + +loop: + while ((c = input()) != '*' && c != 0) + /*putchar(c)*/; + + if ((c1 = input()) != '/' && c != 0) + { + unput(c1); + goto loop; + } + + if (c != 0) + /*putchar(c1)*/; +} + + +int column = 0; + +void count() +{ + int i; + + for (i = 0; yytext[i] != '\0'; i++) + if (yytext[i] == '\n') + column = 0; + else if (yytext[i] == '\t') + column += 8 - (column % 8); + else + column++; + + /*ECHO*/; +} + + +int check_type() +{ +/* +* pseudo code --- this is what it should check +* +* if (yytext == type_name) +* return(TYPE_NAME); +* +* return(IDENTIFIER); +*/ + +/* +* it actually will only return IDENTIFIER +*/ + + return(IDENTIFIER); +} + + + """ + # ----------------------------------------- + # end raw lex script + # ----------------------------------------- + +def usage(): + print ('%s: PyBison parser derived from %s and %s' % (sys.argv[0], bisonFile, lexFile)) + print ('Usage: %s [-k] [-v] [-d] [filename]' % sys.argv[0]) + print (' -k Keep temporary files used in building parse engine lib') + print (' -v Enable verbose messages while parser is running') + print (' -d Enable garrulous debug messages from parser engine') + print (' filename path of a file to parse, defaults to stdin') + +def main(*args): + """ + Unit-testing func + """ + + keepfiles = 0 + verbose = 0 + debug = 0 + filename = None + + for s in ['-h', '-help', '--h', '--help', '-?']: + if s in args: + usage() + sys.exit(0) + + if len(args) > 0: + if '-k' in args: + keepfiles = 1 + args.remove('-k') + if '-v' in args: + verbose = 1 + args.remove('-v') + if '-d' in args: + debug = 1 + args.remove('-d') + if len(args) > 0: + filename = args[0] + + p = Parser(verbose=verbose, keepfiles=keepfiles) + tree = p.run(file=filename, debug=debug) + return tree + +if __name__ == '__main__': + main(*(sys.argv[1:])) + diff --git a/examples/calc/calc.py b/examples/calc/calc.py index 9a27348..edd5d5d 100755 --- a/examples/calc/calc.py +++ b/examples/calc/calc.py @@ -2,7 +2,10 @@ """ A simple pybison parser program implementing a calculator """ +from __future__ import absolute_import +from __future__ import print_function from bison import BisonParser +from six.moves import input class Parser(BisonParser): @@ -33,7 +36,7 @@ class Parser(BisonParser): # ------------------------------------------------------------------ def read(self, nbytes): try: - return raw_input("> ") + "\n" + return input("> ") + "\n" except EOFError: return '' @@ -63,7 +66,7 @@ def on_line(self, target, option, names, values): | exp NEWLINE """ if option == 1: - print values[0] + print(values[0]) def on_exp(self, target, option, names, values): """ @@ -109,7 +112,7 @@ def on_exp(self, target, option, names, values): extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); #define returntoken(tok) \ - yylval = PyString_FromString(strdup(yytext)); return (tok); + yylval = PyUnicode_FromString(strdup(yytext)); return (tok); #define YY_INPUT(buf,result,max_size) { \ (*py_input)(py_parser, buf, &result, max_size); \ } diff --git a/examples/calc/run.py b/examples/calc/run.py index f796000..f1ed1e1 100755 --- a/examples/calc/run.py +++ b/examples/calc/run.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +from __future__ import absolute_import import sys sys.path.insert(0, '../../build/lib.linux-x86_64-2.7/') diff --git a/examples/calc1/calc1.py b/examples/calc1/calc1.py index ef2b365..807eb8f 100755 --- a/examples/calc1/calc1.py +++ b/examples/calc1/calc1.py @@ -3,9 +3,12 @@ A more advanced calculator example, with variable storage and scientific functions (courtesy of python 'math' module) """ +from __future__ import absolute_import +from __future__ import print_function import math from bison import BisonParser +from six.moves import input class Parser(BisonParser): @@ -13,6 +16,11 @@ class Parser(BisonParser): Implements the calculator parser. Grammar rules are defined in the method docstrings. Scanner rules are in the 'lexscript' attribute. """ + verbose = True + debugSymbols=True + keepfiles = True + import os + #os.environ['LINK'] = '/debug' # ---------------------------------------------------------------- # lexer tokens - these must match those in your lex script (below) # ---------------------------------------------------------------- @@ -44,7 +52,7 @@ class Parser(BisonParser): # ------------------------------------------------------------------ def read(self, nbytes): try: - return raw_input("> ") + "\n" + return input("> ") + "\n" except EOFError: return '' @@ -85,7 +93,7 @@ def on_line(self, target, option, names, values): | error """ if option == 1: - print values[0] + print(values[0]) return values[0] elif option == 2: self.vars[values[0]] = values[2] @@ -94,7 +102,7 @@ def on_line(self, target, option, names, values): self.show_help() elif option == 4: line, msg, near = self.lasterror - print "Line %s: \"%s\" near %s" % (line, msg, repr(near)) + print("Line %s: \"%s\" near %s" % (line, msg, repr(near))) def on_exp(self, target, option, names, values): """ @@ -142,7 +150,7 @@ def on_modexp(self, target, option, names, values): """ try: return values[0] % values[2] - except: + except Exception as e: return self.error("Modulus by zero error") def on_powexp(self, target, option, names, values): @@ -167,21 +175,24 @@ def on_varexp(self, target, option, names, values): """ varexp : IDENTIFIER """ - if self.vars.has_key(values[0]): + if values[0] in self.vars: return self.vars[values[0]] else: + print("error: no such variable", values[0]) + return(Exception("")) return self.error("No such variable '%s'" % values[0]) def on_functioncall(self, target, option, names, values): """ functioncall : IDENTIFIER LPAREN exp RPAREN """ + # print(values) func = getattr(math, values[0], None) if not callable(func): return self.error("No such function '%s'" % values[0]) try: return func(values[2]) - except Exception, e: + except Exception as e: return self.error(e.args[0]) def on_constant(self, target, option, names, values): @@ -190,17 +201,23 @@ def on_constant(self, target, option, names, values): | E """ return getattr(math, values[0]) + def on_myquit(self, target, option, names, values): + """ + myquit : QUIT + """ + print("i am in quit") + return -1 # ----------------------------------------- # Display help # ----------------------------------------- def show_help(self): - print "This PyBison parser implements a basic scientific calculator" - print " * scientific notation now works for numbers, eg '2.3e+12'" - print " * you can assign values to variables, eg 'x = 23.2'" - print " * the constants 'pi' and 'e' are supported" - print " * all the python 'math' module functions are available, eg 'sin(pi/6)'" - print " * errors, such as division by zero, are now reported" + print("This PyBison parser implements a basic scientific calculator") + print(" * scientific notation now works for numbers, eg '2.3e+12'") + print(" * you can assign values to variables, eg 'x = 23.2'") + print(" * the constants 'pi' and 'e' are supported") + print(" * all the python 'math' module functions are available, eg 'sin(pi/6)'") + print(" * errors, such as division by zero, are now reported") # ----------------------------------------- # raw lex script, verbatim here @@ -214,7 +231,7 @@ def show_help(self): #include "tokens.h" extern void *py_parser; extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); - #define returntoken(tok) yylval = PyString_FromString(strdup(yytext)); return (tok); + #define returntoken(tok) yylval = PyUnicode_FromString(strdup(yytext)); return (tok); #define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } %} @@ -230,7 +247,7 @@ def show_help(self): "**" { returntoken(POW); } "/" { returntoken(DIVIDE); } "%" { returntoken(MOD); } - "quit" { printf("lex: got QUIT\n"); yyterminate(); returntoken(QUIT); } + "quit" { printf("lex: got QUIT\n"); returntoken(QUIT); } "=" { returntoken(EQUALS); } "e" { returntoken(E); } "pi" { returntoken(PI); } @@ -248,5 +265,5 @@ def show_help(self): if __name__ == '__main__': p = Parser(keepfiles=0) - print "Scientific calculator example. Type 'help' for help" + print("Scientific calculator example. Type 'help' for help") p.run() diff --git a/examples/java/javaparser.l b/examples/java/javaparser.l index f0199d2..16e2dbd 100644 --- a/examples/java/javaparser.l +++ b/examples/java/javaparser.l @@ -10,7 +10,7 @@ Escchar \\{D}({D}?{D})? Escunichar \\u{H}{H}{H}{H} %{ -int yylineno = 0; +//int yylineno = 0; #include #include #include "Python.h" @@ -18,10 +18,12 @@ int yylineno = 0; #include "tokens.h" extern void *py_parser; extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); -#define returntoken(tok) /*printf("%d=%s\n", tok, yytext);*/ yylval = PyString_FromString(strdup(yytext)); return (tok); +#define returntoken(tok) /*printf("%d=%s\n", tok, yytext);*/ yylval = PyUnicode_FromString(strdup(yytext)); return (tok); #define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } #include "table.h" +void comment(); +void commentold(); %} @@ -147,7 +149,7 @@ int Table_Lookup(struct KeywordToken Table[]) return ID_TOKEN; } -commentold() +void commentold() { char c = -1, c1; while(c != 0) @@ -164,7 +166,7 @@ commentold() } } -comment() +void comment() { int prev=-1, cur=-1; diff --git a/examples/java/run.py b/examples/java/run.py index 94ba609..250875a 100755 --- a/examples/java/run.py +++ b/examples/java/run.py @@ -2,6 +2,8 @@ """ Runs the java parser on a small java source file """ +from __future__ import absolute_import +from __future__ import print_function import sys import javaparser @@ -23,16 +25,16 @@ else: src = None -src = "I2PClient.java" +src = "HelloWorldApp.java" p = javaparser.Parser(verbose=verbose) -print "delmebld.py: running parser on HelloWorldApp.java" +print("delmebld.py: running parser on HelloWorldApp.java") res = p.run(file=src) -print "back from engine, parse tree dump follows:" +print("back from engine, parse tree dump follows:") if 0: - print "------------------------------------------" + print("------------------------------------------") res.dump() - print "------------------------------------------" - print "end of parse tree dump" + print("------------------------------------------") + print("end of parse tree dump") diff --git a/examples/template/template.py b/examples/template/template.py index 9d87973..f4d0e9c 100755 --- a/examples/template/template.py +++ b/examples/template/template.py @@ -11,6 +11,8 @@ somewhere, and tinker away to your heart's content. """ +from __future__ import absolute_import +from __future__ import print_function import sys from bison import BisonParser, BisonNode @@ -105,7 +107,7 @@ def on_someTarget(self, target, option, names, values): : | someTarget WORD """ - print "on_someTarget: %s %s" % (option, repr(values)) + print("on_someTarget: %s %s" % (option, repr(values))) node = someTarget_Node(target=target, option=option, names=names, @@ -126,11 +128,11 @@ def on_someTarget(self, target, option, names, values): #include "Python.h" #define YYSTYPE void * #include "tokens.h" -int yylineno = 0; +//int yylineno = 0; int yywrap() { return(1); } extern void *py_parser; extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); -#define returntoken(tok) yylval = PyString_FromString(strdup(yytext)); return (tok); +#define returntoken(tok) yylval = PyUnicode_FromString(strdup(yytext)); return (tok); #define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } %} @@ -154,12 +156,12 @@ def on_someTarget(self, target, option, names, values): # -------------------------------------------------- def usage(): - print "%s: PyBison template parser" % sys.argv[0] - print "Usage: %s [-k] [-v] [-d] [filename]" % sys.argv[0] - print " -k Keep temporary files used in building parse engine lib" - print " -v Enable verbose messages while parser is running" - print " -d Enable garrulous debug messages from parser engine" - print " filename path of a file to parse, defaults to stdin" + print("%s: PyBison template parser" % sys.argv[0]) + print("Usage: %s [-k] [-v] [-d] [filename]" % sys.argv[0]) + print(" -k Keep temporary files used in building parse engine lib") + print(" -v Enable verbose messages while parser is running") + print(" -d Enable garrulous debug messages from parser engine") + print(" filename path of a file to parse, defaults to stdin") def main(*args): """ @@ -192,7 +194,7 @@ def main(*args): p = Parser(verbose=verbose, keepfiles=keepfiles) if filename == None: - print "(Reading from standard input - please type stuff)" + print("(Reading from standard input - please type stuff)") tree = p.run(file=filename, debug=debug) return tree diff --git a/mytest.py b/mytest.py new file mode 100644 index 0000000..f8aab83 --- /dev/null +++ b/mytest.py @@ -0,0 +1,87 @@ +from bison import BisonParser + +class Parser(BisonParser): + + + def __init__(self, **kwargs): + self.bisonEngineLibName = self.__class__.__name__ + '_engine' + + tokens = [[x.strip() for x in y.split('=')] + for y in self.__doc__.split('\n') + if y.strip() != ''] + + self.precedences = ( + ) + + self.start = "someTarget" + + lex_rules = '\n'.join(["{} {{ returntoken({}); }}" + .format(*x) if x[1][0] != '_' else + "{} {{ {} }}".format(x[0], x[1][1:]) + for x in tokens]) + + self.tokens = list(set([x[1] for x in tokens if not x[1].startswith('_')])) + self.lexscript = r""" +%{ +#include +#include +#include "Python.h" +#define YYSTYPE void * +#include "tokens.h" +int yylineno = 0; +int yywrap() { return(1); } +extern void *py_parser; +extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); +#define returntoken(tok) yylval = PyUnicode_FromString(strdup(yytext)); return (tok); +#define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } +%} + +%% +""" + lex_rules + """ + +%% + """ + print(self.lexscript) + super(Parser, self).__init__(**kwargs) + + + + +class MyParser(Parser): + r""" + quit = QUIT + [a-zA-Z0-9]+ = WORD + ([0-9]*\.?)([0-9]+)(e[-+]?[0-9]+)? = NUMBER + ([0-9]+)(\.?[0-9]*)(e[-+]?[0-9]+)? = NUMBER + \( = LPAREN + \) = RPAREN + \n = _yylineno++; + [ \t] = _ + . = _ + """ + + def on_someTarget(self, target, option, names, values): + """ + someTarget + : paren_expr + | someTarget WORD + | someTarget QUIT + """ + print("on_someTarget: %s %s %s" % (option, names, repr(values))) + if option == 1: + return values[1] + elif option == 2: + print("quit!") + return 0 + + def on_paren_expr(self, target, option, names, values): + """ + paren_expr : LPAREN WORD RPAREN + """ + print("PARENTHESISED", values) + return values[1] + + + +p = MyParser(verbose=False, debugSymbols=True) +p.run(file='foo', debug=0) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c8c5591 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +cython +six + diff --git a/setup.py b/setup.py index 2b584b5..624c6cc 100644 --- a/setup.py +++ b/setup.py @@ -2,11 +2,14 @@ Builds bison python module """ +from __future__ import absolute_import +from __future__ import print_function version = '0.1' -from distutils.core import setup -from distutils.extension import Extension -from Pyrex.Distutils import build_ext +from setuptools import setup +from setuptools import Extension +from Cython.Distutils import build_ext +from setuptools import find_packages import sys @@ -46,12 +49,17 @@ bisondynlibModule], libraries=libs, extra_compile_args=['-Wall', '-Wextra'], - extra_link_args=extra_link_args, + # extra_compile_args=['/Od','/Zi','-D__builtin_expect(a,b)=(a)'], extra_link_args=extra_link_args, ) ], + #packages=find_packages(), packages=['bison'], - package_dir={'bison': 'src/python'}, + package_dir={'': 'src'}, #py_modules=['node', 'xmlifier', 'convert'], cmdclass={'build_ext': build_ext}, scripts=[bison2pyscript], + install_requires=[ + "cython", + "six" + ], ) diff --git a/src/python/__init__.py b/src/bison/__init__.py similarity index 88% rename from src/python/__init__.py rename to src/bison/__init__.py index 7851b17..da94bb4 100644 --- a/src/python/__init__.py +++ b/src/bison/__init__.py @@ -17,6 +17,8 @@ for a commercial license. """ +from __future__ import absolute_import +from __future__ import print_function import sys import traceback @@ -148,7 +150,7 @@ def __init__(self, **kw): self.verbose = kw.get('verbose', 0) - if kw.has_key('keepfiles'): + if 'keepfiles' in kw: self.keepfiles = kw['keepfiles'] # if engine lib name not declared, invent ont @@ -174,22 +176,26 @@ def _handle(self, targetname, option, names, values): if handler: if self.verbose: try: - hdlrline = handler.func_code.co_firstlineno + hdlrline = handler.__code__.co_firstlineno except: - hdlrline = handler.__init__.func_code.co_firstlineno + hdlrline = handler.__init__.__code__.co_firstlineno - print 'BisonParser._handle: call handler at line %s with: %s' \ - % (hdlrline, str((targetname, option, names, values))) - - self.last = handler(target=targetname, option=option, names=names, - values=values) + print('BisonParser._handle: call handler at line %s with: %s' \ + % (hdlrline, str((targetname, option, names, values)))) + try: + self.last = handler(target=targetname, option=option, names=names, + values=values) + except Exception as e: + print("returning exception", e, targetname, option, names, values) + self.last = e + return e #if self.verbose: - # print 'handler for %s returned %s' \ - # % (targetname, repr(self.last)) + # print ('handler for %s returned %s' \ + # % (targetname, repr(self.last))) else: if self.verbose: - print 'no handler for %s, using default' % targetname + print ('no handler for %s, using default' % targetname) cls = self.default_node_class self.last = cls(target=targetname, option=option, names=names, @@ -214,7 +220,7 @@ def run(self, **kw): - debug - enables garrulous parser debugging output, default 0 """ if self.verbose: - print 'Parser.run: calling engine' + print('Parser.run: calling engine') # grab keywords fileobj = kw.get('file', self.file) @@ -243,7 +249,7 @@ def run(self, **kw): self.read = read if self.verbose and self.file.closed: - print 'Parser.run(): self.file', self.file, 'is closed' + print('Parser.run(): self.file', self.file, 'is closed') error_count = 0 @@ -264,23 +270,23 @@ def run(self, **kw): self.report_last_error(filename, e) if self.verbose: - print 'Parser.run: back from engine' + print('Parser.run: back from engine') if hasattr(self, 'hook_run'): self.last = self.hook_run(filename, self.last) if self.verbose and not self.file.closed: - print 'last:', self.last + print('last:', self.last) if self.verbose: - print 'last:', self.last + print('last:', self.last) # restore old values self.file = oldfile self.read = oldread if self.verbose: - print '------------------ result=', self.last + print('------------------ result=', self.last) # TODO: return last result (see while loop): # return self.last[:-1] @@ -297,12 +303,13 @@ def read(self, nbytes): """ # default to stdin if self.verbose: - print 'Parser.read: want %s bytes' % nbytes + print('Parser.read: want %s bytes' % nbytes) bytes = self.file.readline(nbytes) if self.verbose: - print 'Parser.read: got %s bytes' % len(bytes) + print('Parser.read: got %s bytes' % len(bytes)) + print(bytes) return bytes @@ -343,7 +350,7 @@ def report_last_error(self, filename, error): if self.verbose: traceback.print_exc() - print 'ERROR:', error + print('ERROR:', error) def report_syntax_error(self, msg, yytext, first_line, first_col, last_line, last_col): diff --git a/src/python/convert.py b/src/bison/convert.py similarity index 92% rename from src/python/convert.py rename to src/bison/convert.py index 21b295f..97ae3ca 100644 --- a/src/python/convert.py +++ b/src/bison/convert.py @@ -16,10 +16,14 @@ depart from the GPL licensing requirements, please contact the author and apply for a commercial license. """ +from __future__ import absolute_import +from __future__ import print_function import re import os from bison_ import unquoted +from six.moves import filter +from six.moves import map reSpaces = re.compile('\\s+') @@ -42,19 +46,19 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0): """ # try to create output file try: - pyfile = file(pyfileName, 'w') + pyfile = open(pyfileName, 'w') except: raise Exception('Cannot create output file "%s"' % pyfileName) # try to open/read the bison file try: - rawBison = file(bisonfileName).read() + rawBison = open(bisonfileName).read() except: raise Exception('Cannot open bison file "%s"' % bisonfileName) # try to open/read the lex file try: - rawLex = file(lexfileName).read() + rawLex = open(lexfileName).read() except: raise Exception('Cannot open lex file %s' % lexfileName) @@ -105,14 +109,14 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0): rules = [] for rule in rulesLines: - #print '--' - #print repr(rule) + #print ('--') + #print (repr(rule)) #tgt, terms = rule.split(':') try: tgt, terms = re.split(unquoted % ':', rule) except ValueError: - print 'Error in rule: %s' % rule + print ('Error in rule: %s' % rule) raise tgt, terms = tgt.strip(), terms.strip() @@ -253,7 +257,7 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0): pyfile.write(' # ----------------------------------------------------------------\n') pyfile.write(' # lexer tokens - these must match those in your lex script (below)\n') pyfile.write(' # ----------------------------------------------------------------\n') - pyfile.write(' tokens = %s\n\n' % tmp) + pyfile.write(' tokens = %s\n\n' % tokens) # add the precedences pyfile.write(' # ------------------------------\n') @@ -264,7 +268,7 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0): #precline = ', '.join(prec[1]) pyfile.write(' (\'%s\', %s,),\n' % ( prec[0][1:], # left/right/nonassoc, quote-wrapped, no '%s' - tmp, # quote-wrapped targets + prec[1], # quote-wrapped targets ) ) pyfile.write(' )\n\n'), @@ -331,12 +335,12 @@ def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0): # and now, create a main for testing which either reads stdin, or a filename arg pyfile.write('\n'.join([ 'def usage():', - ' print \'%s: PyBison parser derived from %s and %s\' % (sys.argv[0], bisonFile, lexFile)', - ' print \'Usage: %s [-k] [-v] [-d] [filename]\' % sys.argv[0]', - ' print \' -k Keep temporary files used in building parse engine lib\'', - ' print \' -v Enable verbose messages while parser is running\'', - ' print \' -d Enable garrulous debug messages from parser engine\'', - ' print \' filename path of a file to parse, defaults to stdin\'', + ' print (\'%s: PyBison parser derived from %s and %s\' % (sys.argv[0], bisonFile, lexFile))', + ' print (\'Usage: %s [-k] [-v] [-d] [filename]\' % sys.argv[0])', + ' print (\' -k Keep temporary files used in building parse engine lib\')', + ' print (\' -v Enable verbose messages while parser is running\')', + ' print (\' -d Enable garrulous debug messages from parser engine\')', + ' print (\' filename path of a file to parse, defaults to stdin\')', '', 'def main(*args):', ' """', diff --git a/src/python/node.py b/src/bison/node.py similarity index 94% rename from src/python/node.py rename to src/bison/node.py index 62b2210..9874592 100644 --- a/src/python/node.py +++ b/src/bison/node.py @@ -16,7 +16,10 @@ depart from the GPL licensing requirements, please contact the author and apply for a commercial license. """ +from __future__ import absolute_import +from __future__ import print_function import xml +from six.moves import zip class BisonNode: """ @@ -64,7 +67,7 @@ def __getitem__(self, item): node's children, the second element as an index into that child node's children, and so on """ - if type(item) in [type(0), type(0L)]: + if type(item) in [type(0)]: return self.values[item] elif type(item) in [type(()), type([])]: if len(item) == 0: @@ -90,8 +93,8 @@ def dump(self, indent=0): """ specialAttribs = ['option', 'target', 'names', 'values'] indents = ' ' * indent * 2 - #print "%s%s: %s %s" % (indents, self.target, self.option, self.names) - print '%s%s:' % (indents, self.target) + #print ("%s%s: %s %s" % (indents, self.target, self.option, self.names)) + print ('%s%s:' % (indents, self.target)) for name, val in self.kw.items() + zip(self.names, self.values): if name in specialAttribs or name.startswith('_'): @@ -100,7 +103,7 @@ def dump(self, indent=0): if isinstance(val, BisonNode): val.dump(indent + 1) else: - print indents + ' %s=%s' % (name, val) + print(indents + ' %s=%s' % (name, val)) def toxml(self): """ diff --git a/src/python/xmlifier.py b/src/bison/xmlifier.py similarity index 87% rename from src/python/xmlifier.py rename to src/bison/xmlifier.py index 1594b70..dc6d34e 100644 --- a/src/python/xmlifier.py +++ b/src/bison/xmlifier.py @@ -20,6 +20,7 @@ # TODO: use cElementTree instead of Python's xml module. # TODO: test this module, since it is currently only moved to another file. +from __future__ import absolute_import import xml.dom import xml.dom.minidom import types @@ -95,7 +96,7 @@ def loadxmlobj(self, xmlobj, namespace=None): classname = objname + '_Node' classobj = namespace.get(classname, None) - namespacekeys = namespace.keys() + namespacekeys = list(namespace.keys()) # barf if node is not a known parse node or token if (not classobj) and objname not in self.tokens: @@ -111,24 +112,24 @@ def loadxmlobj(self, xmlobj, namespace=None): else: nodeobj = None - #print '----------------' - #print 'objname=%s' % repr(objname) - #print 'classname=%s' % repr(classname) - #print 'classobj=%s' % repr(classobj) - #print 'nodeobj=%s' % repr(nodeobj) + #print ('----------------') + #print ('objname=%s' % repr(objname)) + #print ('classname=%s' % repr(classname)) + #print ('classobj=%s' % repr(classobj)) + #print ('nodeobj=%s' % repr(nodeobj)) # now add the children for child in xmlobj.childNodes: - #print '%s attributes=%s' % (child, child.attributes.items()) + #print ('%s attributes=%s' % (child, child.attributes.items())) childname = child.attributes['target'].value - #print 'childname=%s' % childname + #print ('childname=%s' % childname) if childname + '_Node' in namespacekeys: - #print 'we have a node for class %s' % classname + #print ('we have a node for class %s' % classname) childobj = self.loadxmlobj(child, namespace) else: # it's a token childobj = child.childNodes[0].nodeValue - #print 'got token %s=%s' % (childname, childobj) + #print ('got token %s=%s' % (childname, childobj)) nodeobj.names.append(childname) nodeobj.values.append(childobj) diff --git a/src/c/bison_callback.c b/src/c/bison_callback.c index b5b03be..be7938f 100644 --- a/src/c/bison_callback.c +++ b/src/c/bison_callback.c @@ -30,8 +30,13 @@ #include #include +#ifdef _WIN32 +#define likely(x) (x) +#define unlikely(x) (x) +#else #define likely(x) __builtin_expect((x),1) #define unlikely(x) __builtin_expect((x),0) +#endif static PyObject *py_attr_hook_handler_name; static PyObject *py_attr_hook_read_after_name; @@ -46,7 +51,7 @@ static PyObject *py_attr_close_name; // TODO: where do we Py_DECREF(handle_name) ?? #define INIT_ATTR(variable, name, failure) \ if (unlikely(!variable)) { \ - variable = PyString_FromString(name); \ + variable = PyUnicode_FromString(name); \ if (!variable) failure; \ } @@ -75,10 +80,18 @@ PyObject* py_callback(PyObject *parser, char *target, int option, int nargs, // Construct the names and values list from the variable argument list. for(i = 0; i < nargs; i++) { - PyObject *name = PyString_FromString(va_arg(ap, char *)); + PyObject *name = PyUnicode_FromString(va_arg(ap, char *)); + if(!name){ + Py_INCREF(Py_None); + name = Py_None; + } PyList_SetItem(names, i, name); PyObject *value = va_arg(ap, PyObject *); + if(!value){ + Py_INCREF(Py_None); + value = Py_None; + } Py_INCREF(value); PyList_SetItem(values, i, value); } @@ -121,6 +134,11 @@ PyObject* py_callback(PyObject *parser, char *target, int option, int nargs, res = PyObject_CallObject(handle, arglist); + PyObject *exc = PyErr_Occurred(); + if(unlikely(exc)){ + printf("exception in callback!!\n"); + return -1; + } Py_DECREF(handle); Py_DECREF(arglist); @@ -202,7 +220,10 @@ void py_input(PyObject *parser, char *buf, int *result, int max_size) finish_input: // Copy the read python input string to the buffer - bufstr = PyString_AsString(res); + bufstr = PyUnicode_AsUTF8(res); + if(!bufstr){ + return; + } *result = strlen(bufstr); memcpy(buf, bufstr, *result); diff --git a/src/c/bisondynlib-win32.c b/src/c/bisondynlib-win32.c index 0205700..d448dd3 100644 --- a/src/c/bisondynlib-win32.c +++ b/src/c/bisondynlib-win32.c @@ -12,44 +12,53 @@ //#include "dlluser.h" + +void (*reset_flex_buffer)(void) = NULL; + + void * bisondynlib_open(char *filename) { - HINSTANCE hinstLib; + HINSTANCE hinstLib; hinstLib = LoadLibrary(filename); - + reset_flex_buffer = (void (*)(void)) GetProcAddress(hinstLib, "reset_flex_buffer"); return (void *)hinstLib; } int bisondynlib_close(void *handle) { - return FreeLibrary((HINSTANCE)handle); + return FreeLibrary((HINSTANCE)handle); } char * bisondynlib_err() { return NULL; } - +void bisondynlib_reset(void) +{ + if (reset_flex_buffer) + reset_flex_buffer(); +} char * bisondynlib_lookup_hash(void *handle) { char *hash; - - hash = (char *)GetProcAddress((HINSTANCE)handle, "rules_hash"); + // rules_hash is a pointer, GetProcAddress returns the pointer's address + // so it needs to be dereferenced + hash = *((char **)GetProcAddress((HINSTANCE)handle, "rules_hash")); printf("bisondynlib_lookup_hash: hash=%s\n", hash); return hash; } -PyObject * bisondynlib_run(void *handle, PyObject *parser, char *filename, void *cb) +PyObject * bisondynlib_run(void *handle, PyObject *parser, void *cb, void *in, int debug) { - PyObject *(*pparser)(PyObject *, char *, void *); + PyObject *(*pparser)(PyObject *, void *, void *, int); //printf("bisondynlib_run: looking up parser\n"); pparser = bisondynlib_lookup_parser(handle); //printf("bisondynlib_run: calling parser\n"); - (*pparser)(parser, filename, cb); + (*pparser)(parser, cb, in, debug); //printf("bisondynlib_run: back from parser\n"); //return result; @@ -61,12 +70,12 @@ PyObject * bisondynlib_run(void *handle, PyObject *parser, char *filename, void /* * function(void *) returns a pointer to a function(PyObject *, char *) returning PyObject* */ -PyObject *(*bisondynlib_lookup_parser(void *handle))(PyObject *, char *, void *) +PyObject *(*bisondynlib_lookup_parser(void *handle))(PyObject *, void *, void *, int) { //void *pparser; - PyObject *(*pparser)(PyObject *, char *, void *); - - pparser = (PyObject *(*)(PyObject *, char *, void *))GetProcAddress((HINSTANCE)handle, "do_parse"); + PyObject *(*pparser)(PyObject *, void *, void *, int); + + pparser = (PyObject *(*)(PyObject *, void *, void *, int))GetProcAddress((HINSTANCE)handle, "do_parse"); return pparser; } diff --git a/src/pyrex/bison_.pyx b/src/pyrex/bison_.pyx index 293256f..63ced7e 100644 --- a/src/pyrex/bison_.pyx +++ b/src/pyrex/bison_.pyx @@ -1,11 +1,12 @@ +# cython: language_level=3 """ Pyrex-generated portion of pybison """ cdef extern from "Python.h": - object PyString_FromStringAndSize(char *, int) - object PyString_FromString(char *) - char *PyString_AsString(object o) + object PyUnicode_FromStringAndSize(char *, int) + object PyUnicode_FromString(char *) + char *PyUnicode_AsUTF8(object o) object PyInt_FromLong(long ival) long PyInt_AsLong(object io) @@ -62,12 +63,13 @@ cdef extern from "../c/bisondynlib.h": #int bisondynlib_build(char *libName, char *includedir) -import sys, os, sha, re, imp, traceback +import sys, os, hashlib, re, imp, traceback import shutil import distutils.sysconfig import distutils.ccompiler +# os.unlink = lambda x: x # What for? reSpaces = re.compile("\\s+") #unquoted = r"""^|[^'"]%s[^'"]?""" @@ -136,7 +138,6 @@ cdef class ParserEngine: """ parser = self.parser verbose = parser.verbose - if verbose: distutils.log.set_verbosity(1) @@ -146,18 +147,18 @@ cdef class ParserEngine: self.openLib() # hash our parser spec, compare to hash val stored in lib - libHash = PyString_FromString(self.libHash) + libHash = PyUnicode_FromString(self.libHash) if self.parserHash != libHash: if verbose: - print "Hash discrepancy, need to rebuild bison lib" - print " current parser class: %s" % self.parserHash - print " bison library: %s" % libHash + print ("Hash discrepancy, need to rebuild bison lib") + print (" current parser class: %s" % self.parserHash) + print (" bison library: %s" % libHash) self.closeLib() self.buildLib() self.openLib() else: if verbose: - print "Hashes match, no need to rebuild bison engine lib" + print ("Hashes match, no need to rebuild bison engine lib") def openLib(self): """ @@ -180,12 +181,12 @@ cdef class ParserEngine: cdef void *handle # convert python filename string to c string - libFilename = PyString_AsString(self.libFilename_py) + libFilename = PyUnicode_AsUTF8(self.libFilename_py) parser = self.parser if parser.verbose: - print 'Opening library %s' % self.libFilename_py + print ('Opening library %s' % self.libFilename_py) handle = bisondynlib_open(libFilename) self.libHandle = handle err = bisondynlib_err() @@ -197,7 +198,7 @@ cdef class ParserEngine: self.libHash = bisondynlib_lookup_hash(handle) if parser.verbose: - print 'Successfully loaded library' + print ('Successfully loaded library') def generate_exception_handler(self): s = '' @@ -238,7 +239,7 @@ cdef class ParserEngine: method = getattr(parser, a) gHandlers.append(method) - gHandlers.sort(cmpLines) + gHandlers.sort(key=keyLines) # get start symbol, tokens, precedences, lex script gStart = parser.start @@ -254,7 +255,7 @@ cdef class ParserEngine: os.unlink(buildDirectory + parser.bisonFile) if parser.verbose: - print 'generating bison file:', buildDirectory + parser.bisonFile + print ('generating bison file:', buildDirectory + parser.bisonFile) f = open(buildDirectory + parser.bisonFile, "w") write = f.write @@ -274,6 +275,7 @@ cdef class ParserEngine: 'void (*py_input)(void *, char *, int *, int);', 'void *py_parser;', 'char *rules_hash = "%s";' % self.parserHash, + # '__declspec(dllexport) char *rules_hash = "%s";' % self.parserHash, '#define YYERROR_VERBOSE 1', '', '}', @@ -318,10 +320,10 @@ cdef class ParserEngine: #target, options = doc.split(":") doc = re.sub(unquoted % ";", "", doc) - #print "---------------------" + #print ("---------------------") s = re.split(unquoted % ":", doc) - #print "s=%s" % s + #print ("s=%s" % s) target, options = s target = target.strip() @@ -329,13 +331,13 @@ cdef class ParserEngine: options = options.strip() tmp = [] - #print "options = %s" % repr(options) + #print ("options = %s" % repr(options)) #opts = options.split("|") - ##print "opts = %s" % repr(opts) + ##print ("opts = %s" % repr(opts)) r = unquoted % r"\|" - #print "r = <%s>" % r + #print ("r = <%s>" % r) opts1 = re.split(r, " " + options) - #print "opts1 = %s" % repr(opts1) + #print ("opts1 = %s" % repr(opts1)) for o in opts1: o = o.strip() @@ -386,7 +388,8 @@ cdef class ParserEngine: action = action + " Py_INCREF(Py_None);\n" action = action + " yyclearin;\n" - action = action + self.generate_exception_handler() + else: + action = action + self.generate_exception_handler() action = action + ' }\n' @@ -401,6 +404,7 @@ cdef class ParserEngine: # now generate C code epilogue = '\n'.join([ 'void do_parse(void *parser1,', + # '__declspec(dllexport) void do_parse(void *parser1,', ' void *(*cb)(void *, char *, int, int, ...),', ' void (*in)(void *, char*, int *, int),', ' int debug', @@ -414,7 +418,9 @@ cdef class ParserEngine: '}', '', 'int yyerror(char *msg)', - '{', + '{ fprintf(stderr, "error!\\n");', + ' PyObject *error = PyErr_Occurred();', + ' if(error) PyErr_Clear();', ' PyObject *fn = PyObject_GetAttrString((PyObject *)py_parser,', ' "report_syntax_error");', ' if (!fn)', @@ -459,10 +465,21 @@ cdef class ParserEngine: f = open(buildDirectory + parser.flexFile, 'w') f.write('\n'.join(tmp) + '\n') f.close() - + # create and set up a compiler object - env = distutils.ccompiler.new_compiler(verbose=parser.verbose) - env.set_include_dirs([distutils.sysconfig.get_python_inc()]) + if sys.platform == 'win32': + env = distutils.ccompiler.new_compiler(verbose=parser.verbose) + env.initialize() + env.set_include_dirs([distutils.sysconfig.get_python_inc(), + r'D:\Tools\VC14\include', + r'D:\Tools\VC14\sdk\include']) + env.set_libraries(['python{v.major}{v.minor}'.format(v=sys.version_info)]) + env.set_library_dirs([os.path.join(sys.prefix, 'libs'), + r'D:\Tools\VC14\lib\amd64', + r'D:\Tools\VC14\sdk\lib\x64',]) + else: + env = distutils.ccompiler.new_compiler(verbose=parser.verbose) + env.set_include_dirs([distutils.sysconfig.get_python_inc()]) # ----------------------------------------- # Now run bison on the grammar file @@ -470,16 +487,16 @@ cdef class ParserEngine: bisonCmd = parser.bisonCmd + [buildDirectory + parser.bisonFile] if parser.verbose: - print 'bison cmd:', ' '.join(bisonCmd) + print ('bison cmd:', ' '.join(bisonCmd)) env.spawn(bisonCmd) if parser.verbose: - print 'renaming bison output files' - print '%s => %s%s' % (parser.bisonCFile, buildDirectory, - parser.bisonCFile1) - print '%s => %s%s' % (parser.bisonHFile, buildDirectory, - parser.bisonHFile1) + print ('renaming bison output files') + print ('%s => %s%s' % (parser.bisonCFile, buildDirectory, + parser.bisonCFile1)) + print ('%s => %s%s' % (parser.bisonHFile, buildDirectory, + parser.bisonHFile1)) if os.path.isfile(buildDirectory + parser.bisonCFile1): os.unlink(buildDirectory + parser.bisonCFile1) @@ -497,7 +514,7 @@ cdef class ParserEngine: flexCmd = parser.flexCmd + [buildDirectory + parser.flexFile] if parser.verbose: - print 'flex cmd:', ' '.join(flexCmd) + print ('flex cmd:', ' '.join(flexCmd)) env.spawn(flexCmd) @@ -505,8 +522,8 @@ cdef class ParserEngine: os.unlink(buildDirectory + parser.flexCFile1) if parser.verbose: - print '%s => %s%s' % (parser.flexCFile, buildDirectory, - parser.flexCFile1) + print ('%s => %s%s' % (parser.flexCFile, buildDirectory, + parser.flexCFile1)) shutil.copy(parser.flexCFile, buildDirectory + parser.flexCFile1) @@ -527,7 +544,6 @@ cdef class ParserEngine: extra_preargs=parser.cflags_pre, extra_postargs=parser.cflags_post, debug=parser.debugSymbols) - libFileName = buildDirectory + parser.bisonEngineLibName \ + imp.get_suffixes()[0][0] @@ -538,13 +554,7 @@ cdef class ParserEngine: os.rename(libFileName, libFileName+".bak") if parser.verbose: - print 'linking: %s => %s' % (', '.join(objs), libFileName) - - if sys.platform.startswith('darwin'): - # on OSX, ld throws undefined symbol for shared library references - # however, we would like to link against libpython dynamically, so that - # the built .so will not depend on which python interpreter it runs on - env.linker_so += ['-undefined', 'dynamic_lookup'] + print ('linking: %s => %s' % (', '.join(objs), libFileName)) env.link_shared_object(objs, libFileName) @@ -566,7 +576,7 @@ cdef class ParserEngine: fname = buildDirectory + getattr(parser, name) else: fname = None - #print "want to delete %s" % fname + #print ("want to delete %s" % fname) if fname and os.path.isfile(fname): hitlist.append(fname) @@ -575,14 +585,14 @@ cdef class ParserEngine: try: os.unlink(f) except: - print "Warning: failed to delete temporary file %s" % f + print ("Warning: failed to delete temporary file %s" % f) if parser.verbose: - print 'deleting temporary bison output files:' + print ('deleting temporary bison output files:') for f in [parser.bisonCFile, parser.bisonHFile, parser.flexCFile]: if parser.verbose: - print 'rm %s' % f + print ('rm %s' % f) if os.path.isfile(f): os.unlink(f) @@ -608,7 +618,12 @@ cdef class ParserEngine: cbvoid = py_callback invoid = py_input - return bisondynlib_run(handle, parser, cbvoid, invoid, debug) + try: + ret = bisondynlib_run(handle, parser, cbvoid, invoid, debug) + except Exception as e: + print(e) + ret=None + return ret def __del__(self): """ @@ -623,13 +638,25 @@ def cmpLines(meth1, meth2): the order of their declaration in their source file. """ try: - line1 = meth1.func_code.co_firstlineno - line2 = meth2.func_code.co_firstlineno + line1 = meth1.__code__.co_firstlineno + line2 = meth2.__code__.co_firstlineno + except: + line1 = meth1.__init__.__code__.co_firstlineno + line2 = meth2.__init__.__code__.co_firstlineno + + return (line1 > line2) - (line1 < line2) + +def keyLines(meth): + """ + Used as a sort() 'key' argument for sorting parse target handler methods by + the order of their declaration in their source file. + """ + try: + line = meth.__code__.co_firstlineno except: - line1 = meth1.__init__.func_code.co_firstlineno - line2 = meth2.__init__.func_code.co_firstlineno + line = meth.__init__.__code__.co_firstlineno - return cmp(line1, line2) + return line def hashParserObject(parser): @@ -644,20 +671,26 @@ def hashParserObject(parser): lex script, and therefore, whether a shared parser lib rebuild is required. """ - hasher = sha.new() + hasher = hashlib.new('sha1') + def update(o): + if type(o) == type(""): + o=o.encode("utf-8") + hasher.update(o) # add the lex script - hasher.update(parser.lexscript) + update(parser.lexscript) # add the tokens # workaround pyrex weirdness - tokens = list(parser.tokens) - hasher.update(",".join(list(parser.tokens))) + # tokens = list(parser.tokens) + tokens = parser.tokens[0] + update(",".join(tokens)) # add the precedences for direction, tokens in parser.precedences: - hasher.update(direction + "".join(tokens)) + tokens = tokens[0] + update(direction + "".join(tokens)) # extract the parser target handler names handlerNames = dir(parser) @@ -683,7 +716,7 @@ def hashParserObject(parser): # now add in the methods' docstrings for h in handlers: docString = h.__doc__ - hasher.update(docString) + update(docString) # done return hasher.hexdigest() diff --git a/utils/bison2py b/utils/bison2py index 3abce9d..bc5080b 100644 --- a/utils/bison2py +++ b/utils/bison2py @@ -22,20 +22,20 @@ def usage(s=None): Display usage info and exit """ if s: - print progname+": "+s - - print "\n".join([ - "Usage: %s [-c] basefilename" % progname, - " or: %s [-c] grammarfile.y lexfile.l pyfile.py" % progname, - "(generates a boilerplate python file from a grammar and lex file)", - "The first form uses 'basefilename' as base for all files, so:", - " %s fred" % progname, - "is equivalent to:", - " %s fred.y fred.l fred.py" % progname, + print (progname + ': ' + s) + + print ('\n'.join([ + 'Usage: %s [-c] basefilename' % progname, + ' or: %s [-c] grammarfile.y lexfile.l pyfile.py' % progname, + '(generates a boilerplate python file from a grammar and lex file)', + 'The first form uses "basefilename" as base for all files, so:', + ' %s fred' % progname, + 'is equivalent to:', + ' %s fred.y fred.l fred.py' % progname, '', 'The "-c" argument causes the creation of a unique node class', 'for each parse target - highly recommended for complex grammars', - ]) + ])) sys.exit(1) diff --git a/utils/bison2py.py b/utils/bison2py.py index 915a313..aa54642 100644 --- a/utils/bison2py.py +++ b/utils/bison2py.py @@ -6,6 +6,8 @@ Run it with 2 arguments - filename.y and filename.l Output is filename.py """ +from __future__ import absolute_import +from __future__ import print_function import sys from bison import bisonToPython @@ -18,9 +20,9 @@ def usage(s=None): progname = sys.argv[0] if s: - print progname + ': ' + s + print(progname + ': ' + s) - print '\n'.join([ + print('\n'.join([ 'Usage: %s [-c] basefilename' % progname, ' or: %s [-c] grammarfile.y lexfile.l pyfile.py' % progname, '(generates a boilerplate python file from a grammar and lex file)', @@ -31,7 +33,7 @@ def usage(s=None): '', 'The "-c" argument causes the creation of a unique node class', 'for each parse target - highly recommended for complex grammars', - ]) + ])) sys.exit(1)