Franck Pommereau

ported from Python 2

*~
*.pyc
*.pyo
__pycache__
,*
+*
.*.swp
.#*
#*
cctlib/parser.out
#!/usr/bin/env python3
# Launcher script: imports the cctlib.main module and calls its main()
# function unconditionally when the script is run.
import cctlib.main
cctlib.main.main()
File mode changed
# ---------------------------------------------------------------
# clex.py
#
# Atul Varma
# Python C Compiler - Lexical Analyzer
# $Id: clex.py,v 1.2 2004/06/02 21:05:45 varmaa Exp $
# ---------------------------------------------------------------
import ply.lex as lex
import re
# ---------------------------------------------------------------
# TOKEN LIST
# ---------------------------------------------------------------
# Every token type the lexer may emit, in three groups.  The names are kept
# in a tuple because the parser module imports this sequence.
tokens = tuple(
    # Reserved words
    (
        "AUTO BREAK CASE CHAR CONST CONTINUE DEFAULT DO DOUBLE ELSE ENUM "
        "EXTERN FLOAT FOR GOTO IF INT LONG REGISTER RETURN SHORT SIGNED "
        "SIZEOF STATIC STRUCT SWITCH TYPEDEF UNION UNSIGNED VOID VOLATILE "
        "WHILE"
    ).split()
    # Special characters
    + (
        "COMMA COLON SEMICOLON LPAREN RPAREN LBRACKET RBRACKET LBRACE "
        "RBRACE ASSIGN GREATER LESS EQ NOT_EQ GREATER_EQ LESS_EQ "
        "DOUBLE_PLUS DOUBLE_MINUS PLUS MINUS TIMES DIV MODULO "
        "DOUBLE_AMPERSAND DOUBLE_PIPE EXCLAMATION AMPERSAND PIPE CARET "
        "ASTERISK QUESTION TILDE POUND DOT ELLIPSIS ARROW SHIFT_LEFT "
        "SHIFT_RIGHT EQ_PLUS EQ_MINUS EQ_TIMES EQ_DIV EQ_MODULO EQ_PIPE "
        "EQ_AMPERSAND EQ_CARET EQ_SHIFT_LEFT EQ_SHIFT_RIGHT"
    ).split()
    # Complex tokens
    + "ID FNUMBER INUMBER STRING CHARACTER".split()
)
# ---------------------------------------------------------------
# RESERVED WORDS
# ---------------------------------------------------------------
# Maps each C keyword to its token type, which is simply the keyword in
# upper case.  t_ID consults this table to retag identifiers.
reserved_words = {
    keyword: keyword.upper()
    for keyword in (
        'auto', 'break', 'case', 'char', 'const', 'continue', 'default',
        'do', 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto',
        'if', 'int', 'long', 'register', 'return', 'short', 'signed',
        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
        'unsigned', 'void', 'volatile', 'while',
    )
}
# ---------------------------------------------------------------
# SPECIAL CHARACTERS
# ---------------------------------------------------------------
# String rules for operators and punctuation.  PLY tries string rules after
# all function rules, ordered by decreasing length of the pattern text, and
# keeps definition order between patterns of equal length.
t_COMMA = r','
t_COLON = r':'
t_SEMICOLON = r';'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'{'
t_RBRACE = r'}'
t_ASSIGN = r'='
t_GREATER = r'>'
t_LESS = r'<'
t_EQ = r'=='
t_NOT_EQ = r'!='
t_GREATER_EQ = r'>='
t_LESS_EQ = r'<='
t_DOUBLE_PLUS = r'\+\+'
t_DOUBLE_MINUS = r'--'
t_PLUS = r'\+'
t_MINUS = r'-'
# There is deliberately no t_TIMES rule.  It used the very same pattern as
# t_ASTERISK and was defined first, so '*' would be reported as TIMES, a
# token the grammar never uses (it expects ASTERISK everywhere).
# A '/' that starts neither a comment ('/*') nor the '/=' operator.  The
# pattern text is longer than r'/=', so PLY tries it first; without '=' in
# the lookahead '/=' could never be recognised as EQ_DIV.
t_DIV = r'/(?![*=])'
t_MODULO = r'%'
t_DOUBLE_AMPERSAND = r'&&'
t_DOUBLE_PIPE = r'\|\|'
t_EXCLAMATION = r'!'
t_AMPERSAND = r'&'
t_PIPE = r'\|'
t_CARET = r'\^'
t_ASTERISK = r'\*'
t_QUESTION = r'\?'
t_TILDE = r'~'
t_POUND = r'\#'
t_ELLIPSIS = r'\.\.\.'
t_DOT = r'\.'
t_ARROW = r'->'
t_SHIFT_LEFT = r'<<'
t_SHIFT_RIGHT = r'>>'
t_EQ_PLUS = r'\+='
t_EQ_MINUS = r'-='
t_EQ_TIMES = r'\*='
t_EQ_DIV = r'/='
t_EQ_MODULO = r'%='
t_EQ_PIPE = r'\|='
t_EQ_AMPERSAND = r'&='
t_EQ_CARET = r'\^='
t_EQ_SHIFT_LEFT = r'<<='
t_EQ_SHIFT_RIGHT = r'>>='
# ---------------------------------------------------------------
# COMPLEX TOKENS
# ---------------------------------------------------------------
def t_ID(t):
    r'[A-Za-z_][\w]*'
    # Keywords match the identifier pattern too; a table lookup retags them
    # and leaves every other identifier with its current type.
    t.type = reserved_words.get(t.value, t.type)
    return t
def t_FNUMBER(t):
    r'((0(?!\d))|([1-9]\d*))((\.\d+(e[+-]?\d+)?)|(e[+-]?\d+))'
    # Floating-point literal: an integer part without a leading zero,
    # followed by a fraction (optionally with a lowercase-e exponent) or by
    # an exponent alone.  The token is returned unchanged.
    return t
def t_malformed_fnumber(t):
    r'(0\d+)((\.\d+(e[+-]?\d+)?)|(e[+-]?\d+))'
    # A float whose integer part has a leading zero: report it and return
    # nothing, so no token is produced for it.
    complaint = "Line %d. Malformed floating point number '%s'" % (
        t.lineno, t.value)
    print(complaint)
def t_INUMBER(t):
    r'0(?!\d)|([1-9]\d*)'
    # Decimal integer literal: a lone zero, or digits that do not start with
    # zero.  The token is returned unchanged (its value stays a string).
    return t
def t_malformed_inumber(t):
    r'0\d+'
    # An integer with a leading zero: report it and return nothing, so no
    # token is produced for it.
    complaint = "Line %d. Malformed integer '%s'" % (t.lineno, t.value)
    print(complaint)
def t_CHARACTER(t):
    r"'\w'"
    # Character literal: exactly one word character between single quotes
    # (escape sequences and punctuation are not matched by this pattern).
    return t
def t_STRING(t):
    r'"[^\n]*?(?<!\\)"'
    # Remove escaped backslashes first so that the second half of a '\\'
    # pair is not mistaken for the start of another escape sequence.
    without_escaped_backslashes = t.value.replace(r'\\', '')
    # Only \n and \" are accepted as escapes.
    bad_escape = re.search(r'\\[^n"]', without_escaped_backslashes)
    if bad_escape is None:
        return t
    print("Line %d. Unsupported character escape %s in string literal."
          % (t.lineno, bad_escape.group(0)))
    # Returning nothing discards the offending literal.
    return None
# ---------------------------------------------------------------
# IGNORED TOKENS
# ---------------------------------------------------------------
def t_WHITESPACE(t):
    r'[ \t]+'
    # Runs of spaces and tabs only separate tokens; returning nothing means
    # no token is produced for them.
    return None
def t_NEWLINE(t):
    r'\n+'
    # The line counter lives on the lexer: PLY stamps every new token with
    # lexer.lineno, so updating only the token's own lineno (as before) left
    # all later tokens on line 1.  No token is produced for newlines.
    t.lexer.lineno += len(t.value)
def t_COMMENT(t):
    r'/\*[\w\W]*?\*/'
    # Comments are dropped, but the newlines inside them must still advance
    # the lexer's line counter (the token's own lineno is not propagated).
    t.lexer.lineno += t.value.count('\n')
# ---------------------------------------------------------------
# ERROR HANDLING
# ---------------------------------------------------------------
def t_error(t):
    # Error hook: called when no rule matches.  t.value holds the remaining
    # input, starting at the offending character.
    #
    # The line prefix and the message belong on one output line, hence
    # end=" " (the Python 2 original used a trailing comma for this).
    print("Line %d." % (t.lineno,), end=" ")
    if t.value[0] == '"':
        print("Unterminated string literal.")
        # Resume after the rest of the line, if there is one.  Skipping is
        # done through the lexer; tokens have no skip() method in PLY 2+/3.
        if t.value.count('\n') > 0:
            t.lexer.skip(t.value.index('\n'))
    elif t.value[0:2] == '/*':
        # Nothing is skipped here, exactly as before.
        print("Unterminated comment.")
    else:
        print("Illegal character '%s'" % t.value[0])
        t.lexer.skip(1)
# ---------------------------------------------------------------
# MAIN LEXER FUNCTIONALITY
# ---------------------------------------------------------------
def run_lexer():
    """Debugging aid: print every token of the file named in sys.argv[1].

    Each token is printed as (type,'value',lineno).  This function is not
    used by the compiler itself; it only runs when the module is executed
    as a script.
    """
    import sys

    # Read the whole file in one go; the context manager closes it even if
    # reading fails.
    with open(sys.argv[1]) as source_file:
        lex.input(source_file.read())
    while True:
        token = lex.token()  # Get a token
        if not token:
            break  # No more tokens
        print("(%s,'%s',%d)" % (token.type, token.value, token.lineno))
# Build the lexer from the rules defined above.  This call sits at module
# level, so it runs whenever the module is imported or executed.
lex.lex()
# When executed as a script, dump the tokens of the file given on the
# command line.
if __name__ == '__main__':
    run_lexer()
# ---------------------------------------------------------------
# End of clex.py
# ---------------------------------------------------------------
# ---------------------------------------------------------------
# cparse.py
#
# Atul Varma
# Python C Compiler - Parser
# $Id: cparse.py,v 1.2 2004/05/27 16:25:08 varmaa Exp $
# ---------------------------------------------------------------
import ply.yacc as yacc
from .clex import tokens
# ---------------------------------------------------------------
# ABSTRACT SYNTAX TREE - NODES
# ---------------------------------------------------------------
class Node:
    """Root of the AST class hierarchy.

    Supplies default answers for the predicates every node understands and
    implements the dispatch half of the visitor protocol.
    """

    def is_null(self):
        """Tell whether this node is a null placeholder (0 by default)."""
        return 0

    def is_const(self):
        """Tell whether this node is a numeric constant such as "5"
        (0 by default)."""
        return 0

    def has_address(self):
        """Tell whether the node was marked as having an address, i.e.
        whether set_has_address() has been called on it."""
        return hasattr(self, "has_addr")

    def set_has_address(self):
        """Mark the node as an lvalue.

        Sets the has_addr flag and initialises output_addr to 0; the real
        address is expected to be stored in output_addr later on.
        """
        self.has_addr = 1
        self.output_addr = 0

    def calculate(self):
        """Return the constant numeric value of this node and its subnodes
        (e.g. 8 for "5+3"), or None when there is none.  The default is
        None."""
        return None

    def accept(self, visitor):
        """Entry point of the visitor protocol: dispatch on this node's
        own class."""
        return self._accept(self.__class__, visitor)

    def _accept(self, klass, visitor):
        """Call the visitor method matching klass, or that of an ancestor.

        The method name is 'v' followed by the class name (vMyNode for a
        class MyNode).  When the visitor has no such method, every base
        class of klass is tried in turn, recursively, and the result of the
        last attempt is returned (None if there are no bases).
        """
        handler = getattr(visitor, "v%s" % klass.__name__, None)
        if handler is not None:
            return handler(self)
        outcome = None
        for base in klass.__bases__:
            outcome = self._accept(base, visitor)
        return outcome
class NullNode(Node):
    """A null node is like a null terminator for AST's."""
    def __init__(self):
        # Note: the type is stored as the plain string 'void', not as a
        # Type node.
        self.type = 'void'
    def is_null(self):
        # Overrides Node.is_null, which returns 0.
        return 1
class ArrayExpression(Node):
    """This is an expression with array notation, like "a[5+b]"."""
    def __init__(self, expr, index):
        # expr: the node being subscripted ("a" in the example above).
        # index: the node between the brackets ("5+b").
        self.expr = expr
        self.index = index
class StringLiteral(Node):
    """A string literal, e.g. the string "Hello World" in
    printf("Hello World")."""
    def __init__(self, str):
        # The text is kept private; the node's type is always pointer(char).
        self._str = str
        self.type = PointerType(BaseType('char'))
    def append_str(self, str):
        """Append more text to the literal (the grammar uses this for
        adjacent string literals)."""
        self._str += str
    def get_str(self):
        """Return the literal's text as stored."""
        return self._str
    def get_sanitized_str(self):
        """Return the text with every newline character replaced by the
        two-character sequence backslash + 'n'.  No other character is
        changed."""
        return self._str.replace('\n', '\\n')
class Id(Node):
    """An identifier, which can correspond to the name of
    a function, variable, etc..."""
    def __init__(self, name, lineno):
        # name: the identifier text.
        # lineno: source line number supplied by the parser.
        self.name = name
        self.lineno = lineno
class Const(Node):
    """A numeric constant, such as the number 5.

    The grammar creates these for integer, floating-point and character
    literals, and for folded constant expressions."""
    def __init__(self, value, type):
        # value: the Python number; type: the Type node describing it.
        self.value = value
        self.type = type
    def calculate(self):
        """Return the stored value; a constant is its own result."""
        return self.value
    def is_const(self):
        # Overrides Node.is_const, which returns 0.
        return 1
def _get_calculated(node):
"""Attempts to calculate the numeric value of the expression,
returning a Const node if it was able to convert the expression.
If the expression isn't a constant expression like "5+3", then
this function just returns the node unmodified."""
result = node.calculate()
if result != None:
result = int(result)
return Const(result, BaseType('int'))
else:
return node
class Unaryop(Node):
    """Any generic unary operator. This is an abstract base class."""
    def __init__(self, node):
        # The single operand is stored as 'expr'.
        self.expr = node
class Negative(Unaryop):
    """Arithmetic negation, as in '-5'."""

    def calculate(self):
        """Return the negated constant value of the operand, or None when
        the operand has no constant value."""
        operand = self.expr.calculate()
        return None if operand is None else -operand
class Pointer(Unaryop):
    """A pointer dereference, e.g. '*a'."""
    # Adds nothing of its own; the class only serves as a distinct node
    # kind for visitors.
    pass
class AddrOf(Unaryop):
    """An address-of operator, e.g. '&a'."""
    # Adds nothing of its own; the class only serves as a distinct node
    # kind for visitors.
    pass
class Binop(Node):
    """Any binary operator, such as that for arithmetic operations
    (+/-/*), assignment operations (=/+=/-=), and so forth.

    Attributes:
        left, right: the operand nodes.
        op: the operator's source text, e.g. '+' or '=='.
    """

    # List of assignment operators.
    ASSIGN_OPS = ['=', '+=', '-=']

    # Operators that can be folded directly.  '/' and '%' are handled apart
    # because they need C semantics and a zero-divisor check.
    _FOLDABLE = {
        '+': lambda a, b: a + b,
        '-': lambda a, b: a - b,
        '*': lambda a, b: a * b,
        '==': lambda a, b: a == b,
        '!=': lambda a, b: a != b,
        '<': lambda a, b: a < b,
        '>': lambda a, b: a > b,
        '<=': lambda a, b: a <= b,
        '>=': lambda a, b: a >= b,
    }

    def __init__(self, left, right, op):
        self.left = left
        self.right = right
        self.op = op

    def calculate(self):
        """Return the integer value of the expression, or None.

        None is returned when either operand has no constant value, when
        the operator cannot be folded (assignments), or when the divisor of
        '/' or '%' is zero.  Those last two cases used to raise from eval()
        while parsing.  Comparison results are 1 or 0.
        """
        left = self.left.calculate()
        right = self.right.calculate()
        if left is None or right is None:
            return None
        # Operands are folded as integers, as the former "%d" formatting did.
        # NOTE(review): floating-point operands are therefore truncated.
        left, right = int(left), int(right)
        if self.op in ('/', '%'):
            if right == 0:
                return None
            # C truncates the quotient toward zero, and the remainder takes
            # the sign of the dividend (Python's % would floor instead).
            quotient = abs(left) // abs(right)
            if (left < 0) != (right < 0):
                quotient = -quotient
            if self.op == '/':
                return quotient
            return left - quotient * right
        fold = self._FOLDABLE.get(self.op)
        if fold is None:
            return None
        return int(fold(left, right))
class IfStatement(Node):
    """An if/then/else statement."""
    def __init__(self, expr, then_stmt, else_stmt):
        # expr is the condition.  The grammar passes a NullNode as
        # else_stmt when there is no else branch.
        self.expr = expr
        self.then_stmt = then_stmt
        self.else_stmt = else_stmt
class BreakStatement(Node):
    """A break statement (used while in a loop structure to bust out
    of it)."""
    # Carries no data; the node's class is the whole information.
    pass
class ContinueStatement(Node):
    """A continue statement (used while in a loop structure to bust
    back to the beginning of it)."""
    # Carries no data; the node's class is the whole information.
    pass
class ReturnStatement(Node):
    """A return statement, used to exit a function and optionally
    return a value."""
    def __init__(self, expr):
        # The grammar passes a NullNode when no value is returned.
        self.expr = expr
class ForLoop(Node):
    """A for loop."""
    def __init__(self, begin_stmt, expr, end_stmt, stmt):
        # Given 'for (begin_stmt expr end_stmt) stmt': begin_stmt is the
        # first header clause, expr the second (the test), end_stmt the
        # third, and stmt the loop body.
        self.expr = expr
        self.stmt = stmt
        self.begin_stmt = begin_stmt
        self.end_stmt = end_stmt
class WhileLoop(Node):
    """A while loop."""
    def __init__(self, expr, stmt):
        # expr is the loop condition, stmt the loop body.
        self.expr = expr
        self.stmt = stmt
class NodeList(Node):
    """An ordered collection of nodes.  This is an abstract base class."""

    def __init__(self, node=None):
        """Create the list, optionally seeded with one node."""
        self.nodes = []
        if node is not None:
            self.add(node)

    def add(self, node):
        """Append a node to the end of the list."""
        self.nodes.append(node)
class ArgumentList(NodeList):
    """A list of arguments for a function expression. e.g., the list
    '5,2,3' in 'a = my_func(5,2,3)'."""
    # No behavior beyond NodeList; a distinct class for visitor dispatch.
    pass
class ParamList(NodeList):
    """A list of parameters for a function prototype, e.g. the list
    'int a, char b, char c' in 'int my_func(int a, char b, char c)'."""
    def __init__(self, node=None):
        NodeList.__init__(self, node)
        # Set to 1 by the grammar when the parameter list ends in ', ...'.
        self.has_ellipsis = 0
class StatementList(NodeList):
    """Any list of statements. For instance, this can be the list of
    statements in a function body."""
    # No behavior beyond NodeList; a distinct class for visitor dispatch.
    pass
class TranslationUnit(NodeList):
    """A list of nodes representing the program itself."""
    # The grammar fills it with the top-level declarations and function
    # definitions, in source order.
    pass
class DeclarationList(NodeList):
    """A list of variable declarations, such as the ones put
    at the beginning of a compound statement (e.g., the beginning
    of a function body)."""
    # No behavior beyond NodeList; a distinct class for visitor dispatch.
    pass
class FunctionExpression(Node):
    """An execution of a function, e.g. 'my_func(a,b,c)'."""
    def __init__(self, function, arglist):
        # function: the node being called; arglist: an ArgumentList
        # (empty when the call has no arguments).
        self.function = function
        self.arglist = arglist
class CompoundStatement(Node):
    """A compound statement, e.g. '{ int i; i += 1; }'."""
    def __init__(self, declaration_list, statement_list):
        # Either part is a NullNode when the block has no declarations or
        # no statements.
        self.declaration_list = declaration_list
        self.statement_list = statement_list
class FunctionDefn(Node):
    """A node representing a function definition (its declaration
    and body)."""
    def __init__(self, declaration, body):
        # type, name and the extern/static flags are copied from the
        # declaration node; only the body is new.
        self.type = declaration.type
        self.name = declaration.name
        self.extern = declaration.extern
        self.static = declaration.static
        self.body = body
class Declaration(Node):
    """The declaration of a function or variable."""

    def __init__(self, name, type=None):
        """Create a declaration for 'name'.

        Without a type the declaration starts out with a NullNode as its
        type.  The extern, static and is_used flags all start at 0.
        """
        self.extern = 0
        self.static = 0
        self.type = NullNode() if type is None else type
        self.name = name
        self.is_used = 0

    def set_base_type(self, type):
        """Fill the innermost, still empty slot of the type with 'type'.

        If the declaration has no type yet, 'type' becomes its type;
        otherwise the request is passed down to the current type.
        """
        if not self.type.is_null():
            self.type.set_base_type(type)
            return
        self.type = type

    def add_type(self, type):
        """Make 'type' the new outermost type, with the previous type
        placed at its base."""
        type.set_base_type(self.type)
        self.type = type
# ---------------------------------------------------------------
# ABSTRACT SYNTAX TREE - TYPE SYSTEM
# ---------------------------------------------------------------
class Type(Node):
    """Describes the type of another node.

    For example, the Binop node for '5 + a', with a an int, is associated
    with a Type node saying that the result is an int.

    Types nest through the 'child' attribute, so 'pointer(pointer(int))'
    is a pointer type whose child is a pointer type whose child is int.

    This is an abstract base class."""

    def __init__(self, child=None):
        """Create the type; without a child, a NullNode is used."""
        self.child = NullNode() if child is None else child

    def set_base_type(self, type):
        """Set the base (innermost) type.

        Calling this with pointer(int) on a pointer() type yields
        pointer(pointer(int)): the request travels down the chain of
        children until an empty (null) slot is found.
        """
        if not self.child.is_null():
            self.child.set_base_type(type)
            return
        self.child = type

    def get_string(self):
        """Return the whole type as a string, e.g.
        'pointer(pointer(int))'.  Must be provided by subclasses."""
        raise NotImplementedError()

    def get_outer_string(self):
        """Return only the outermost part of the type, e.g. 'pointer'
        for pointer(pointer(int)).  Must be provided by subclasses."""
        raise NotImplementedError()

    def is_function(self):
        """Tell whether this type represents a function (0 by default)."""
        return 0
class BaseType(Type):
    """A base type representing ints, chars, etc..."""
    def __init__(self, type_str, child=None):
        Type.__init__(self, child)
        # The type's name as a string, e.g. 'int' or 'char'.
        self.type_str = type_str
    def get_string(self):
        # A base type's full string is just its name.
        return self.type_str
    def get_outer_string(self):
        # Same as get_string(): the child is not included.
        return self.type_str
class FunctionType(Type):
    """A type representing a function (for function prototypes and
    function calls).

    The parameter list is kept in 'params'; the return type is the nested
    (child) type."""

    def __init__(self, params=None, child=None):
        """Create the type; without params, a NullNode is stored."""
        Type.__init__(self, child)
        if params is None:
            params = NullNode()
        self.params = params

    def get_string(self):
        """Return e.g. 'function(int,char)->int'.

        A function type created without a parameter list is rendered with
        an empty parameter part instead of failing on the NullNode, which
        has no 'nodes' attribute.
        """
        params = [] if self.params.is_null() else self.params.nodes
        param_str = ",".join(param.type.get_string() for param in params)
        return "function(%s)->%s" % (param_str, self.child.get_string())

    def get_outer_string(self):
        return 'function'

    def is_function(self):
        # Overrides Type.is_function, which returns 0.
        return 1

    def get_return_type(self):
        """Returns the return type of the function. Internally,
        this is stored as the nested type within the function."""
        return self.child

    def get_params(self):
        """Returns the list of parameters for the function."""
        return self.params
class PointerType(Type):
    """A type representing a pointer to another (nested) type."""
    def get_string(self):
        # Wraps the pointed-to type's string, e.g. 'pointer(int)'.
        return "pointer(%s)" % self.child.get_string()
    def get_outer_string(self):
        return 'pointer'
# ---------------------------------------------------------------
# PARSER GRAMMAR / AST CONSTRUCTION
#
# The only thing the yacc grammar rules do is create an
# abstract syntax tree. Actual symbol table generation,
# type checking, flow control checking, etc. are done by
# the visitor classes (see cvisitors.py).
# ---------------------------------------------------------------
# Precedence for ambiguous grammar elements.
# ELSE is the only entry and is declared right-associative; presumably this
# settles the dangling-else conflict between the two 'if' rules -- confirm
# against the parser's debug output.
precedence = (
    ('right', 'ELSE'),
)
class ParseError(Exception):
    """Signals that the parser hit a syntax error; raised by the parser's
    error hook."""
def p_translation_unit_01(t):
    '''translation_unit : external_declaration'''
    # First external declaration: start a TranslationUnit that holds it.
    t[0] = TranslationUnit(t[1])
def p_translation_unit_02(t):
    '''translation_unit : translation_unit external_declaration'''
    # Append the new declaration to the existing unit and pass it on.
    t[1].add(t[2])
    t[0] = t[1]
def p_external_declaration(t):
    '''external_declaration : function_definition
| declaration'''
    # Either alternative already produced the node; pass it through.
    t[0] = t[1]
def p_function_definition_01(t):
    '''function_definition : type_specifier declarator compound_statement'''
    # Complete the declarator's type with the leading type specifier, then
    # pair the declaration with its body.
    t[2].set_base_type(t[1])
    t[0] = FunctionDefn(t[2], t[3])
def p_function_definition_02(t):
    '''function_definition : STATIC type_specifier declarator compound_statement'''
    # Same as the plain definition, but the declaration is flagged static
    # before the FunctionDefn copies its flags.
    t[3].static = 1
    t[3].set_base_type(t[2])
    t[0] = FunctionDefn(t[3], t[4])
def p_declaration_01(t):
    '''declaration : type_specifier declarator SEMICOLON'''
    # A declaration whose outermost type is a function (a prototype) is
    # marked extern.
    if isinstance(t[2].type, FunctionType):
        t[2].extern = 1
    # Fill in the base type and return the declarator's Declaration node.
    t[2].set_base_type(t[1])
    t[0] = t[2]
def p_declaration_02(t):
    '''declaration : EXTERN type_specifier declarator SEMICOLON'''
    # Explicit 'extern': flag the declaration, then fill in its base type.
    t[3].extern = 1
    t[3].set_base_type(t[2])
    t[0] = t[3]
def p_declaration_list_opt_01(t):
    '''declaration_list_opt : empty'''
    # No declarations: represent the absent list with a NullNode.
    t[0] = NullNode()
def p_declaration_list_opt_02(t):
    '''declaration_list_opt : declaration_list'''
    # Declarations are present: pass the list through.
    t[0] = t[1]
def p_declaration_list_02(t):
    '''declaration_list : declaration'''
    # First declaration: start a DeclarationList that holds it.
    t[0] = DeclarationList(t[1])
def p_declaration_list_03(t):
    '''declaration_list : declaration_list declaration'''
    # Append the declaration to the existing list and pass the list on.
    t[1].add(t[2])
    t[0] = t[1]
def p_type_specifier(t):
    '''type_specifier : INT
| CHAR'''
    # The keyword's text ('int' or 'char') becomes the BaseType's name.
    t[0] = BaseType(t[1])
def p_declarator_01(t):
    '''declarator : direct_declarator'''
    # No pointer prefix: pass the Declaration through.
    t[0] = t[1]
def p_declarator_02(t):
    '''declarator : ASTERISK declarator'''
    # Each '*' puts an empty PointerType into the innermost free slot of
    # the declaration's type; the base type is filled in below it later.
    t[2].set_base_type(PointerType())
    t[0] = t[2]
def p_direct_declarator_01(t):
    '''direct_declarator : ID'''
    # A bare name starts a Declaration that has no type yet.
    t[0] = Declaration(t[1])
def p_direct_declarator_02(t):
    '''direct_declarator : direct_declarator LPAREN parameter_type_list RPAREN'''
    # A parameter list turns the declaration into a function: a
    # FunctionType carrying the parameters becomes its outermost type.
    t[1].add_type(FunctionType(t[3]))
    t[0] = t[1]
def p_direct_declarator_03(t):
    '''direct_declarator : direct_declarator LPAREN RPAREN'''
    # Empty parentheses: a function type with an empty ParamList.
    t[1].add_type(FunctionType(ParamList()))
    t[0] = t[1]
def p_parameter_type_list_01(t):
    '''parameter_type_list : parameter_list'''
    # Fixed parameter list: pass the ParamList through.
    t[0] = t[1]
def p_parameter_type_list_02(t):
    '''parameter_type_list : parameter_list COMMA ELLIPSIS'''
    # A trailing ', ...' is recorded as a flag on the ParamList.
    t[1].has_ellipsis = 1
    t[0] = t[1]
def p_parameter_list_01(t):
    '''parameter_list : parameter_declaration'''
    # First parameter: start a ParamList that holds it.
    t[0] = ParamList(t[1])
def p_parameter_list_02(t):
    '''parameter_list : parameter_list COMMA parameter_declaration'''
    # Append the parameter to the existing list and pass the list on.
    t[1].add(t[3])
    t[0] = t[1]
def p_parameter_declaration(t):
    '''parameter_declaration : type_specifier declarator'''
    # NOTE: this is the same code as p_declaration_01!
    # The action is reused directly: it reads t[1] and t[2] and stores the
    # finished Declaration in t[0], so the missing SEMICOLON symbol does
    # not matter.
    p_declaration_01(t)
def p_compound_statement_01(t):
    '''compound_statement : LBRACE declaration_list_opt statement_list RBRACE'''
    # A block with statements: pair the declarations with the statements.
    t[0] = CompoundStatement(t[2], t[3])
def p_compound_statement_02(t):
    '''compound_statement : LBRACE declaration_list_opt RBRACE'''
    # A block without statements: the statement list is a NullNode.
    t[0] = CompoundStatement(t[2], NullNode())
def p_expression_statement(t):
    '''expression_statement : expression SEMICOLON'''
    # The statement is represented by its expression node.
    t[0] = t[1]
def p_expression_01(t):
    '''expression : equality_expression'''
    # No assignment involved: pass the node through.
    t[0] = t[1]
def p_expression_02(t):
    '''expression : equality_expression ASSIGN expression
| equality_expression EQ_PLUS expression
| equality_expression EQ_MINUS expression'''
    # Assignments become a Binop whose op is the operator text; unlike the
    # other binary rules, no constant folding is attempted here.
    t[0] = Binop(t[1], t[3], t[2])
def p_equality_expression_01(t):
    '''equality_expression : relational_expression'''
    # No equality operator: pass the node through.
    t[0] = t[1]
def p_equality_expression_02(t):
    '''equality_expression : equality_expression EQ relational_expression
| equality_expression NOT_EQ relational_expression'''
    # Build the comparison, folding it into a Const when both sides are
    # constant.
    t[0] = _get_calculated(Binop(t[1], t[3], t[2]))
def p_relational_expression_01(t):
    '''relational_expression : additive_expression'''
    # No relational operator: pass the node through.
    t[0] = t[1]
def p_relational_expression_02(t):
    '''relational_expression : relational_expression LESS additive_expression
| relational_expression GREATER additive_expression
| relational_expression LESS_EQ additive_expression
| relational_expression GREATER_EQ additive_expression'''
    # Build the comparison, folding it into a Const when both sides are
    # constant.
    t[0] = _get_calculated(Binop(t[1], t[3], t[2]))
def p_postfix_expression_01(t):
    '''postfix_expression : primary_expression'''
    # No postfix operator: pass the node through.
    t[0] = t[1]
def p_postfix_expression_02(t):
    '''postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN'''
    # A call with arguments: callee node plus its ArgumentList.
    callee, arguments = t[1], t[3]
    t[0] = FunctionExpression(callee, arguments)
def p_postfix_expression_03(t):
    '''postfix_expression : postfix_expression LPAREN RPAREN'''
    # A call without arguments gets an empty ArgumentList.
    t[0] = FunctionExpression(t[1], ArgumentList())
def p_postfix_expression_04(t):
    '''postfix_expression : postfix_expression LBRACKET expression RBRACKET'''
    # Subscript: the subscripted node and the index expression.
    t[0] = ArrayExpression(t[1], t[3])
def p_argument_expression_list_01(t):
    '''argument_expression_list : expression'''
    # First argument: start an ArgumentList that holds it.
    t[0] = ArgumentList(t[1])
def p_argument_expression_list_02(t):
    '''argument_expression_list : argument_expression_list COMMA expression'''
    # Append the argument to the existing list and pass the list on.
    t[1].add(t[3])
    t[0] = t[1]
def p_unary_expression_01(t):
    '''unary_expression : postfix_expression'''
    # No unary operator: pass the node through.
    t[0] = t[1]
def p_unary_expression_02(t):
    '''unary_expression : MINUS unary_expression'''
    # Negation; a constant operand is folded into a Const right away.
    t[0] = _get_calculated(Negative(t[2]))
def p_unary_expression_03(t):
    '''unary_expression : PLUS unary_expression'''
    # Unary plus leaves its operand unchanged.
    t[0] = t[2]
# This rule used to be named p_unary_expression_03 as well.  The second
# definition replaced the first one in the module namespace, so the unary
# plus rule above never reached the parser.  It now has a name of its own.
def p_unary_expression_03_not(t):
    '''unary_expression : EXCLAMATION unary_expression'''
    # horrible hack for the '!' operator... Just insert an
    # (expr == 0) into the AST.
    t[0] = _get_calculated(Binop(t[2], Const(0, BaseType('int')), '=='))
def p_unary_expression_04(t):
    '''unary_expression : ASTERISK unary_expression'''
    # Dereference: wrap the operand in a Pointer node.
    t[0] = Pointer(t[2])
def p_unary_expression_05(t):
    '''unary_expression : AMPERSAND unary_expression'''
    # Address-of: wrap the operand in an AddrOf node.
    t[0] = AddrOf(t[2])
def p_mult_expression_01(t):
    '''mult_expression : unary_expression'''
    # No multiplicative operator: pass the node through.
    t[0] = t[1]
def p_mult_expression_02(t):
    '''mult_expression : mult_expression ASTERISK unary_expression
| mult_expression DIV unary_expression
| mult_expression MODULO unary_expression'''
    # Build the operation, folding it into a Const when both sides are
    # constant.
    t[0] = _get_calculated(Binop(t[1], t[3], t[2]))
def p_additive_expression_01(t):
    '''additive_expression : mult_expression'''
    # No additive operator: pass the node through.
    t[0] = t[1]
def p_additive_expression_02(t):
    '''additive_expression : additive_expression PLUS mult_expression
| additive_expression MINUS mult_expression'''
    # Build the operation, folding it into a Const when both sides are
    # constant.
    t[0] = _get_calculated(Binop(t[1], t[3], t[2]))
def p_primary_expression_01(t):
    '''primary_expression : ID'''
    # Record the identifier together with the line number of its token.
    t[0] = Id(t[1], t.lineno(1))
def p_primary_expression_02(t):
    '''primary_expression : INUMBER'''
    # Convert the token text to an int constant of type 'int'.
    t[0] = Const(int(t[1]), BaseType('int'))
def p_primary_expression_03(t):
    '''primary_expression : FNUMBER'''
    # Convert the token text to a float constant of type 'double'.
    t[0] = Const(float(t[1]), BaseType('double'))
def p_primary_expression_04(t):
    '''primary_expression : CHARACTER'''
    # The token text still includes its quotes; eval() turns it into a
    # one-character Python string, whose code point becomes the value.
    # NOTE(review): eval() runs on source text; this relies on the lexer's
    # CHARACTER pattern admitting only a single word character.
    t[0] = Const(ord(eval(t[1])), BaseType('char'))
def p_primary_expression_05(t):
    '''primary_expression : string_literal'''
    # The StringLiteral node is passed through.
    t[0] = t[1]
def p_primary_expression_06(t):
    '''primary_expression : LPAREN expression RPAREN'''
    # Parentheses only group; the inner expression's node is the result.
    t[0] = t[2]
def p_string_literal_01(t):
    '''string_literal : STRING'''
    # The token text includes the quotes and escapes; eval() decodes it as
    # a Python string literal.
    # NOTE(review): eval() runs on source text; this relies on the lexer
    # rejecting every escape other than \n, \" and \\.
    t[0] = StringLiteral(eval(t[1]))
def p_string_literal_02(t):
    '''string_literal : string_literal STRING'''
    # Adjacent string literals are concatenated into the first node.
    # NOTE(review): eval() runs on source text; this relies on the lexer
    # rejecting every escape other than \n, \" and \\.
    t[1].append_str(eval(t[2]))
    t[0] = t[1]
def p_statement(t):
    '''statement : compound_statement
| expression_statement
| selection_statement
| iteration_statement
| jump_statement'''
    # Every alternative already produced its node; pass it through.
    t[0] = t[1]
def p_jump_statement_01(t):
    '''jump_statement : RETURN SEMICOLON'''
    # 'return;' without a value: the expression is a NullNode.
    t[0] = ReturnStatement(NullNode())
def p_jump_statement_02(t):
    '''jump_statement : RETURN expression SEMICOLON'''
    # 'return expr;': keep the returned expression.
    t[0] = ReturnStatement(t[2])
def p_jump_statement_03(t):
    '''jump_statement : BREAK SEMICOLON'''
    # 'break;' carries no data.
    t[0] = BreakStatement()
def p_jump_statement_04(t):
    '''jump_statement : CONTINUE SEMICOLON'''
    # 'continue;' carries no data.
    t[0] = ContinueStatement()
def p_iteration_statement_01(t):
    '''iteration_statement : WHILE LPAREN expression RPAREN statement'''
    # Condition and body of the while loop.
    t[0] = WhileLoop(t[3], t[5])
def p_iteration_statement_02(t):
    '''iteration_statement : FOR LPAREN expression_statement expression_statement expression RPAREN statement'''
    # In grammar order: initial statement, test, end-of-iteration
    # expression, body.  All three header parts are required by this rule.
    t[0] = ForLoop(t[3], t[4], t[5], t[7])
def p_selection_statement_01(t):
    '''selection_statement : IF LPAREN expression RPAREN statement'''
    # 'if' without 'else': the else branch is a NullNode.
    t[0] = IfStatement(t[3], t[5], NullNode())
def p_selection_statement_02(t):
    '''selection_statement : IF LPAREN expression RPAREN statement ELSE statement'''
    # 'if' with 'else': condition, then branch, else branch.
    t[0] = IfStatement(t[3], t[5], t[7])
def p_statement_list_02(t):
    '''statement_list : statement'''
    # First statement: start a StatementList that holds it.
    t[0] = StatementList(t[1])
def p_statement_list_03(t):
    '''statement_list : statement_list statement'''
    # Append the statement to the existing list and pass the list on.
    t[1].add(t[2])
    t[0] = t[1]
def p_empty(t):
    'empty :'
    # The empty production; no value is assigned to t[0].
    pass
def p_error(t):
    # Syntax error hook.  It receives the offending token, or None when
    # the input ended before the grammar was satisfied; the None case used
    # to fail with an AttributeError on t.lineno.
    # Always raises ParseError after printing the report.
    print("You've got a syntax error somewhere in your code.")
    if t is None:
        print("It could be at the end of input.")
    else:
        print("It could be around line %d." % t.lineno)
    print("Good luck finding it.")
    raise ParseError()
# Build the parser from the p_* rules above.  This call sits at module
# level, so it runs whenever the module is imported.  debug=1 turns on
# PLY's debugging output (presumably the parser.out file -- confirm).
yacc.yacc(debug=1)
# ---------------------------------------------------------------
# End of cparse.py
# ---------------------------------------------------------------
# ---------------------------------------------------------------
# cvisitors.py
#
# Atul Varma
# Python C Compiler - Visitors
# $Id: cvisitors.py,v 1.3 2004/05/27 17:51:47 varmaa Exp $
#
# The Visitor is a pattern outlined in "Design Patterns" by
# Gamma et al., used here to encapsulate different parts of parsing
# and compilation into separate classes via a mechanism called
# double dispatching.
#
# In this compiler, the yacc grammar rules in cparse.py just create
# the abstract syntax tree, and visitors do the bulk of parsing
# and compilation.
# ---------------------------------------------------------------
# TODO: make it so functions can return void.
# TODO: mark all statements with an 'ignore return value' flag
# to enable some optimizations if the statement is an
# expression.
# TODO: move extern, static indicators in functions to their
# Type object, maybe.
#
# Possible things to do:
# Add compilation to JVM/python bytecode/z-machine...
# Implement arrays
# Pass line numbers to constructors for nodes
#
# Faults so far:
# * doesn't check for variable initialization before use.
# * const number ranges aren't being checked.
from . import cparse
class Visitor:
    """Common base of all visitors.  This is an abstract base class.

    Keeps count of the warnings and errors reported during a traversal.
    """

    def __init__(self):
        self.warnings = 0
        self.errors = 0

    def _visitList(self, list):
        """Visit every node of 'list' in order and return the result of
        the last visit (None for an empty list).  'list' should be an
        actual list, not a cparse.NodeList object."""
        outcome = None
        for member in list:
            outcome = member.accept(self)
        return outcome

    def visit(self, node):
        """Visit one node: the node is asked to call back the visitor
        method that matches its class (double dispatching)."""
        return node.accept(self)

    def warning(self, str):
        """Print a non-fatal compilation warning and count it."""
        print("warning: %s" % str)
        self.warnings = self.warnings + 1

    def error(self, str):
        """Print a fatal compilation error and count it."""
        print("error: %s" % str)
        self.errors = self.errors + 1

    def has_errors(self):
        """Tell whether at least one error has been reported."""
        return self.errors > 0
# ---------------------------------------------------------------
# ABSTRACT SYNTAX TREE PRINTER (for debugging)
# ---------------------------------------------------------------
class ASTPrinterVisitor(Visitor):
    """Simple visitor that outputs a textual representation of
    the abstract syntax tree, for debugging purposes, to an
    output file."""

    def __init__(self, ast_file, indent_amt=2):
        """ast_file: object with a write() method receiving the output.
        indent_amt: step used by indent() and unindent()."""
        self.ast_file = ast_file
        Visitor.__init__(self)
        self._indent = 0
        self._indent_amt = indent_amt

    def indent(self):
        """Go one nesting level deeper."""
        self._indent += self._indent_amt

    def unindent(self):
        """Go one nesting level back up."""
        self._indent -= self._indent_amt

    def p(self, str):
        """Write one line of output at the current indentation."""
        self.ast_file.write(
            (' ' * (self._indent_amt * self._indent)) + str + "\n")

    def pNodeInfo(self, node):
        """Print the header of a node: class name, type string (if the
        node has a type) and all public int/str attributes."""
        # Print out the name of the node's class.
        self.p('+ ' + node.__class__.__name__)
        # If the node has a type associated with it,
        # print the string of the type.
        if hasattr(node, "type"):
            node_type = node.type
            # NullNode stores its type as a plain string, which has no
            # get_string() method.
            if isinstance(node_type, str):
                type_string = node_type
            else:
                type_string = node_type.get_string()
            self.p(" Type-string: %s" % type_string)
        # Find all attributes of the node that are ints or
        # strings and aren't 'private' (i.e., don't begin with
        # '_'), and print their values.  Only instance attributes are
        # considered: iterating over dir(node) also yielded method names,
        # which are not in the instance dictionary and raised KeyError.
        attributes = vars(node)
        for key in sorted(attributes):
            if key[0] == '_':
                continue
            val = attributes[key]
            if isinstance(val, (str, int)):
                self.p(" %s: %s" % (key, val))

    def pSubnodeInfo(self, subnode, label):
        """Print a labelled subtree one level deeper; null nodes are
        skipped entirely."""
        if not subnode.is_null():
            self.p(" %s:" % label)
            self.indent()
            subnode.accept(self)
            self.unindent()

    def vNullNode(self, node):
        self.pNodeInfo(node)

    def vArrayExpression(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.expr, "Expression")
        self.pSubnodeInfo(node.index, "Index")

    def vStringLiteral(self, node):
        self.pNodeInfo(node)
        self.p(' Value: "%s"' % node.get_sanitized_str())

    def vId(self, node):
        self.pNodeInfo(node)

    def vUnaryop(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.expr, "Expression")

    def vFunctionExpression(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.function, "Function")
        self.pSubnodeInfo(node.arglist, "Arguments")

    def vConst(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.type, "Type")

    def vBinop(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.left, "Left operand")
        self.pSubnodeInfo(node.right, "Right operand")

    def vNodeList(self, node):
        # List members are printed one level deeper than the list itself.
        self.pNodeInfo(node)
        self.indent()
        self._visitList(node.nodes)
        self.unindent()

    def vCompoundStatement(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.declaration_list, "Declaration list")
        self.pSubnodeInfo(node.statement_list, "Statement list")

    def vBaseType(self, node):
        self.pNodeInfo(node)

    def vFunctionType(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.params, "Parameters:")
        self.pSubnodeInfo(node.child, "Child:")

    def vPointerType(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.child, "Child:")

    def vDeclaration(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.type, "Type")

    def vReturnStatement(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.expr, "Expression")

    def vFunctionDefn(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.type, "Type")
        self.pSubnodeInfo(node.body, "Body")

    def vIfStatement(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.expr, "Expression")
        self.pSubnodeInfo(node.then_stmt, "Then statement")
        self.pSubnodeInfo(node.else_stmt, "Else statement")

    def vWhileLoop(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.expr, "Expression")
        self.pSubnodeInfo(node.stmt, "Statement")

    def vForLoop(self, node):
        self.pNodeInfo(node)
        self.pSubnodeInfo(node.begin_stmt, "Begin statement")
        self.pSubnodeInfo(node.expr, "Test expression")
        self.pSubnodeInfo(node.end_stmt, "End statement")
        self.pSubnodeInfo(node.stmt, "Statement")
# ---------------------------------------------------------------
# SYMBOL TABLE GENERATION
# ---------------------------------------------------------------
class Symtab:
    """A symbol table: a dictionary from symbol names to the Declaration
    or FunctionDefn nodes they refer to.

    Every code element with a scope of its own (for instance, each
    compound statement) gets a separate table.  Tables are linked to
    their parent, and lookups climb through the parents, which models
    lexical scoping."""

    class SymbolDefinedError(Exception):
        """Raised when a symbol is added to a table in which it has
        already been defined.

        'Defined' is meant in the C sense, i.e. 'space has been allocated
        for the symbol', as opposed to merely declared."""

    class SymbolConflictError(Exception):
        """Raised when a symbol is added to a table in which it already
        exists with a different type."""

    def __init__(self, parent=None):
        """Create an empty table and register it as a child of 'parent',
        if a parent is given."""
        self.entries = {}
        self.parent = parent
        if parent is not None:
            parent.children.append(self)
        self.children = []

    def add(self, name, value):
        """Bind 'name' to 'value' in this table.

        The value is usually the AST node of the declaration or
        definition (Declaration or FunctionDefn).  An existing entry may
        only be replaced if it is extern and its type string equals that
        of the new value.

        Raises:
            Symtab.SymbolDefinedError: the existing entry is not extern.
            Symtab.SymbolConflictError: the type strings differ.
        """
        if name in self.entries:
            previous = self.entries[name]
            if not previous.extern:
                raise Symtab.SymbolDefinedError()
            if previous.type.get_string() != value.type.get_string():
                raise Symtab.SymbolConflictError()
        self.entries[name] = value

    def get(self, name):
        """Look 'name' up in this table, then in each ancestor in turn.
        Returns None when no table on the way up knows the name."""
        table = self
        while table is not None:
            if name in table.entries:
                return table.entries[name]
            table = table.parent
        return None
class SymtabVisitor(Visitor):
    """Visitor that creates and attaches symbol tables to the AST."""

    def push_symtab(self, node):
        """Pushes a new symbol table onto the visitor's symbol table
        stack and attaches this symbol table to the given node.  This
        is used whenever a new lexical scope is encountered, so the
        node is usually a CompoundStatement object."""
        self.curr_symtab = Symtab(self.curr_symtab)
        node.symtab = self.curr_symtab

    def pop_symtab(self):
        """Pops a symbol table off the visitor's symbol table stack.
        This is used whenever a lexical scope is exited."""
        self.curr_symtab = self.curr_symtab.parent

    def vNode(self, node):
        """Does nothing for generic nodes."""
        pass

    def vArrayExpression(self, node):
        """Visits the array expression and then its index."""
        node.expr.accept(self)
        node.index.accept(self)

    def vFunctionExpression(self, node):
        """Visits the called function and then its argument list."""
        node.function.accept(self)
        node.arglist.accept(self)

    def vId(self, node):
        """Resolves an identifier against the current symbol table.

        On success the symbol is attached to the node, flagged as
        used, and the node is marked as having an address; otherwise
        an 'unknown identifier' error is reported."""
        symbol = self.curr_symtab.get(node.name)
        if symbol is None:
            self.error("Line %d: Unknown identifier '%s'." % (node.lineno, node.name))
            return
        node.symbol = symbol
        node.symbol.is_used = 1
        node.set_has_address()

    def vUnaryop(self, node):
        """Visits the operand of a unary operator."""
        node.expr.accept(self)

    def vBinop(self, node):
        """Visits the left and then the right operand."""
        node.left.accept(self)
        node.right.accept(self)

    def vNodeList(self, node):
        """Visits every node in the list."""
        self._visitList(node.nodes)

    def vParamList(self, node):
        """Visits each parameter and records its position.

        Each parameter is assigned a number (starting at 0), which
        will later be useful for the code generation phase."""
        # TODO: might be best to just move this to the code
        # generation phase, since this doesn't have anything to
        # do with symbol table generation.
        for position, param in enumerate(node.nodes):
            param.accept(self)
            param.param_num = position

    def vTranslationUnit(self, node):
        """Creates the root symbol table, visits all top-level
        nodes, and attaches the root table to the translation unit."""
        self.root_symtab = Symtab()
        self.curr_symtab = self.root_symtab
        self.vNodeList(node)
        node.symtab = self.root_symtab

    def vCompoundStatement(self, node):
        """Opens a new scope for the compound statement, visits its
        declarations and statements, and closes the scope again."""
        self.push_symtab(node)
        node.declaration_list.accept(self)
        node.statement_list.accept(self)
        self.pop_symtab()

    def _add_symbol(self, node):
        """Attempts to add a symbol for the given node to the current
        symbol table, catching any exceptions that occur and printing
        errors if necessary."""
        try:
            self.curr_symtab.add(node.name, node)
        except Symtab.SymbolDefinedError:
            self.error("Symbol '%s' already defined." % node.name)
        except Symtab.SymbolConflictError:
            self.error("Symbol '%s' has multiple differing declarations." % node.name)

    def vDeclaration(self, node):
        """Registers the declared symbol in the current scope."""
        self._add_symbol(node)

    def vReturnStatement(self, node):
        """Visits the returned expression."""
        node.expr.accept(self)

    def vFunctionType(self, node):
        """Visits the function type's parameter list."""
        node.params.accept(self)

    def vFunctionDefn(self, node):
        """Registers the function in the current scope, then opens a
        new scope in which its type (and thus its parameters) and its
        body are visited."""
        self._add_symbol(node)
        self.push_symtab(node)
        node.type.accept(self)
        node.body.accept(self)
        self.pop_symtab()

    def vIfStatement(self, node):
        """Visits the condition, the 'then' branch and the 'else'
        branch, in that order."""
        node.expr.accept(self)
        node.then_stmt.accept(self)
        node.else_stmt.accept(self)

    def vWhileLoop(self, node):
        """Visits the loop condition and then the loop body."""
        node.expr.accept(self)
        node.stmt.accept(self)

    def vForLoop(self, node):
        """Visits the begin statement, the test expression, the end
        statement and the loop body, in that order."""
        node.begin_stmt.accept(self)
        node.expr.accept(self)
        node.end_stmt.accept(self)
        node.stmt.accept(self)
# ---------------------------------------------------------------
# TYPE CHECKING
# ---------------------------------------------------------------
class TypeCheckVisitor(Visitor):
    """Visitor that performs type checking on the AST, attaching a
    Type object subclass to every eligible node and making sure these
    types don't conflict."""

    def _process_conditional(self, expr):
        """Does simple type checking for an expression that is
        supposed to be the expression for a conditional
        statement (e.g., the conditional clause of an if/then
        statement or a loop)."""
        if expr.type.get_outer_string() not in ['int', 'char']:
            self.error("Conditional expression doesn't evaluate to an int/char/etc.")

    def _coerce_consts(self, var1, var2):
        """Looks at two typed terminals to see if one of them
        is a constant integral.  If it is, then coerce it to
        the type of the other terminal.

        Note that both terminals cannot be constant integrals, or else
        they would have already been reduced to one node by the node's
        calculate() method in the parsing stage."""
        if var1.is_const():
            self._coerce_const(var1, var2.type)
        elif var2.is_const():
            self._coerce_const(var2, var1.type)

    def _coerce_const(self, var, type):
        """If the given typed terminal is a constant, coerces it to
        the given type (only 'int' and 'char' targets are applied)."""
        if var.is_const() and type.get_string() in ['int', 'char']:
            var.type = type

    def _check_const_range(self, var, type):
        """Checks the given integral constant to make sure its value
        is within the bounds of the given type.

        Currently a placeholder: no range check is performed yet."""
        # Every other type query in this module goes through
        # get_outer_string(); the former get_outside_string() call
        # did not match that API.
        type_str = type.get_outer_string()
        # TODO: implement this!
        if type_str == 'char':
            pass
        elif type_str == 'int':
            pass

    def _compare_types(self, name_str, from_type, to_type, raise_errors=1):
        """Compares the two types to see if it's possible to perform a
        binary operation on them.  If it is not, then the appropriate
        errors/warnings are raised, unless raise_errors is set to
        0.

        With raise_errors set to 0 the conflict level is returned
        instead: 0 for no conflict, 1 for a conversion that only
        deserves a warning, 2 for an impossible conversion.
        Otherwise nothing is returned."""
        WARNING = 1
        ERROR = 2
        from_str = from_type.get_string()
        to_str = to_type.get_string()
        conflict = 0
        if from_str != to_str:
            if from_str == 'char' and to_str == 'int':
                # char -> int is accepted silently.
                conflict = 0
            elif from_str == 'int' and to_str == 'char':
                conflict = WARNING
            else:
                conflict = ERROR
        if not raise_errors:
            return conflict
        if conflict == WARNING:
            self.warning("%s: Conversion from %s to %s may result in data loss." % (name_str, from_str, to_str))
        elif conflict == ERROR:
            self.error("%s: Cannot convert from %s to %s." % (name_str, from_str, to_str))

    def vNode(self, node):
        """Does nothing for generic nodes."""
        pass

    def vId(self, node):
        """An identifier has the type of the symbol it refers to."""
        node.type = node.symbol.type

    def vNegative(self, node):
        """A negated expression keeps the type of its operand."""
        node.expr.accept(self)
        node.type = node.expr.type
        # TODO: check to make sure expr is a signed type?

    def vAddrOf(self, node):
        """Checks an address-of (&) expression: the operand must have
        an address, and the result is a pointer to the operand's
        type."""
        node.expr.accept(self)
        if not node.expr.has_address():
            self.error("Address-of (&) target has no address!")
        else:
            node.expr.output_addr = 1
            node.type = cparse.PointerType(node.expr.type)

    def vPointer(self, node):
        """Checks a pointer dereference (*): the operand must be a
        pointer, and the result has the pointed-to type."""
        node.expr.accept(self)
        if node.expr.type.get_outer_string() == 'pointer':
            node.type = node.expr.type.child
            node.set_has_address()
        else:
            self.error("Pointer dereference (*) target is not a pointer!")

    def vBinop(self, node):
        """Type-checks a binary operation.

        For assignment operators the left side must be an lvalue and
        the right side is coerced to the left side's type.  For all
        other operators both operands are coerced to whichever
        operand type causes the less severe conversion conflict."""
        node.left.accept(self)
        node.right.accept(self)
        if node.op in cparse.Binop.ASSIGN_OPS:
            if not node.left.has_address():
                self.error("Invalid lvalue: not an address!")
            node.left.output_addr = 1
            self._coerce_const(node.right, node.left.type)
            # TODO: re-implement the check that rejects assignment
            # to a constant lvalue.
            self._compare_types("Assignment", node.right.type, node.left.type)
            node.right.coerce_to_type = node.left.type
            node.type = node.left.type
        else:
            # TODO: not sure if this results in the ANSI C
            # specification for binary operand type coercion.
            self._coerce_consts(node.left, node.right)
            left_conflicts = self._compare_types("", node.right.type, node.left.type, raise_errors=0)
            right_conflicts = self._compare_types("", node.left.type, node.right.type, raise_errors=0)
            if left_conflicts < right_conflicts:
                from_node, to_node = node.right, node.left
            else:
                from_node, to_node = node.left, node.right
            self._compare_types("Binop '%s'" % node.op, from_node.type, to_node.type)
            from_node.coerce_to_type = to_node.type
            to_node.coerce_to_type = to_node.type
            node.type = to_node.type

    def vNodeList(self, node):
        """Visits every node in the list."""
        self._visitList(node.nodes)

    def vCompoundStatement(self, node):
        """Type-checks the statements of a compound statement."""
        node.statement_list.accept(self)

    def vReturnStatement(self, node):
        """Checks that the returned expression can be converted to
        the current function's return type."""
        node.expr.accept(self)
        return_type = self.curr_func.type.get_return_type()
        self._coerce_const(node.expr, return_type)
        self._compare_types("Return expression", node.expr.type, return_type)
        node.expr.coerce_to_type = return_type

    def vArrayExpression(self, node):
        """Checks an array subscript: the index must be an int or
        char and the subscripted expression must be a pointer."""
        node.expr.accept(self)
        node.index.accept(self)
        if node.index.type.get_outer_string() not in ['int', 'char']:
            self.error("Array index is not an int or char!")
        elif node.expr.type.get_outer_string() != 'pointer':
            self.error("Array expression is not a pointer!")
        else:
            node.type = node.expr.type.child
            node.set_has_address()

    def vFunctionExpression(self, node):
        """Checks a function call: the target must be a function, the
        argument count must fit the parameter list, and each argument
        must be convertible to its parameter's type."""
        node.function.accept(self)
        if not node.function.type.is_function():
            self.error("Target of function expression is not a function!")
        node.type = node.function.symbol.type.get_return_type()
        node.arglist.accept(self)
        params = node.function.symbol.type.get_params()
        num_args = len(node.arglist.nodes)
        num_params = len(params.nodes)
        if (not params.has_ellipsis) and (num_args > num_params):
            self.error("Too many arguments passed to function.")
        elif num_args < num_params:
            self.error("Too few arguments passed to function.")
        for arg, param in zip(node.arglist.nodes, params.nodes):
            self._coerce_const(arg, param.type)
            self._compare_types("Function call argument", arg.type, param.type)
            arg.coerce_to_type = param.type
        # If this function takes a variable number of args and
        # we've got more args than required parameters, we need
        # to set some of the extra arguments' field(s) properly.
        if params.has_ellipsis and num_args > num_params:
            for arg in node.arglist.nodes[num_params:]:
                arg.coerce_to_type = arg.type

    def vFunctionDefn(self, node):
        """Remembers the function being checked (needed by return
        statements) and type-checks its body."""
        self.curr_func = node
        node.body.accept(self)

    def vIfStatement(self, node):
        """Type-checks the condition and both branches."""
        node.expr.accept(self)
        self._process_conditional(node.expr)
        node.then_stmt.accept(self)
        node.else_stmt.accept(self)

    def vWhileLoop(self, node):
        """Type-checks the loop condition and the loop body."""
        node.expr.accept(self)
        self._process_conditional(node.expr)
        node.stmt.accept(self)

    def vForLoop(self, node):
        """Type-checks the begin statement, the test expression, the
        end statement and the loop body."""
        node.begin_stmt.accept(self)
        node.expr.accept(self)
        self._process_conditional(node.expr)
        node.end_stmt.accept(self)
        node.stmt.accept(self)
# ---------------------------------------------------------------
# FLOW CONTROL
# ---------------------------------------------------------------
class FlowControlVisitor(Visitor):
    """Performs flow control checking on the AST.  This makes sure
    that functions return properly through all branches, that
    break/continue statements are only present within loops, and so
    forth.

    Every visited node receives a 'has_return_stmt' flag (0 or 1)."""

    def vNode(self, node):
        """Generic nodes never count as returning."""
        node.has_return_stmt = 0

    def vStatementList(self, node):
        """Visits each statement in turn; the list returns as soon as
        one of its statements returns.  A warning is issued for each
        statement that follows a returning one."""
        node.has_return_stmt = 0
        for statement in node.nodes:
            if node.has_return_stmt:
                self.warning("Function %s has at least one unreachable statement." % self.curr_func.name)
            statement.accept(self)
            if statement.has_return_stmt:
                node.has_return_stmt = 1

    def vTranslationUnit(self, node):
        """Visits every top-level node."""
        self._visitList(node.nodes)

    def vWhileLoop(self, node):
        """Visits the loop body with the 'in loop' flag set, restores
        the previous flag afterwards, and copies the body's return
        flag onto the loop node."""
        saved_in_loop = self.in_loop
        self.in_loop = 1
        node.stmt.accept(self)
        self.in_loop = saved_in_loop
        node.has_return_stmt = node.stmt.has_return_stmt

    def vForLoop(self, node):
        """For-loops are handled exactly like while-loops."""
        self.vWhileLoop(node)

    def vBreakStatement(self, node):
        """Reports an error for a 'break' outside of any loop."""
        node.has_return_stmt = 0
        if self.in_loop:
            return
        self.error("Break statement outside of loop.")

    def vContinueStatement(self, node):
        """Reports an error for a 'continue' outside of any loop."""
        node.has_return_stmt = 0
        if self.in_loop:
            return
        self.error("Continue statement outside of loop.")

    def vIfStatement(self, node):
        """An if-statement returns only when both branches return."""
        node.then_stmt.accept(self)
        node.else_stmt.accept(self)
        both_return = node.then_stmt.has_return_stmt and node.else_stmt.has_return_stmt
        node.has_return_stmt = 1 if both_return else 0

    def vFunctionDefn(self, node):
        """Checks one function body, warning if it doesn't return
        through all branches."""
        self.curr_func = node
        self.in_loop = 0
        node.body.accept(self)
        if node.body.has_return_stmt:
            return
        self.warning("Function %s doesn't return through all branches." % node.name)

    def vReturnStatement(self, node):
        """A return statement trivially returns."""
        node.has_return_stmt = 1

    def vCompoundStatement(self, node):
        """A compound statement returns when its statement list
        does."""
        statements = node.statement_list
        statements.accept(self)
        node.has_return_stmt = statements.has_return_stmt
# ---------------------------------------------------------------
# End of cvisitors.py
# ---------------------------------------------------------------
# ---------------------------------------------------------------
# cx86.py
#
# Atul Varma
# Python C Compiler - Intel x86 Code Generator
# $Id: cx86.py,v 1.3 2004/06/02 21:05:23 varmaa Exp $
# ---------------------------------------------------------------
from . import cparse
from .cvisitors import Visitor
# ---------------------------------------------------------------
# CONSTANTS
# ---------------------------------------------------------------
# Size of the 'int' type, in bytes.
INT_SIZE = 4
# Size of the 'char' type, in bytes.
CHAR_SIZE = 1
# The machine's word size, in bytes (also used as the size of a
# pointer). Note that making this different from INT_SIZE may cause
# serious problems.
WORD_SIZE = 4
# This is a strange multiplier that needs to be used in the allocation
# of global variables for the GNU Assembler: the size passed to each
# '.comm' directive is the variable's size times this value. Not sure
# exactly what it represents.
WEIRD_MULTIPLIER = 4
# ---------------------------------------------------------------
# STACK MACHINE ABSTRACTION
# ---------------------------------------------------------------
class x86Registers:
    """This class attempts to abstract the x86 registers into a stack
    machine.  Calling push() gives you a register that isn't currently
    in use by the stack machine, pop() gives you a register with the
    value of the most recently pushed element.

    Through this method the stack machine can be used to compute
    values the same way a reverse polish notation (RPN) calculator
    does.

    When push() and pop() are called, it may be the case that no
    registers are currently available; if this happens, the least
    recently used register is 'spilled' into a temporary local
    variable on the process' stack and freed for use.  Note that the
    process' stack is not to be confused with this stack machine
    abstraction--the two are completely different entities.

    Currently, push() and pop() also implement a little bit of
    implicit type conversion, so they take as parameters a cparse.Type
    object; currently conversion is done between char and int types,
    so depending on the pushed and popped types, some type conversion
    assembly code may be generated.

    Finally, an additional method, done(), should be called whenever
    the stack machine is done popping values for the current
    operation.  This is because when pop is called, the returned
    register is not immediately made 'free' for another call to pop or
    push.  If this were the case, then the following situation could
    occur:

        rightOp.calc()       # calc val of right op, put on stack
        leftOp.calc()        # calc val of left op, put on stack
        l = leftOp.pop()     # pop left val from stack
        r = rightOp.pop()    # pop right val from stack
        output('addl %s, %s' % (r, l))

    The problem with this approach is that we don't know how many
    registers will be used by leftOp's calc() method--it may use all
    the remaining registers, in which case the value that rightOp's
    calc() method put on the stack is no longer stored in a register.
    If leftOp.pop() returned register %eax and immediately marked the
    %eax register as being 'free for use', then the call to
    rightOp.pop() could very well generate code that moves rightOp's
    value from a temporary variable into %eax, thereby overwriting
    leftOp's value!

    So, instead, the pop() method places the %eax register (in this
    example) into an internal list of 'almost free' registers;
    registers that have just been returned by pop() but shouldn't be
    used by the stack machine until a call to done() is made.  The
    done() method simply moves the registers in the 'almost free' list
    over to the 'free' list."""

    def __init__(self, parent, base_fp):
        """Creates an empty stack machine.

        'parent' is the CodeGenVisitor whose o() method receives the
        emitted assembly.  'base_fp' is the frame-pointer-relative
        offset just below which the first temporary variable is
        placed."""
        # A list of all registers on the machine.
        self.all_regs = ['%ebx', '%esi', '%edi', '%eax', '%ecx', '%edx']
        # A list of the registers currently free.  Note that this
        # is a *copy* of the list of all registers on the machine.
        self.regs_free = self.all_regs[:]
        # A list of all the registers that are "almost" free
        # (see the docstring for this class).
        self.regs_almost_free = []
        # A list of all the temporary variable memory locations
        # that are currently unused.
        self.mem_free = []
        # A list corresponding to the actual stack of the stack
        # machine.  The item at the top of the stack is the
        # last element of this list.
        self.stack = []
        # A list that stores the Type objects of each corresponding
        # element on the stack machine's stack.  e.g., type_stack[0]
        # represents the type of the element at stack[0].
        self.type_stack = []
        # The location of the next memory location to be used for
        # temporary variables, relative to the current function's
        # frame pointer.
        self.next_temp = base_fp - WORD_SIZE
        # The parent CodeGenVisitor object of this stack machine.
        self.parent = parent
        # A list of the callee-save registers that have been used
        # so far by this function.  Once processing is finished,
        # these registers will be pushed onto the process' stack
        # at the beginning of the function and popped off just
        # before the function terminates.
        self.callee_save_regs_used = []
        # A list of the caller-save registers on the machine.
        self.caller_save_regs = ['%eax', '%ecx', '%edx']
        # A list of the callee-save registers on the machine.
        self.callee_save_regs = ['%ebx', '%esi', '%edi']
        # A list of the registers on the machine that have
        # sub-registers allowing access to their low-order bytes.
        self.byte_compat_regs = ['%eax', '%ebx', '%ecx', '%edx']
        # The default type of an element that is pushed onto
        # the stack machine without a 'type' object passed.
        self.default_type = cparse.BaseType('int')

    def o(self, str, comment=None):
        """Wrapper for the parent CodeGenVisitor's o() method."""
        self.parent.o(str, comment)

    def save_caller_saves(self):
        """Saves the caller-save registers, which should be done
        before the current function makes a function call, so that
        the registers don't get corrupted by the called function.

        Normally, this is done by pushing the caller-save registers
        onto the stack just before the function call is made and
        popping them off afterwards; however, due to the workings of
        this particular stack machine it's much easier to just move
        the contents of the caller-save registers, if they are
        currently being used, into temporary variables."""
        for reg in self.caller_save_regs:
            if reg not in self.regs_free:
                self._copy_reg_to_temp([reg],
                                       "Save caller-save register to temp")
                self.regs_free.append(reg)

    def save_callee_saves(self):
        """Emits code that pushes the callee-save registers used by
        the stack machine onto the process' stack."""
        for reg in self.callee_save_regs_used:
            self.o(" pushl %s" % reg,
                   "Save callee-save register")

    def load_callee_saves(self):
        """Emits code that pops the callee-save registers used by
        the stack machine off the process' stack.

        The registers are popped in the reverse of the order in which
        save_callee_saves() pushes them, because the process' stack
        is last-in/first-out; popping in push order would restore the
        saved values into the wrong registers whenever more than one
        callee-save register is in use."""
        for reg in reversed(self.callee_save_regs_used):
            self.o(" popl %s" % reg,
                   "Restore callee-save register")

    def _copy_reg_to_temp(self, valid_regs, comment_str=None):
        """Copy the least recently used register on the stack into a
        temporary variable.  The register must be in the valid_regs
        list.  Returns the register that was freed; raises Exception
        if no stack entry lives in one of the valid registers."""
        # if no free temp variables exist,
        # create a new one.
        if not self.mem_free:
            self.mem_free.append("%d(%%ebp)" % self.next_temp)
            self.next_temp -= WORD_SIZE
        # get an unused temp var
        mem = self.mem_free.pop()
        # find the least recently used register on the stack, i.e.
        # the eligible entry closest to the bottom of the stack.
        for index, loc in enumerate(self.stack):
            if loc in valid_regs:
                reg = loc
                break
        else:
            raise Exception("No free registers inside OR outside of stack!")
        # emit code to copy the register to the memory location.
        if comment_str is None:
            comment_str = "Stack machine: copy register to temp"
        self.o(" movl %s, %s" % (reg, mem),
               comment_str)
        # Modify the element's stack machine position to reflect
        # its new location.
        self.stack[index] = mem
        return reg

    def _get_free_reg(self, valid_regs, preferred_reg=None):
        """Returns a free register that is in the valid_regs list.  If
        no registers are available, the least-recently used
        eligible one is freed (by moving its contents to a temporary
        variable) and returned.

        If preferred_reg is given and currently free it is returned
        as-is; otherwise the last eligible entry of the free list is
        chosen."""
        # If we have a register free, return it.
        if len(self.regs_free) > 0:
            reg = None
            if preferred_reg is not None and preferred_reg in self.regs_free:
                reg = preferred_reg
            else:
                for candidate in self.regs_free:
                    if candidate in valid_regs:
                        # no break: the last eligible entry wins.
                        reg = candidate
            if reg is not None:
                self.regs_free.remove(reg)
                # If this register is a callee-save register that
                # we haven't used before, add it to our list
                # of used callee-save registers.
                if reg in self.callee_save_regs and reg not in self.callee_save_regs_used:
                    self.callee_save_regs_used.append(reg)
                return reg
        # copy a register into a temp var and return the register.
        return self._copy_reg_to_temp(valid_regs)

    def _get_type_valid_regs(self, type):
        """Returns the valid registers that an element of the given
        type can occupy.  For instance, 8-bit chars should only be
        placed in %eax/%ebx/%ecx/%edx because these are the only
        registers with low-order byte sub-registers
        (%al/%bl/%cl/%dl).

        Returns None for any type other than char, int or pointer."""
        type_str = type.get_outer_string()
        if type_str == 'char':
            return self.byte_compat_regs
        elif type_str in ['int', 'pointer']:
            return self.all_regs

    def push(self, type=None, preferred_reg=None, valid_regs=None):
        """Finds a free eligible register (or frees one if all are
        being used) and returns it, pushing the register onto the
        stack machine's stack.

        This method associates the stack entry with the given Type
        object; if none is supplied, then an 'int' type is used
        by default.

        If preferred_reg is passed, this function will try its
        best to return preferred_reg, if it's available."""
        if type is None:
            type = self.default_type
        self.type_stack.append(type)
        if valid_regs is None:
            valid_regs = self._get_type_valid_regs(type)
        reg = self._get_free_reg(valid_regs, preferred_reg)
        self.stack.append(reg)
        return reg

    def _coerce_type(self, curr_reg, from_type, to_type):
        """Attempts to coerce the element in the current register
        from the given type to the given type, and returns the
        register holding the result.

        Only int -> char emits code; every other combination leaves
        the register untouched."""
        from_str = from_type.get_outer_string()
        to_str = to_type.get_outer_string()
        comment_str = "Implicit cast: %s -> %s" % (from_str, to_str)
        if from_str == 'int' and to_str == 'char':
            self.o(" movzbl %s, %s" % (self.lo(curr_reg),
                                       curr_reg),
                   comment_str)
        # Always hand the register back.  Previously, type pairs with
        # no explicit rule (e.g. int -> pointer) fell through and
        # returned None, which would have ended up verbatim in the
        # generated assembly.
        return curr_reg

    def pop(self, type=None, valid_regs=None):
        """Pops the top element off the stack machine's stack, coerces
        it to the given type if necessary, and returns a register in
        which the element's value now resides.

        If no type is specified, pop() returns the value of the
        element as-is (and valid_regs is ignored)."""
        prev_type = self.type_stack.pop()
        if type is None:
            return self._pop(self.all_regs)
        if valid_regs is None:
            valid_regs = self._get_type_valid_regs(type)
        reg = self._pop(valid_regs)
        return self._coerce_type(reg, prev_type, type)

    def _pop(self, valid_regs):
        """Pops the top element of the stack into a free register
        that is also in valid_regs and returns the register name.  If
        no registers are free, the least recently used one is first
        copied into a temporary variable and then used."""
        loc = self.stack.pop()
        # If the top of the stack is an eligible register, just
        # return the name of the register and add the register to
        # our 'almost free' register list.
        if loc in valid_regs:
            self.regs_almost_free.append(loc)
            return loc
        # Otherwise, copy the value at the top of the stack
        # into a free register, possibly requiring us to spill the
        # current contents of that register into another temp
        # variable.
        reg = self._get_free_reg(valid_regs)
        self.o(" movl %s, %s" % (loc, reg),
               "Stack machine: copy temp to register")
        # if our location was a register but not in valid_regs,
        # make the register free for use.
        if loc in self.all_regs:
            self.regs_free.append(loc)
        self.regs_almost_free.append(reg)
        return reg

    def peek(self):
        """Returns the top element of the stack, but doesn't pop
        it.  Note that this is not guaranteed to be a register; it
        could be a memory location!"""
        return self.stack[-1]

    def is_empty(self):
        """Returns whether the stack machine is empty."""
        return len(self.stack) == 0

    def done(self):
        """Frees all registers that are marked as being in
        intermediate use (i.e., have been pop()'d)."""
        self.regs_free.extend(self.regs_almost_free)
        self.regs_almost_free = []

    def get_max_fp(self):
        """Returns the maximum point in the process' stack, relative
        to the current function's frame pointer, that the stack
        machine is using for temporary variables."""
        return self.next_temp + WORD_SIZE

    def lo(self, reg):
        """Returns the low-order byte of the given register.  If the
        register isn't byte-compatible (i.e., isn't %eax, %ebx, %ecx,
        or %edx), then an exception is raised.  Operands starting
        with '$' are returned unchanged.

        Example: stack.lo('%eax') == '%al'."""
        if reg[0] == '$':
            return reg
        if reg not in self.byte_compat_regs:
            raise Exception("Register %s is not byte-compatible!" % reg)
        return '%' + reg[2] + 'l'

    def force_type_change(self, type):
        """Forces a type change of the top element of the stack."""
        self.type_stack[-1] = type
# ---------------------------------------------------------------
# CODE GENERATOR
# ---------------------------------------------------------------
class CodeGenVisitor(Visitor):
"""Visitor that generates x86 assembly code for the abstract
syntax tree."""
def __init__(self, file, show_comments=0):
    """Constructor.  'file' is the file object to output the
    resulting code to.  If 'show_comments' is true, then annotated
    comments are produced for the generated assembly code."""
    import sys

    Visitor.__init__(self)

    # Where the generated code goes, and whether to annotate it.
    self.file = file
    self.show_comments = show_comments

    # Counter for generating jump labels in the assembly code
    # (e.g., '.L0', '.L1', etc).
    self.__label = 0

    # Counter and accumulated assembly text for string literals.
    self.__str_literal_label = 0
    self.__str_literal_str = ""

    # Maps binary operators to the assembly instructions
    # corresponding to them.  Certain instructions are just the
    # 'base' instruction and require a suffix corresponding to the
    # size of the operands; for instance, addition can be
    # accomplished with the 'addl' instruction for 32-bit integers
    # and 'addb' for 8-bit integers.  In such cases, the code adds
    # the appropriate suffixes on its own.
    self.binop_instructions = {
        '==': 'sete',
        '!=': 'setne',
        '>=': 'setge',
        '<=': 'setle',
        '>': 'setg',
        '<': 'setl',
        '+': 'add',
        '-': 'sub',
        '*': 'imul',
        '=': 'mov',
    }

    # Windows' C linkage prepends a '_' before symbol names,
    # whereas Unix doesn't.  This is particularly critical if the
    # source file is linking to external libraries that we're not
    # compiling.
    self.symbol_prepend = "_" if sys.platform == 'win32' else ""
def new_label(self):
    """Generate a new jump label of the form '.L<n>' and return it,
    advancing the label counter by one."""
    number = self.__label
    self.__label = number + 1
    return ".L%d" % number
def o(self, str, comment=None):
    """Append a line of assembly code to the pending output text,
    with an optional annotated comment (if comments are
    enabled).

    Without an annotation, an empty line is silently dropped."""
    annotate = self.show_comments and comment is not None
    if annotate:
        self.curr_str += "%-35s %s\n" % (str, "# %s" % comment)
        return
    if str != "":
        self.curr_str += str + "\n"
def c(self, str, indent_amt=2):
    """Output a single-line comment, indented by 'indent_amt'
    spaces and surrounded by blank lines, if comments are
    enabled."""
    if not self.show_comments:
        return
    self.o("\n%s# %s\n" % (" " * indent_amt, str))
def vNodeList(self, node):
    """Visit every node contained in the list node."""
    members = node.nodes
    self._visitList(members)
def _empty_stack(self, node):
"""Pops the top value from the stack machine's stack and
discard it. This is used when a statement has a return
value (for instance, the line 'a = b + 1;') and its
return value has been pushed onto the stack but there's
nothing to pop it off."""
# if the statement was also an expression, then its return
# value is still on the stack, so empty it (throw away
# the return value).
if not self.stack.is_empty():
self.stack.pop(node.type)
self.stack.done()
if not self.stack.is_empty():
raise Exception("PANIC! Register stack isn't empty!")
def _accept_and_empty_stack(self, node):
"""Visit the node and then empty the stack machine of the
node's return value, if one exists."""
node.accept(self)
self._empty_stack(node)
def vStatementList(self, node):
    """Generate code for each statement in order, discarding any
    value a statement leaves on the stack machine."""
    for statement in node.nodes:
        self._accept_and_empty_stack(statement)
def _generate_global_variable_definitions(self, node):
    """Generate and return the assembly text that defines the
    global variables.

    As a side effect every symbol in the node's symbol table gets
    its 'compile_loc' set to its (platform-prefixed) name.  A
    '.comm' line is produced only for symbols that are neither
    functions nor extern."""
    pieces = [".global_vars:\n"]
    for symbol in node.symtab.entries.values():
        symbol.compile_loc = self.symbol_prepend + symbol.name
        if symbol.type.is_function() or symbol.extern:
            continue
        size = self._calc_var_size(symbol.type) * WEIRD_MULTIPLIER
        pieces.append(" .comm %s,%d\n" % (symbol.compile_loc, size))
    return "".join(pieces)
def vTranslationUnit(self, node):
    """Outputs the entire assembly source file: a header, the code
    for all top-level nodes, the global variable definitions and
    the string literal definitions, written to the output file in
    one go."""
    self.curr_str = ""
    for header_line in ("# Generated by c.py",
                        "# Atul Varma (Spring 2004)\n",
                        " .text"):
        self.o(header_line)

    # Computed before the main code is generated, but emitted
    # after it.
    globals_str = self._generate_global_variable_definitions(node)

    # Main code, then globals, then string literals.
    self._visitList(node.nodes)
    self.o(globals_str)
    self.o(self.__str_literal_str)

    # Output the entire file.
    self.file.write(self.curr_str)
def _calc_var_size(self, type):
    """Calculate and return the size of the given type, in
    bytes.  An error is reported (and nothing is returned) for
    any type other than int, char or pointer."""
    kind = type.get_outer_string()
    if kind == "int":
        return INT_SIZE
    if kind == "char":
        return CHAR_SIZE
    if kind == "pointer":
        return WORD_SIZE
    self.error("Unknown type: %s" % kind)
def _calc_var_align(self, type):
"""Calculate and return the alignment of the given type,
in bytes."""
return self._calc_var_size(type)
def _calc_function_var_addrs(self, symtab, last_fp_loc):
"""Calculate the addresses of all local variables in the
function and attach them to their respective symbols in
the function's symbol table(s)."""
self._calc_function_arg_addrs(symtab)
return self._calc_local_var_addrs(symtab.children[0], last_fp_loc)
def _calc_function_arg_addrs(self, symtab):
    """Calculate the addresses of all the arguments passed to
    the function.

    Argument number n is placed two words plus n words above the
    frame pointer.  A warning is issued for every argument that is
    never used."""
    first_arg_offset = WORD_SIZE * 2
    for symbol in symtab.entries.values():
        offset = first_arg_offset + (symbol.param_num * WORD_SIZE)
        symbol.compile_loc = "%d(%%ebp)" % offset
        if not symbol.is_used:
            self.warning("function argument '%s' is never used." % symbol.name)
def _calc_local_var_addrs(self, symtab, last_fp_loc):
    """Calculate the locations of all the local variables defined
    in the function's body and all nested scopes therein.

    This model of allocation assumes a 'worst-case' scenario
    where all branches and nested scopes of the function are
    executed; thus the space required for all the local
    variables is allocated on the process' stack at the
    beginning of the function.

    Note, however, that lexical scopes that cannot exist
    at the same time may overlap in memory.  For instance,
    examine the following 'if' statement:

        if (a > 1) {
            int i;
        } else {
            int j;
        }

    Here 'i' and 'j' will actually occupy the same place in
    memory because it is impossible for both of them to
    exist in memory at the same time.

    Returns the lowest frame-pointer-relative offset in use,
    rounded down to a pointer-sized boundary."""
    for symbol in symtab.entries.values():
        if symbol.extern:
            # Extern symbols are addressed by name, not by offset.
            symbol.compile_loc = self.symbol_prepend + symbol.name
            continue
        last_fp_loc -= self._calc_var_size(symbol.type)
        # adjust location for alignment
        alignment = self._calc_var_align(symbol.type)
        misalignment = (-last_fp_loc) % alignment
        if misalignment != 0:
            last_fp_loc -= (alignment - misalignment)
        symbol.compile_loc = "%d(%%ebp)" % last_fp_loc
        if not symbol.is_used:
            self.warning("local variable '%s' is never used." % symbol.name)

    # Sibling scopes all start from the same offset, so they share
    # memory; keep the deepest extent any of them reaches.
    deepest = last_fp_loc
    for kid in symtab.children:
        kid_extent = self._calc_local_var_addrs(kid, last_fp_loc)
        if kid_extent < deepest:
            deepest = kid_extent

    # adjust location for alignment, to keep the stack aligned
    # on a word-sized boundary.
    alignment = self._calc_var_align(cparse.PointerType())
    misalignment = (-deepest) % alignment
    if misalignment != 0:
        deepest -= (alignment - misalignment)
    return deepest
def _fill_line(self, str, width=70):
"""Fills a string to the given width with the '-'
character."""
extra = "-" * (width-1-len(str))
return str + " " + extra
def vFunctionDefn(self, node):
    """Emit the complete assembly for one function definition.

    The prologue is written first, then the body is generated into a
    separate buffer.  Only after the body has been generated is the
    final frame size known (locals plus the stack machine's
    temporaries), so the stack allocation and callee-save register
    spills are emitted at that point and the buffered body is appended
    behind them.  The epilogue follows the function's end label, which
    'return' statements jump to.
    """
    # Fresh per-function state.
    self.break_labels = []
    self.continue_labels = []
    self.curr_func_end_label = self.new_label() + "_function_end"

    # Base frame size: locals only, without the stack machine's
    # temporary variables.
    frame_size = self._calc_function_var_addrs(node.symtab, 0)

    banner = self._fill_line("BEGIN FUNCTION: %s()" % node.name)
    header = "%s\n#\n# Function type: %s" % (banner, node.type.get_string())
    self.c(header, 0)

    entry_symbol = node.compile_loc
    if not node.static:
        self.o(" .global %s" % entry_symbol)
    self.o("%s:" % entry_symbol)
    self.o(" pushl %ebp", "Save old frame pointer")
    self.o(" movl %esp, %ebp", "Set new frame pointer")

    # Each function gets its own stack machine.
    self.stack = x86Registers(self, frame_size)

    # Redirect output into an empty buffer while the body is generated,
    # then restore the real output and keep the body text aside.
    saved_output = self.curr_str
    self.curr_str = ""
    node.body.accept(self)
    body_code, self.curr_str = self.curr_str, saved_output

    # Now the full frame size (locals + temporaries) is known.
    max_fp = self.stack.get_max_fp()
    if max_fp != 0:
        self.o(" subl $%d, %%esp" % (-max_fp),
               "Allocate space for local+temp vars")

    # Spill whichever callee-save registers the body ended up using.
    self.stack.save_callee_saves()

    self.curr_str += body_code

    self.o("%s:" % self.curr_func_end_label)

    # Reload the callee-save registers spilled above.
    self.stack.load_callee_saves()
    self.o(" movl %ebp, %esp", "Deallocate stack frame")
    self.o(" popl %ebp", "Restore old stack frame")
    self.o(" ret\n")

    footer = self._fill_line("END FUNCTION: %s()" % node.name)
    self.c(footer, 0)
def vCompoundStatement(self, node):
    """Generate code for a compound statement by visiting its statement list."""
    statements = node.statement_list
    statements.accept(self)
def vIfStatement(self, node):
    """Emit assembly for an 'if' statement with an optional 'else'.

    The condition is evaluated and tested against zero; a zero result
    jumps to the else clause, or straight to the end label when there
    is no else clause.  The THEN clause always ends with a jump to the
    end label.
    """
    done_label = self.new_label() + "_done"
    has_else = not node.else_stmt.is_null()
    # Without an else clause a false condition simply skips to the end.
    else_label = (self.new_label() + "_else") if has_else else done_label

    self.c("IF statment - begin")
    node.expr.accept(self)
    cond_reg = self.stack.pop()
    self.stack.done()
    self.o(" testl %s, %s" % (cond_reg, cond_reg), "Test the result")
    self.o(" jz %s" % else_label,
           "If result is zero, jump to else clause")

    self.c("IF statment - THEN clause - begin")
    self._accept_and_empty_stack(node.then_stmt)
    self.c("IF statment - THEN clause - end")
    self.o(" jmp %s" % done_label)

    if has_else:
        self.c("IF statment - ELSE clause - begin")
        self.o("%s:" % else_label)
        self._accept_and_empty_stack(node.else_stmt)
        self.c("IF statment - ELSE clause - end")

    self.o("%s:" % done_label)
    self.c("IF statment - end")
def _push_loop_labels(self, break_label, continue_label):
"""Pushes new values of labels to jump to for 'break' and
'continue' statements."""
self.break_labels.append(break_label)
self.continue_labels.append(continue_label)
def _pop_loop_labels(self):
"""Restores old values of labels to jump to for 'break' and
'continue' statements."""
self.break_labels.pop()
self.continue_labels.pop()
def vWhileLoop(self, node):
    """Emit assembly for a 'while' loop.

    The condition is evaluated at the test label; a zero result jumps
    to the done label, otherwise the body runs and control jumps back
    to the test.  Inside the body 'break' targets the done label and
    'continue' targets the test label.
    """
    loop_top = self.new_label() + "_test"
    loop_exit = self.new_label() + "_done"
    self._push_loop_labels(break_label=loop_exit,
                           continue_label=loop_top)

    self.c("WHILE loop - begin")
    self.o("%s:" % loop_top)

    # Evaluate the condition and leave the loop when it is zero.
    node.expr.accept(self)
    cond_reg = self.stack.pop()
    self.stack.done()
    self.o(" testl %s, %s" % (cond_reg, cond_reg), "Test the result")
    self.o(" jz %s" % loop_exit,
           "If result is zero, leave while loop")

    self._accept_and_empty_stack(node.stmt)
    self.o(" jmp %s" % loop_top, "Jump to start of while loop")

    self.o("%s:" % loop_exit)
    self.c("WHILE loop - end")
    self._pop_loop_labels()
def vForLoop(self, node):
    """Emit assembly for a 'for' loop.

    Generated layout:

            <begin_stmt>
        test:
            <expr>; leave the loop if the result is zero
            <stmt>
        continue:
            <end_stmt>
            jmp test
        done:

    'break' inside the body jumps to the done label.  'continue' jumps
    to a dedicated label placed just before ``end_stmt``, so the loop's
    end statement (typically the increment) still runs before the
    condition is tested again, as C requires.  Jumping straight to the
    test label would skip it, and a loop such as
    ``for (i = 0; i < n; i++) { ...; continue; }`` would never advance.
    """
    test_label = self.new_label() + "_test"
    done_label = self.new_label() + "_done"
    continue_label = self.new_label() + "_continue"
    self._push_loop_labels(break_label=done_label,
                           continue_label=continue_label)

    self.c("FOR loop - begin")
    self._accept_and_empty_stack(node.begin_stmt)

    self.o("%s:" % test_label)
    node.expr.accept(self)
    comparer = self.stack.pop()
    self.stack.done()
    self.o(" testl %s, %s" % (comparer, comparer), "Test the result")
    self.o(" jz %s" % done_label,
           "If result is zero, leave for loop")

    self._accept_and_empty_stack(node.stmt)

    # Target of 'continue': falls through into the end statement.
    self.o("%s:" % continue_label)
    self._accept_and_empty_stack(node.end_stmt)
    self.o(" jmp %s" % test_label, "Jump to start of for loop")

    self.o("%s:" % done_label)
    self.c("FOR loop - end")
    self._pop_loop_labels()
def vBreakStatement(self, node):
    """Emit a jump to the break label of the innermost enclosing loop."""
    target = self.break_labels[-1]
    self.o(" jmp %s" % target, "Loop: break statement")
def vContinueStatement(self, node):
    """Emit a jump to the continue label of the innermost enclosing loop."""
    target = self.continue_labels[-1]
    self.o(" jmp %s" % target, "Loop: continue statement")
def _get_new_str_literal_label(self, str):
    """Allocate a label for a string literal and queue its assembly.

    A label of the form ``LC<n>`` is created from the running literal
    counter.  The ``.ascii`` definition for ``str`` (newlines rewritten
    as the escape ``\\12`` and a ``\\0`` terminator appended) is added
    to the pending string-literal text rather than emitted right away.
    Returns the new label's name.
    """
    index = self.__str_literal_label
    label = "LC%d" % index
    escaped = str.replace('\n', '\\12')
    definition = "%s:\n .ascii \"%s\\0\"\n" % (label, escaped)
    self.__str_literal_str += definition
    self.__str_literal_label = index + 1
    return label
def vStringLiteral(self, node):
    """Push the address of a string literal onto the stack machine.

    The literal is registered under a fresh label and that label's
    address is loaded into a newly pushed location.  The annotation
    comment shows a short preview of the literal: at most 7 characters
    of its sanitized form, followed by '...' when truncated.
    """
    label = self._get_new_str_literal_label(node.get_str())

    preview_chars = 7
    preview = node.get_sanitized_str()
    if len(preview) > preview_chars:
        preview = preview[:preview_chars] + "..."

    dest = self.stack.push(node.type)
    self.o(" movl $%s, %s" % (label, dest),
           "Get addr of string literal '%s'" % preview)
def vConst(self, node):
    """Load a numeric constant into a newly pushed stack-machine location."""
    value = node.value
    dest = self.stack.push(node.type)
    self.o(" movl $%d, %s" % (value, dest),
           "Load numeric constant %d" % value)
def vId(self, node):
    """Push either the address or the value of an identifier.

    When ``node.output_addr`` is set, only the symbol's address is
    pushed (via ``leal``).  Otherwise the symbol's value is loaded:
    ``movl`` for 'int' and 'pointer' types, ``movzbl`` (zero-extending
    byte load) for 'char'.
    """
    symbol = node.symbol

    # Address requested: push it and stop here.
    if node.output_addr:
        dest = self.stack.push()
        self.o(" leal %s, %s" % (symbol.compile_loc, dest),
               "Get address of %s" % symbol.name)
        return

    kind = node.type.get_outer_string()
    if kind == 'char':
        instr = 'movzbl'
    elif kind in ('pointer', 'int'):
        instr = 'movl'
    self.o(" %s %s, %s" % (instr, symbol.compile_loc,
                           self.stack.push(node.type)),
           "Get value of %s" % symbol.name)
def vArrayExpression(self, node):
    """Emit code for an array subscript expression ``expr[index]``.

    Both sub-expressions are evaluated, then combined into a
    ``(base,index,size)`` memory operand, where the size is that of the
    element type.  With ``node.output_addr`` set, the element's address
    is produced with ``leal``; otherwise the element is loaded with
    ``movl`` ('int'/'pointer') or ``movzbl`` ('char').
    """
    node.expr.accept(self)
    node.index.accept(self)

    # The index was pushed last, so it comes off first.
    index_reg = self.stack.pop(node.index.type)
    base_reg = self.stack.pop(node.expr.type)
    dest_reg = self.stack.push(node.type)

    element_size = self._calc_var_size(node.type)
    operand = "(%s,%s,%d)" % (base_reg, index_reg, element_size)
    self.stack.done()

    if node.output_addr:
        self.o(" leal %s, %s" % (operand, dest_reg),
               "Load addr of pointer array index")
        return

    kind = node.type.get_outer_string()
    if kind == 'char':
        instr = 'movzbl'
    elif kind in ('int', 'pointer'):
        instr = 'movl'
    self.o(" %s %s, %s" % (instr, operand, dest_reg),
           "Pointer array index dereference")
def vFunctionExpression(self, node):
    """Generate assembly for calling a function.

    Caller-save registers in use are saved first.  The arguments are
    then evaluated and pushed last-to-first, the call is emitted, the
    return value (delivered in %eax) is pushed onto the stack machine,
    and finally the argument area is removed from the hardware stack.

    The argument list is walked through ``reversed()`` instead of being
    reversed in place and restored afterwards, so the AST is never left
    with its arguments in the wrong order if generating the code for an
    argument raises.
    """
    func_symbol = node.function.symbol
    args = node.arglist.nodes

    self.c("FUNCTION CALL to %s() - begin" % func_symbol.name)

    # If we're using any caller-save registers, free them up.
    self.stack.save_caller_saves()

    # Arguments go onto the stack in reverse order, so the last
    # argument is pushed first; argnum counts down accordingly.
    argnum = len(args)
    for arg in reversed(args):
        arg_reg = self._accept_and_pop(arg)
        self.o(" pushl %s" % arg_reg, "Push arg %d" % argnum)
        self.stack.done()
        argnum -= 1

    self.o(" call %s" % func_symbol.compile_loc,
           "Call %s()" % func_symbol.name)

    # The function places its return value in %eax, so ask the stack
    # machine for that register when pushing the result.
    result = self.stack.push(func_symbol.type.get_return_type(),
                             preferred_reg='%eax')

    # If we got %eax the value is already in place; otherwise copy it.
    # (The original author notes that the current stack machine is
    # expected to always hand out %eax here.)
    if result != '%eax':
        self.o(" movl %%eax, %s" % result, "Copy return value")

    arg_stack_size = len(args) * WORD_SIZE
    if arg_stack_size > 0:
        self.o(" addl $%d, %%esp" % arg_stack_size,
               "Deallocate argument stack")

    self.c("FUNCTION CALL to %s() - end" % func_symbol.name)
def vReturnStatement(self, node):
    """Emit code for a 'return' statement.

    The returned expression is moved into %eax and control jumps to
    the current function's end label, where the epilogue lives.
    """
    value = self._accept_and_pop(node.expr)
    end_label = self.curr_func_end_label
    self.o(" movl %s, %%eax" % value, "Set return value")
    self.o(" jmp %s" % end_label, "Exit function")
    self.stack.done()
def _accept_and_pop(self, node):
"""Accept the given node and pop its value into a register and
return the register. Implicit type conversion is performed,
if necessary, by the stack machine.
Also, if the node is determined to be a numeric constant,
the literal value of the constant (e.g., '$15') is returned,
for purposes of optimization."""
if node.is_const():
return "$%d" % node.value
else:
node.accept(self)
return self.stack.pop(node.coerce_to_type)
def _binop_assign(self, node):
"""Performs an assignment operation (=, +=, etc) on the given
Binop node."""
node.left.accept(self)
right_reg = self._accept_and_pop(node.right)
left_reg = self.stack.pop()
instr = self.binop_instructions[node.op[0]]
instr += self._type_suffix(node.type)
type_str = node.type.get_outer_string()
if type_str == 'char':
right_reg = self.stack.lo(right_reg)
self.o(" %s %s, (%s)" % (instr, right_reg, left_reg),
"Perform assignment '%s'" % node.op)
# NOTE: Wow, this makes for insanely inefficient code, especially
# when the result of the operation isn't being used.
if type_str in ['int', 'pointer']:
instr = 'movl'
elif type_str == 'char':
instr = 'movzbl'
self.o(" %s (%s), %s" % (instr, left_reg,
self.stack.push(node.type)),
"Copy assignment result to register")
self.stack.done()
def _type_suffix(self, type):
"""Returns the assembly instruction suffix for the given type;
'l' for 32-bit types, 'b' for 8-bit types, etc..."""
type_str = type.get_outer_string()
if type_str in ['int', 'pointer']:
return 'l'
elif type_str == 'char':
return 'b'
def _binop_arith(self, node):
"""Performs an arithmetic operation (+, -, etc) on the given
Binop node."""
node.left.accept(self)
right_reg = self._accept_and_pop(node.right)
left_reg = self.stack.pop(node.left.coerce_to_type)
instr = self.binop_instructions[node.op] + \
self._type_suffix(node.type)
type_str = node.type.get_outer_string()
if type_str == 'char':
r_reg = self.stack.lo(right_reg)
l_reg = self.stack.lo(left_reg)
else:
r_reg = right_reg
l_reg = left_reg
self.o(" %s %s, %s" % (instr, r_reg, l_reg),
"Perform '%s'" % node.op)
self.stack.done()
# Here we are relying on the fact that left_reg is now free
# from the last pop(), so we should be able to push it
# back onto the stack machine.
new_reg = self.stack.push(node.type, preferred_reg=left_reg)
if new_reg != left_reg:
raise Exception("PANIC! Binop push() isn't same as last pop()!")
def _binop_compare(self, node):
    """Emit code for a comparison operator (>, ==, etc.) Binop node.

    The operands are compared with ``cmpl``.  The instruction looked up
    for the operator then writes its result into the low byte of a
    newly pushed 'char' location, which is finally zero-extended to the
    full register.
    """
    node.left.accept(self)
    rhs_reg = self._accept_and_pop(node.right)
    lhs_reg = self.stack.pop(node.left.coerce_to_type)
    self.stack.done()

    self.o(" cmpl %s, %s" % (rhs_reg, lhs_reg),
           "Compare %s to %s" % (lhs_reg, rhs_reg))

    # TODO (from the original author): this could go wrong if push()
    # ever generates mov instructions between the compare and the
    # instruction consuming its flags; mov is not expected to alter the
    # flags, but this has not been verified.
    result_reg = self.stack.push(cparse.BaseType('char'))
    result_lo = self.stack.lo(result_reg)
    set_instr = self.binop_instructions[node.op]
    self.o(" %s %s" % (set_instr, result_lo),
           "Perform '%s'" % node.op)
    self.o(" movzbl %s, %s" % (result_lo, result_reg),
           "Zero-extend the boolean result")
def vBinop(self, node):
    """Dispatch a binary operator node to the matching code generator.

    Assignment operators go to ``_binop_assign``, '+', '-' and '*' to
    ``_binop_arith`` and the six comparison operators to
    ``_binop_compare``.  Any other operator generates nothing.
    """
    op = node.op
    if op in cparse.Binop.ASSIGN_OPS:
        handler = self._binop_assign
    elif op in ['+', '-', '*']:
        handler = self._binop_arith
    elif op in ['==', '!=', '<', '>', '<=', '>=']:
        handler = self._binop_compare
    else:
        return
    handler(node)
def vNegative(self, node):
    """Emit code for unary minus.

    The operand is evaluated and then negated in place, in whatever
    location is on top of the stack machine.
    """
    node.expr.accept(self)
    operand = self.stack.peek()
    self.o(" negl %s" % operand, "Perform unary negation")
def vPointer(self, node):
    """Emit code for the pointer dereference operator '*'.

    The operand is evaluated first.  With ``node.output_addr`` set, the
    operand's value is itself the wanted address, so it is left on the
    stack machine and only an annotation is written.  Otherwise the
    pointed-to value is loaded with ``movl`` ('int'/'pointer') or
    ``movzbl`` ('char').
    """
    node.expr.accept(self)

    if node.output_addr:
        self.o("", "(Getting pointer target addr via '*')")
        return

    src_reg = self.stack.pop(node.expr.type)
    dest_reg = self.stack.push(node.type)

    kind = node.type.get_outer_string()
    if kind == 'char':
        instr = 'movzbl'
    elif kind in ('int', 'pointer'):
        instr = 'movl'
    self.o(" %s (%s), %s" % (instr, src_reg, dest_reg),
           "Pointer dereference")
    self.stack.done()
def vAddrOf(self, node):
    """Emit code for the address-of operator '&'.

    No instruction is generated here: the operand is visited, the type
    of the stack machine's top entry is changed to the node's type, and
    an annotation comment is written.
    """
    operand = node.expr
    operand.accept(self)
    self.stack.force_type_change(node.type)
    self.o("", "(Address-of operator '&' used here)")
# ---------------------------------------------------------------
# End of cx86.py
# ---------------------------------------------------------------
import argparse, sys, os.path
import ply.yacc as yacc
from . import cparse, cvisitors, cx86
class CompileError(Exception):
    """Raised when a compilation phase has reported errors."""
class Compiler(object):
    """Front-end facade of the compiler.

    Runs the parser and the successive visitor phases over the
    resulting syntax tree, and keeps running totals of the errors and
    warnings the phases report.
    """

    def __init__(self):
        self.total_errors = 0
        self.total_warnings = 0

    def _parse(self):
        """Parse ``self.code`` and store the resulting tree in ``self.ast``."""
        self.ast = yacc.parse(self.code)

    def _compile_phase(self, visitor):
        """Run one visitor over the abstract syntax tree.

        The visitor's error and warning counts are added to the totals.
        Raises CompileError if the visitor reports errors.
        """
        visitor.visit(self.ast)
        self.total_errors += visitor.errors
        self.total_warnings += visitor.warnings
        if visitor.has_errors():
            raise CompileError()

    def _do_compile(self, outfile, ast_file):
        """Parse the code and run every compilation phase in order.

        Assembly is written to the file object ``outfile``.  If
        ``ast_file`` is not None, the abstract syntax tree is printed
        to that file object as a final phase.
        """
        self._parse()
        analysis_phases = (
            cvisitors.SymtabVisitor,
            cvisitors.TypeCheckVisitor,
            cvisitors.FlowControlVisitor,
        )
        for make_visitor in analysis_phases:
            self._compile_phase(make_visitor())
        self._compile_phase(cx86.CodeGenVisitor(outfile))
        if ast_file is not None:
            self._compile_phase(cvisitors.ASTPrinterVisitor(ast_file))

    def _print_stats(self):
        """Print the total number of errors and warnings."""
        totals = (self.total_errors, self.total_warnings)
        print("%d errors, %d warnings." % totals)

    def compile(self, code, outfile, show_ast):
        """Compile the string ``code``, writing assembly to ``outfile``.

        ``show_ast`` is either None or a file object that receives a
        dump of the abstract syntax tree.  Returns 0 on success and 1
        when parsing or a compilation phase failed.  The error/warning
        totals are printed except after a parse error.
        """
        self.code = code
        try:
            self._do_compile(outfile, show_ast)
        except cparse.ParseError:
            report_stats, failed = False, True
        except CompileError:
            report_stats, failed = True, True
        else:
            report_stats, failed = True, False

        if report_stats:
            self._print_stats()
        if failed:
            print("Errors encountered, bailing.")
            return 1
        print("Compile successful.")
        return 0
def main(args=None):
    """Command-line entry point of the ``cct`` compiler driver.

    Args:
        args: list of command-line arguments to parse; None makes
            argparse fall back to ``sys.argv[1:]``.

    Every source file is compiled in turn with a fresh ``Compiler``.
    With ``--ast`` the tree of ``NAME.c`` is dumped next to it in
    ``NAME.ast``.  The function calls ``sys.exit`` with the compiler's
    return value as soon as one source fails to compile, and returns
    None when all of them succeed.
    """
    parser = argparse.ArgumentParser(prog="cct")
    parser.add_argument("-o", action="store", metavar="PATH",
                        type=argparse.FileType('w'), default=sys.stdout,
                        help="write output to PATH")
    parser.add_argument("--ast", action="store_true", default=False,
                        help="dump AST for each C file")
    parser.add_argument("source", nargs="+", metavar="PATH",
                        help="C source file(s) to compile")
    args = parser.parse_args(args)
    try:
        for src in args.source:
            # Read the source before creating the .ast file, so that an
            # unreadable source does not leave an empty .ast file behind.
            with open(src) as source_file:
                code = source_file.read()
            if args.ast:
                ast_path = os.path.splitext(src)[0] + ".ast"
                # The with-block closes the dump even if compile() raises.
                with open(ast_path, "w") as ast_file:
                    retval = Compiler().compile(code, args.o, ast_file)
            else:
                retval = Compiler().compile(code, args.o, None)
            if retval != 0:
                sys.exit(retval)
    finally:
        # Close a file argparse opened for -o, but never the interpreter's
        # stdout (the default, and what FileType yields for "-").
        if args.o is not sys.stdout:
            args.o.close()
# parsetab.py
# This file is automatically generated. Do not edit.
# pylint: disable=W,C,R
_tabversion = '3.10'
_lr_method = 'LALR'
_lr_signature = 'rightELSEAMPERSAND ARROW ASSIGN ASTERISK AUTO BREAK CARET CASE CHAR CHARACTER COLON COMMA CONST CONTINUE DEFAULT DIV DO DOT DOUBLE DOUBLE_AMPERSAND DOUBLE_MINUS DOUBLE_PIPE DOUBLE_PLUS ELLIPSIS ELSE ENUM EQ EQ_AMPERSAND EQ_CARET EQ_DIV EQ_MINUS EQ_MODULO EQ_PIPE EQ_PLUS EQ_SHIFT_LEFT EQ_SHIFT_RIGHT EQ_TIMES EXCLAMATION EXTERN FLOAT FNUMBER FOR GOTO GREATER GREATER_EQ ID IF INT INUMBER LBRACE LBRACKET LESS LESS_EQ LONG LPAREN MINUS MODULO NOT_EQ PIPE PLUS POUND QUESTION RBRACE RBRACKET REGISTER RETURN RPAREN SEMICOLON SHIFT_LEFT SHIFT_RIGHT SHORT SIGNED SIZEOF STATIC STRING STRUCT SWITCH TILDE TIMES TYPEDEF UNION UNSIGNED VOID VOLATILE WHILEtranslation_unit : external_declarationtranslation_unit : translation_unit external_declarationexternal_declaration : function_definition\n | declarationfunction_definition : type_specifier declarator compound_statementfunction_definition : STATIC type_specifier declarator compound_statementdeclaration : type_specifier declarator SEMICOLONdeclaration : EXTERN type_specifier declarator SEMICOLONdeclaration_list_opt : emptydeclaration_list_opt : declaration_listdeclaration_list : declarationdeclaration_list : declaration_list declarationtype_specifier : INT\n | CHARdeclarator : direct_declaratordeclarator : ASTERISK declaratordirect_declarator : IDdirect_declarator : direct_declarator LPAREN parameter_type_list RPARENdirect_declarator : direct_declarator LPAREN RPARENparameter_type_list : parameter_listparameter_type_list : parameter_list COMMA ELLIPSISparameter_list : parameter_declarationparameter_list : parameter_list COMMA parameter_declarationparameter_declaration : type_specifier declaratorcompound_statement : LBRACE declaration_list_opt statement_list RBRACEcompound_statement : LBRACE declaration_list_opt RBRACEexpression_statement : expression SEMICOLONexpression : equality_expressionexpression : equality_expression ASSIGN expression\n | equality_expression EQ_PLUS expression\n | equality_expression 
EQ_MINUS expressionequality_expression : relational_expressionequality_expression : equality_expression EQ relational_expression\n | equality_expression NOT_EQ relational_expressionrelational_expression : additive_expressionrelational_expression : relational_expression LESS additive_expression\n | relational_expression GREATER additive_expression\n | relational_expression LESS_EQ additive_expression\n | relational_expression GREATER_EQ additive_expressionpostfix_expression : primary_expressionpostfix_expression : postfix_expression LPAREN argument_expression_list RPARENpostfix_expression : postfix_expression LPAREN RPARENpostfix_expression : postfix_expression LBRACKET expression RBRACKETargument_expression_list : expressionargument_expression_list : argument_expression_list COMMA expressionunary_expression : postfix_expressionunary_expression : MINUS unary_expressionunary_expression : EXCLAMATION unary_expressionunary_expression : ASTERISK unary_expressionunary_expression : AMPERSAND unary_expressionmult_expression : unary_expressionmult_expression : mult_expression ASTERISK unary_expression\n | mult_expression DIV unary_expression\n | mult_expression MODULO unary_expressionadditive_expression : mult_expressionadditive_expression : additive_expression PLUS mult_expression\n | additive_expression MINUS mult_expressionprimary_expression : IDprimary_expression : INUMBERprimary_expression : FNUMBERprimary_expression : CHARACTERprimary_expression : string_literalprimary_expression : LPAREN expression RPARENstring_literal : STRINGstring_literal : string_literal STRINGstatement : compound_statement\n | expression_statement\n | selection_statement\n | iteration_statement\n | jump_statementjump_statement : RETURN SEMICOLONjump_statement : RETURN expression SEMICOLONjump_statement : BREAK SEMICOLONjump_statement : CONTINUE SEMICOLONiteration_statement : WHILE LPAREN expression RPAREN statementiteration_statement : FOR LPAREN expression_statement expression_statement 
expression RPAREN statementselection_statement : IF LPAREN expression RPAREN statementselection_statement : IF LPAREN expression RPAREN statement ELSE statementstatement_list : statementstatement_list : statement_list statementempty :'
_lr_action_items = {'RBRACE':([19,21,25,26,27,28,29,36,42,43,51,55,61,63,65,69,78,88,92,94,97,98,119,137,139,143,144,],[-81,-7,-8,-9,-11,36,-10,-26,-70,-67,-69,-79,-66,97,-68,-12,-74,-71,-73,-27,-25,-80,-72,-77,-75,-78,-76,]),'ASSIGN':([38,45,47,48,52,54,59,60,62,64,66,67,68,81,87,90,91,93,109,110,111,114,115,116,117,118,120,121,122,126,128,135,136,],[-55,-59,-32,-64,-61,-62,-40,-35,-51,100,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,-34,-33,-42,-41,-43,]),'STATIC':([0,1,2,5,9,16,20,21,24,25,36,97,],[4,-4,-1,-3,4,-2,-5,-7,-6,-8,-26,-25,]),'CHARACTER':([19,21,25,26,27,28,29,36,42,43,44,46,49,50,51,53,55,57,61,63,65,69,74,75,76,77,78,79,80,83,84,85,86,88,92,94,95,96,97,98,99,100,101,102,103,104,105,112,119,131,132,133,134,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,52,-10,-26,-70,-67,52,52,52,52,-69,52,-79,52,-66,52,-68,-12,52,52,52,52,-74,52,52,52,52,52,52,-71,-73,-27,52,52,-25,-80,52,52,52,52,52,52,52,52,-72,52,52,52,52,-77,-75,52,52,-78,-76,]),'IF':([19,21,25,26,27,28,29,36,42,43,51,55,61,63,65,69,78,88,92,94,97,98,119,131,133,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,37,-10,-26,-70,-67,-69,-79,-66,37,-68,-12,-74,-71,-73,-27,-25,-80,-72,37,37,-77,-75,37,37,-78,-76,]),'RETURN':([19,21,25,26,27,28,29,36,42,43,51,55,61,63,65,69,78,88,92,94,97,98,119,131,133,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,50,-10,-26,-70,-67,-69,-79,-66,50,-68,-12,-74,-71,-73,-27,-25,-80,-72,50,50,-77,-75,50,50,-78,-76,]),'AMPERSAND':([19,21,25,26,27,28,29,36,42,43,44,46,49,50,51,53,55,57,61,63,65,69,74,75,76,77,78,79,80,83,84,85,86,88,92,94,95,96,97,98,99,100,101,102,103,104,105,112,119,131,132,133,134,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,49,-10,-26,-70,-67,49,49,49,49,-69,49,-79,49,-66,49,-68,-12,49,49,49,49,-74,49,49,49,49,49,49,-71,-73,-27,49,49,-25,-80,49,49,49,49,49,49,49,49,-72,49,49,49,49,-77,-75,49,49,-78,-76,]),'CONTINUE':([19,21,25,26,27,28,29,36,42,43,51,55,61,63,65,69,78,88,92,94,97,98,119,131,133,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,
39,-10,-26,-70,-67,-69,-79,-66,39,-68,-12,-74,-71,-73,-27,-25,-80,-72,39,39,-77,-75,39,39,-78,-76,]),'FOR':([19,21,25,26,27,28,29,36,42,43,51,55,61,63,65,69,78,88,92,94,97,98,119,131,133,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,40,-10,-26,-70,-67,-69,-79,-66,40,-68,-12,-74,-71,-73,-27,-25,-80,-72,40,40,-77,-75,40,40,-78,-76,]),'MINUS':([19,21,25,26,27,28,29,36,38,42,43,44,45,46,48,49,50,51,52,53,54,55,57,59,60,61,62,63,65,66,67,68,69,74,75,76,77,78,79,80,81,83,84,85,86,87,88,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,109,110,111,112,114,115,116,117,118,119,120,121,128,131,132,133,134,135,136,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,53,-10,-26,-55,-70,-67,53,-59,53,-64,53,53,-69,-61,53,-62,-79,53,-40,95,-66,-51,53,-68,-46,-58,-60,-12,53,53,53,53,-74,53,53,-49,53,53,53,53,-50,-71,-47,-65,-73,-48,-27,53,53,-25,-80,53,53,53,53,53,53,53,-54,-52,-53,53,-63,95,95,95,95,-72,-57,-56,-42,53,53,53,53,-41,-43,-77,-75,53,53,-78,-76,]),'WHILE':([19,21,25,26,27,28,29,36,42,43,51,55,61,63,65,69,78,88,92,94,97,98,119,131,133,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,41,-10,-26,-70,-67,-69,-79,-66,41,-68,-12,-74,-71,-73,-27,-25,-80,-72,41,41,-77,-75,41,41,-78,-76,]),'LBRACE':([12,13,14,17,19,21,23,25,26,27,28,29,33,36,42,43,51,55,61,63,65,69,71,78,88,92,94,97,98,119,131,133,137,139,141,142,143,144,],[19,-17,-15,19,-81,-7,-16,-8,-9,-11,19,-10,-19,-26,-70,-67,-69,-79,-66,19,-68,-12,-18,-74,-71,-73,-27,-25,-80,-72,19,19,-77,-75,19,19,-78,-76,]),'EQ':([38,45,47,48,52,54,59,60,62,64,66,67,68,81,87,90,91,93,109,110,111,114,115,116,117,118,120,121,122,126,128,135,136,],[-55,-59,-32,-64,-61,-62,-40,-35,-51,103,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,-34,-33,-42,-41,-43,]),'MODULO':([38,45,48,52,54,59,62,66,67,68,81,87,90,91,93,109,110,111,114,120,121,128,135,136,],[75,-59,-64,-61,-62,-40,-51,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,75,75,-42,-41,-43,]),'GREATER':([38,45,47,48,52,54,59,60,62,66,67,68,81,87,90,91,93,109,110,111,114,115,
116,117,118,120,121,122,126,128,135,136,],[-55,-59,84,-64,-61,-62,-40,-35,-51,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,84,84,-42,-41,-43,]),'LBRACKET':([45,48,52,54,59,66,67,68,91,114,128,135,136,],[-59,-64,-61,-62,-40,105,-58,-60,-65,-63,-42,-41,-43,]),'DIV':([38,45,48,52,54,59,62,66,67,68,81,87,90,91,93,109,110,111,114,120,121,128,135,136,],[77,-59,-64,-61,-62,-40,-51,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,77,77,-42,-41,-43,]),'LESS':([38,45,47,48,52,54,59,60,62,66,67,68,81,87,90,91,93,109,110,111,114,115,116,117,118,120,121,122,126,128,135,136,],[-55,-59,85,-64,-61,-62,-40,-35,-51,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,85,85,-42,-41,-43,]),'RPAREN':([13,14,22,23,31,32,33,35,38,45,47,48,52,54,59,60,62,64,66,67,68,71,72,81,82,87,90,91,93,104,106,107,108,109,110,111,113,114,115,116,117,118,120,121,122,123,124,125,126,127,128,129,135,136,138,140,],[-17,-15,33,-16,-22,71,-19,-20,-55,-59,-32,-64,-61,-62,-40,-35,-51,-28,-46,-58,-60,-18,-24,-49,114,-50,-47,-65,-48,128,-23,-21,131,-54,-52,-53,133,-63,-39,-37,-36,-38,-57,-56,-34,-29,-30,-31,-33,135,-42,-44,-41,-43,142,-45,]),'EQ_MINUS':([38,45,47,48,52,54,59,60,62,64,66,67,68,81,87,90,91,93,109,110,111,114,115,116,117,118,120,121,122,126,128,135,136,],[-55,-59,-32,-64,-61,-62,-40,-35,-51,102,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,-34,-33,-42,-41,-43,]),'SEMICOLON':([12,13,14,18,23,33,38,39,45,47,48,50,52,54,56,58,59,60,62,64,66,67,68,70,71,81,87,89,90,91,93,109,110,111,114,115,116,117,118,120,121,122,123,124,125,126,128,135,136,],[21,-17,-15,25,-16,-19,-55,78,-59,-32,-64,88,-61,-62,92,94,-40,-35,-51,-28,-46,-58,-60,21,-18,-49,-50,119,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,-34,-29,-30,-31,-33,-42,-41,-43,]),'BREAK':([19,21,25,26,27,28,29,36,42,43,51,55,61,63,65,69,78,88,92,94,97,98,119,131,133,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,56,-10,-26,-70,-67,-69,-79,-66,56,-68,-12,-74,-71,-73,-27,-25,-80,-72,5
6,56,-77,-75,56,56,-78,-76,]),'ASTERISK':([3,6,8,10,11,15,19,21,25,26,27,28,29,30,34,36,38,42,43,44,45,46,48,49,50,51,52,53,54,55,57,59,61,62,63,65,66,67,68,69,74,75,76,77,78,79,80,81,83,84,85,86,87,88,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,109,110,111,112,114,119,120,121,128,131,132,133,134,135,136,137,139,141,142,143,144,],[-13,-14,15,15,15,15,-81,-7,-8,-9,-11,44,-10,15,15,-26,76,-70,-67,44,-59,44,-64,44,44,-69,-61,44,-62,-79,44,-40,-66,-51,44,-68,-46,-58,-60,-12,44,44,44,44,-74,44,44,-49,44,44,44,44,-50,-71,-47,-65,-73,-48,-27,44,44,-25,-80,44,44,44,44,44,44,44,-54,-52,-53,44,-63,-72,76,76,-42,44,44,44,44,-41,-43,-77,-75,44,44,-78,-76,]),'INUMBER':([19,21,25,26,27,28,29,36,42,43,44,46,49,50,51,53,55,57,61,63,65,69,74,75,76,77,78,79,80,83,84,85,86,88,92,94,95,96,97,98,99,100,101,102,103,104,105,112,119,131,132,133,134,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,45,-10,-26,-70,-67,45,45,45,45,-69,45,-79,45,-66,45,-68,-12,45,45,45,45,-74,45,45,45,45,45,45,-71,-73,-27,45,45,-25,-80,45,45,45,45,45,45,45,45,-72,45,45,45,45,-77,-75,45,45,-78,-76,]),'GREATER_EQ':([38,45,47,48,52,54,59,60,62,66,67,68,81,87,90,91,93,109,110,111,114,115,116,117,118,120,121,122,126,128,135,136,],[-55,-59,83,-64,-61,-62,-40,-35,-51,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,83,83,-42,-41,-43,]),'LPAREN':([13,14,19,21,25,26,27,28,29,33,36,37,40,41,42,43,44,45,46,48,49,50,51,52,53,54,55,57,59,61,63,65,66,67,68,69,71,74,75,76,77,78,79,80,83,84,85,86,88,91,92,94,95,96,97,98,99,100,101,102,103,104,105,112,114,119,128,131,132,133,134,135,136,137,139,141,142,143,144,],[-17,22,-81,-7,-8,-9,-11,46,-10,-19,-26,74,79,80,-70,-67,46,-59,46,-64,46,46,-69,-61,46,-62,-79,46,-40,-66,46,-68,104,-58,-60,-12,-18,46,46,46,46,-74,46,46,46,46,46,46,-71,-65,-73,-27,46,46,-25,-80,46,46,46,46,46,46,46,46,-63,-72,-42,46,46,46,46,-41,-43,-77,-75,46,46,-78,-76,]),'LESS_EQ':([38,45,47,48,52,54,59,60,62,66,67,68,81,87,90,91,93,109,110,111,114,115,116,117,118,120,121,122,126,128,1
35,136,],[-55,-59,86,-64,-61,-62,-40,-35,-51,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,86,86,-42,-41,-43,]),'COMMA':([13,14,23,31,33,35,38,45,47,48,52,54,59,60,62,64,66,67,68,71,72,81,87,90,91,93,106,109,110,111,114,115,116,117,118,120,121,122,123,124,125,126,127,128,129,135,136,140,],[-17,-15,-16,-22,-19,73,-55,-59,-32,-64,-61,-62,-40,-35,-51,-28,-46,-58,-60,-18,-24,-49,-50,-47,-65,-48,-23,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,-34,-29,-30,-31,-33,134,-42,-44,-41,-43,-45,]),'ELSE':([36,42,43,51,61,65,78,88,92,94,97,119,137,139,143,144,],[-26,-70,-67,-69,-66,-68,-74,-71,-73,-27,-25,-72,141,-75,-78,-76,]),'INT':([0,1,2,4,5,7,9,16,19,20,21,22,24,25,27,29,36,69,73,97,],[3,-4,-1,3,-3,3,3,-2,3,-5,-7,3,-6,-8,-11,3,-26,-12,3,-25,]),'NOT_EQ':([38,45,47,48,52,54,59,60,62,64,66,67,68,81,87,90,91,93,109,110,111,114,115,116,117,118,120,121,122,126,128,135,136,],[-55,-59,-32,-64,-61,-62,-40,-35,-51,99,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,-34,-33,-42,-41,-43,]),'PLUS':([38,45,48,52,54,59,60,62,66,67,68,81,87,90,91,93,109,110,111,114,115,116,117,118,120,121,128,135,136,],[-55,-59,-64,-61,-62,-40,96,-51,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,96,96,96,96,-57,-56,-42,-41,-43,]),'STRING':([19,21,25,26,27,28,29,36,42,43,44,46,48,49,50,51,53,54,55,57,61,63,65,69,74,75,76,77,78,79,80,83,84,85,86,88,91,92,94,95,96,97,98,99,100,101,102,103,104,105,112,119,131,132,133,134,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,48,-10,-26,-70,-67,48,48,-64,48,48,-69,48,91,-79,48,-66,48,-68,-12,48,48,48,48,-74,48,48,48,48,48,48,-71,-65,-73,-27,48,48,-25,-80,48,48,48,48,48,48,48,48,-72,48,48,48,48,-77,-75,48,48,-78,-76,]),'$end':([1,2,5,9,16,20,21,24,25,36,97,],[-4,-1,-3,0,-2,-5,-7,-6,-8,-26,-25,]),'CHAR':([0,1,2,4,5,7,9,16,19,20,21,22,24,25,27,29,36,69,73,97,],[6,-4,-1,6,-3,6,6,-2,6,-5,-7,6,-6,-8,-11,6,-26,-12,6,-25,]),'EXTERN':([0,1,2,5,9,16,19,20,21,24,25,27,29,36,69,97,],[7,-4,-1,-3,7,-2,7,-5,-7,-6,-8,-11,7,-26,-12,-25,]),'RBRA
CKET':([38,45,47,48,52,54,59,60,62,64,66,67,68,81,87,90,91,93,109,110,111,114,115,116,117,118,120,121,122,123,124,125,126,128,130,135,136,],[-55,-59,-32,-64,-61,-62,-40,-35,-51,-28,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,-34,-29,-30,-31,-33,-42,136,-41,-43,]),'FNUMBER':([19,21,25,26,27,28,29,36,42,43,44,46,49,50,51,53,55,57,61,63,65,69,74,75,76,77,78,79,80,83,84,85,86,88,92,94,95,96,97,98,99,100,101,102,103,104,105,112,119,131,132,133,134,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,68,-10,-26,-70,-67,68,68,68,68,-69,68,-79,68,-66,68,-68,-12,68,68,68,68,-74,68,68,68,68,68,68,-71,-73,-27,68,68,-25,-80,68,68,68,68,68,68,68,68,-72,68,68,68,68,-77,-75,68,68,-78,-76,]),'ELLIPSIS':([73,],[107,]),'EQ_PLUS':([38,45,47,48,52,54,59,60,62,64,66,67,68,81,87,90,91,93,109,110,111,114,115,116,117,118,120,121,122,126,128,135,136,],[-55,-59,-32,-64,-61,-62,-40,-35,-51,101,-46,-58,-60,-49,-50,-47,-65,-48,-54,-52,-53,-63,-39,-37,-36,-38,-57,-56,-34,-33,-42,-41,-43,]),'EXCLAMATION':([19,21,25,26,27,28,29,36,42,43,44,46,49,50,51,53,55,57,61,63,65,69,74,75,76,77,78,79,80,83,84,85,86,88,92,94,95,96,97,98,99,100,101,102,103,104,105,112,119,131,132,133,134,137,139,141,142,143,144,],[-81,-7,-8,-9,-11,57,-10,-26,-70,-67,57,57,57,57,-69,57,-79,57,-66,57,-68,-12,57,57,57,57,-74,57,57,57,57,57,57,-71,-73,-27,57,57,-25,-80,57,57,57,57,57,57,57,57,-72,57,57,57,57,-77,-75,57,57,-78,-76,]),'ID':([3,6,8,10,11,15,19,21,25,26,27,28,29,30,34,36,42,43,44,46,49,50,51,53,55,57,61,63,65,69,74,75,76,77,78,79,80,83,84,85,86,88,92,94,95,96,97,98,99,100,101,102,103,104,105,112,119,131,132,133,134,137,139,141,142,143,144,],[-13,-14,13,13,13,13,-81,-7,-8,-9,-11,67,-10,13,13,-26,-70,-67,67,67,67,67,-69,67,-79,67,-66,67,-68,-12,67,67,67,67,-74,67,67,67,67,67,67,-71,-73,-27,67,67,-25,-80,67,67,67,67,67,67,67,67,-72,67,67,67,67,-77,-75,67,67,-78,-76,]),}
# Expand the compact table (token -> (states, actions)) into the
# nested lookup form _lr_action[state][token] = action.
_lr_action = {}
for _k, _v in _lr_action_items.items():
    for _x, _y in zip(_v[0], _v[1]):
        _lr_action.setdefault(_x, {})[_k] = _y
del _lr_action_items
_lr_goto_items = {'mult_expression':([28,46,50,63,74,79,80,83,84,85,86,95,96,99,100,101,102,103,104,105,112,131,132,133,134,141,142,],[38,38,38,38,38,38,38,38,38,38,38,120,121,38,38,38,38,38,38,38,38,38,38,38,38,38,38,]),'declarator':([8,10,11,15,30,34,],[12,17,18,23,70,72,]),'translation_unit':([0,],[9,]),'additive_expression':([28,46,50,63,74,79,80,83,84,85,86,99,100,101,102,103,104,105,112,131,132,133,134,141,142,],[60,60,60,60,60,60,60,115,116,117,118,60,60,60,60,60,60,60,60,60,60,60,60,60,60,]),'jump_statement':([28,63,131,133,141,142,],[42,42,42,42,42,42,]),'declaration_list_opt':([19,],[28,]),'string_literal':([28,44,46,49,50,53,57,63,74,75,76,77,79,80,83,84,85,86,95,96,99,100,101,102,103,104,105,112,131,132,133,134,141,142,],[54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,]),'iteration_statement':([28,63,131,133,141,142,],[51,51,51,51,51,51,]),'argument_expression_list':([104,],[127,]),'declaration_list':([19,],[29,]),'parameter_type_list':([22,],[32,]),'expression_statement':([28,63,79,112,131,133,141,142,],[43,43,112,132,43,43,43,43,]),'statement':([28,63,131,133,141,142,],[55,98,137,139,143,144,]),'parameter_list':([22,],[35,]),'declaration':([0,9,19,29,],[1,1,27,69,]),'expression':([28,46,50,63,74,79,80,100,101,102,104,105,112,131,132,133,134,141,142,],[58,82,89,58,108,58,113,123,124,125,129,130,58,58,138,58,140,58,58,]),'primary_expression':([28,44,46,49,50,53,57,63,74,75,76,77,79,80,83,84,85,86,95,96,99,100,101,102,103,104,105,112,131,132,133,134,141,142,],[59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,59,]),'empty':([19,],[26,]),'compound_statement':([12,17,28,63,131,133,141,142,],[20,24,61,61,61,61,61,61,]),'unary_expression':([28,44,46,49,50,53,57,63,74,75,76,77,79,80,83,84,85,86,95,96,99,100,101,102,103,104,105,112,131,132,133,134,141,142,],[62,81,62,87,62,90,93,62,62,109,110,111,62,62,62,62,62,62,62,62,62,62,62,62,62,62,62,62,62,62,62,62,62,
62,]),'external_declaration':([0,9,],[2,16,]),'selection_statement':([28,63,131,133,141,142,],[65,65,65,65,65,65,]),'relational_expression':([28,46,50,63,74,79,80,99,100,101,102,103,104,105,112,131,132,133,134,141,142,],[47,47,47,47,47,47,47,122,47,47,47,126,47,47,47,47,47,47,47,47,47,]),'function_definition':([0,9,],[5,5,]),'parameter_declaration':([22,73,],[31,106,]),'statement_list':([28,],[63,]),'postfix_expression':([28,44,46,49,50,53,57,63,74,75,76,77,79,80,83,84,85,86,95,96,99,100,101,102,103,104,105,112,131,132,133,134,141,142,],[66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,]),'direct_declarator':([8,10,11,15,30,34,],[14,14,14,14,14,14,]),'type_specifier':([0,4,7,9,19,22,29,73,],[8,10,11,8,30,34,30,34,]),'equality_expression':([28,46,50,63,74,79,80,100,101,102,104,105,112,131,132,133,134,141,142,],[64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,]),}
# Expand the compact table (nonterminal -> (states, targets)) into the
# nested lookup form _lr_goto[state][nonterminal] = target state.
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
    for _x, _y in zip(_v[0], _v[1]):
        _lr_goto.setdefault(_x, {})[_k] = _y
del _lr_goto_items
_lr_productions = [
("S' -> translation_unit","S'",1,None,None,None),
('translation_unit -> external_declaration','translation_unit',1,'p_translation_unit_01','cparse.py',453),
('translation_unit -> translation_unit external_declaration','translation_unit',2,'p_translation_unit_02','cparse.py',457),
('external_declaration -> function_definition','external_declaration',1,'p_external_declaration','cparse.py',462),
('external_declaration -> declaration','external_declaration',1,'p_external_declaration','cparse.py',463),
('function_definition -> type_specifier declarator compound_statement','function_definition',3,'p_function_definition_01','cparse.py',467),
('function_definition -> STATIC type_specifier declarator compound_statement','function_definition',4,'p_function_definition_02','cparse.py',472),
('declaration -> type_specifier declarator SEMICOLON','declaration',3,'p_declaration_01','cparse.py',478),
('declaration -> EXTERN type_specifier declarator SEMICOLON','declaration',4,'p_declaration_02','cparse.py',485),
('declaration_list_opt -> empty','declaration_list_opt',1,'p_declaration_list_opt_01','cparse.py',491),
('declaration_list_opt -> declaration_list','declaration_list_opt',1,'p_declaration_list_opt_02','cparse.py',495),
('declaration_list -> declaration','declaration_list',1,'p_declaration_list_02','cparse.py',499),
('declaration_list -> declaration_list declaration','declaration_list',2,'p_declaration_list_03','cparse.py',503),
('type_specifier -> INT','type_specifier',1,'p_type_specifier','cparse.py',508),
('type_specifier -> CHAR','type_specifier',1,'p_type_specifier','cparse.py',509),
('declarator -> direct_declarator','declarator',1,'p_declarator_01','cparse.py',513),
('declarator -> ASTERISK declarator','declarator',2,'p_declarator_02','cparse.py',517),
('direct_declarator -> ID','direct_declarator',1,'p_direct_declarator_01','cparse.py',522),
('direct_declarator -> direct_declarator LPAREN parameter_type_list RPAREN','direct_declarator',4,'p_direct_declarator_02','cparse.py',526),
('direct_declarator -> direct_declarator LPAREN RPAREN','direct_declarator',3,'p_direct_declarator_03','cparse.py',531),
('parameter_type_list -> parameter_list','parameter_type_list',1,'p_parameter_type_list_01','cparse.py',536),
('parameter_type_list -> parameter_list COMMA ELLIPSIS','parameter_type_list',3,'p_parameter_type_list_02','cparse.py',540),
('parameter_list -> parameter_declaration','parameter_list',1,'p_parameter_list_01','cparse.py',545),
('parameter_list -> parameter_list COMMA parameter_declaration','parameter_list',3,'p_parameter_list_02','cparse.py',549),
('parameter_declaration -> type_specifier declarator','parameter_declaration',2,'p_parameter_declaration','cparse.py',554),
('compound_statement -> LBRACE declaration_list_opt statement_list RBRACE','compound_statement',4,'p_compound_statement_01','cparse.py',559),
('compound_statement -> LBRACE declaration_list_opt RBRACE','compound_statement',3,'p_compound_statement_02','cparse.py',563),
('expression_statement -> expression SEMICOLON','expression_statement',2,'p_expression_statement','cparse.py',567),
('expression -> equality_expression','expression',1,'p_expression_01','cparse.py',571),
('expression -> equality_expression ASSIGN expression','expression',3,'p_expression_02','cparse.py',575),
('expression -> equality_expression EQ_PLUS expression','expression',3,'p_expression_02','cparse.py',576),
('expression -> equality_expression EQ_MINUS expression','expression',3,'p_expression_02','cparse.py',577),
('equality_expression -> relational_expression','equality_expression',1,'p_equality_expression_01','cparse.py',581),
('equality_expression -> equality_expression EQ relational_expression','equality_expression',3,'p_equality_expression_02','cparse.py',585),
('equality_expression -> equality_expression NOT_EQ relational_expression','equality_expression',3,'p_equality_expression_02','cparse.py',586),
('relational_expression -> additive_expression','relational_expression',1,'p_relational_expression_01','cparse.py',590),
('relational_expression -> relational_expression LESS additive_expression','relational_expression',3,'p_relational_expression_02','cparse.py',594),
('relational_expression -> relational_expression GREATER additive_expression','relational_expression',3,'p_relational_expression_02','cparse.py',595),
('relational_expression -> relational_expression LESS_EQ additive_expression','relational_expression',3,'p_relational_expression_02','cparse.py',596),
('relational_expression -> relational_expression GREATER_EQ additive_expression','relational_expression',3,'p_relational_expression_02','cparse.py',597),
('postfix_expression -> primary_expression','postfix_expression',1,'p_postfix_expression_01','cparse.py',601),
('postfix_expression -> postfix_expression LPAREN argument_expression_list RPAREN','postfix_expression',4,'p_postfix_expression_02','cparse.py',605),
('postfix_expression -> postfix_expression LPAREN RPAREN','postfix_expression',3,'p_postfix_expression_03','cparse.py',610),
('postfix_expression -> postfix_expression LBRACKET expression RBRACKET','postfix_expression',4,'p_postfix_expression_04','cparse.py',614),
('argument_expression_list -> expression','argument_expression_list',1,'p_argument_expression_list_01','cparse.py',618),
('argument_expression_list -> argument_expression_list COMMA expression','argument_expression_list',3,'p_argument_expression_list_02','cparse.py',622),
('unary_expression -> postfix_expression','unary_expression',1,'p_unary_expression_01','cparse.py',627),
('unary_expression -> MINUS unary_expression','unary_expression',2,'p_unary_expression_02','cparse.py',631),
('unary_expression -> EXCLAMATION unary_expression','unary_expression',2,'p_unary_expression_03','cparse.py',639),
('unary_expression -> ASTERISK unary_expression','unary_expression',2,'p_unary_expression_04','cparse.py',645),
('unary_expression -> AMPERSAND unary_expression','unary_expression',2,'p_unary_expression_05','cparse.py',649),
('mult_expression -> unary_expression','mult_expression',1,'p_mult_expression_01','cparse.py',653),
('mult_expression -> mult_expression ASTERISK unary_expression','mult_expression',3,'p_mult_expression_02','cparse.py',657),
('mult_expression -> mult_expression DIV unary_expression','mult_expression',3,'p_mult_expression_02','cparse.py',658),
('mult_expression -> mult_expression MODULO unary_expression','mult_expression',3,'p_mult_expression_02','cparse.py',659),
('additive_expression -> mult_expression','additive_expression',1,'p_additive_expression_01','cparse.py',663),
('additive_expression -> additive_expression PLUS mult_expression','additive_expression',3,'p_additive_expression_02','cparse.py',667),
('additive_expression -> additive_expression MINUS mult_expression','additive_expression',3,'p_additive_expression_02','cparse.py',668),
('primary_expression -> ID','primary_expression',1,'p_primary_expression_01','cparse.py',672),
('primary_expression -> INUMBER','primary_expression',1,'p_primary_expression_02','cparse.py',676),
('primary_expression -> FNUMBER','primary_expression',1,'p_primary_expression_03','cparse.py',680),
('primary_expression -> CHARACTER','primary_expression',1,'p_primary_expression_04','cparse.py',684),
('primary_expression -> string_literal','primary_expression',1,'p_primary_expression_05','cparse.py',688),
('primary_expression -> LPAREN expression RPAREN','primary_expression',3,'p_primary_expression_06','cparse.py',692),
('string_literal -> STRING','string_literal',1,'p_string_literal_01','cparse.py',696),
('string_literal -> string_literal STRING','string_literal',2,'p_string_literal_02','cparse.py',700),
('statement -> compound_statement','statement',1,'p_statement','cparse.py',705),
('statement -> expression_statement','statement',1,'p_statement','cparse.py',706),
('statement -> selection_statement','statement',1,'p_statement','cparse.py',707),
('statement -> iteration_statement','statement',1,'p_statement','cparse.py',708),
('statement -> jump_statement','statement',1,'p_statement','cparse.py',709),
('jump_statement -> RETURN SEMICOLON','jump_statement',2,'p_jump_statement_01','cparse.py',713),
('jump_statement -> RETURN expression SEMICOLON','jump_statement',3,'p_jump_statement_02','cparse.py',717),
('jump_statement -> BREAK SEMICOLON','jump_statement',2,'p_jump_statement_03','cparse.py',721),
('jump_statement -> CONTINUE SEMICOLON','jump_statement',2,'p_jump_statement_04','cparse.py',725),
('iteration_statement -> WHILE LPAREN expression RPAREN statement','iteration_statement',5,'p_iteration_statement_01','cparse.py',729),
('iteration_statement -> FOR LPAREN expression_statement expression_statement expression RPAREN statement','iteration_statement',7,'p_iteration_statement_02','cparse.py',733),
('selection_statement -> IF LPAREN expression RPAREN statement','selection_statement',5,'p_selection_statement_01','cparse.py',737),
('selection_statement -> IF LPAREN expression RPAREN statement ELSE statement','selection_statement',7,'p_selection_statement_02','cparse.py',741),
('statement_list -> statement','statement_list',1,'p_statement_list_02','cparse.py',745),
('statement_list -> statement_list statement','statement_list',2,'p_statement_list_03','cparse.py',749),
('empty -> <empty>','empty',0,'p_empty','cparse.py',754),
]
/*******************************************************************
* foo.c
* Atul Varma - 5/24/2004
* CS Independent Study
* $Id: foo.c,v 1.1 2004/05/27 16:25:14 varmaa Exp $
*
* This is a simple C file that should be compiled by my mini-C
* compiler.
*******************************************************************
*/
/* Prototypes for some standard C library functions (the code
calls these directly). */
extern int printf(char *str, ...);
extern char *malloc(int size);
extern int free(char *ptr);
/* Test of extern variable (defined in foo_lib.c). It is incremented
by one on every call to increment_stuff_count(). */
extern int stuff_count;
/* Increments stuff_count by one (defined in foo_lib.c). */
extern int increment_stuff_count();
/* Test of global variable. How many times we've called
the fib() function. */
int fib_count;
/* fib: Test of basic branching and recursion.
   Returns the i-th Fibonacci number (fib(0) == 0, fib(1) == 1) and
   increments the global fib_count on every call, recursive calls
   included. Only i == 0 and i == 1 end the recursion, so callers
   must pass i >= 0. */
static int fib(int i)
{
/* Count this call. */
fib_count += 1;
if (i == 1) {
return 1;
} else {
if (i == 0) {
return 0;
} else {
return fib(i-1) + fib(i-2);
}
}
}
/* show_fib: Wrapper that prints the result of fib(i) with printf(),
   in the form "fib(<i>) is <result>.". Always returns 0. */
static int show_fib(int i)
{
printf("fib(%d) is %d.\n", i, fib(i));
return 0;
}
/* set_a: Test of pointer indirection and char type.
   Stores the character 'a' in the char that c points to.
   Always returns 0. */
static int set_a(char *c)
{
*c = 'a';
return 0;
}
/* get_literal: Test of string literals and of returning a char
   pointer. Returns a pointer to the string literal "blah" followed
   by a newline. */
static char *get_literal()
{
return "blah\n";
}
/* Main program that runs the tests.
   Each test prints its result with printf(). argc and argv are the
   command-line argument count and vector; the function returns 0. */
int main(int argc, char **argv) {
char c;
int i;
c = 'h';
/* Test of multiple assignment. */
fib_count = stuff_count = 0;
/* Test of command-line argument passing, pointer
indirection/array indexing, for looping. */
printf("My executable name is %s.\n", *argv);
for (i = 0; i < argc; i += 1) {
printf(" argv[%d] is: %s "
"argv[%d][0] is: %c\n", i, argv[i], i, argv[i][0]);
increment_stuff_count();
}
/* Test of while looping with break/continue.
   Calls show_fib(0) through show_fib(5); the loop is left
   once i exceeds 5. */
i = 0;
while (1) {
show_fib(i);
i += 1;
if (i > 5)
break;
else
continue;
}
/* stuff_count was incremented once per iteration of the for loop
   above; double it. */
stuff_count = stuff_count * 2;
printf("fib_count is %d.\n", fib_count);
printf("stuff_count is %d.\n", stuff_count);
printf("before set_a(&c), c == '%c'\n", c);
/* Test of address-of (&) operator. */
set_a(&c);
{
/* Test of char-int and int-char type coercion.
   The int c declared here shadows the char c of main's scope. */
int a;
char b;
int c;
/* Note that in two's complement arithmetic, this is
a 32-bit int consisting of all 1's.
(This is also a test of the '-' unary operator.) */
a = -1;
/* The following line will raise a warning from the
compiler, because a signed 32-bit int is being truncated
to an unsigned 8-bit char. */
b = a;
c = b;
printf(" a = %d\n", a);
printf(" b = %d\n", b);
printf(" c = %d\n", c);
}
/* Note now that the scope of c is in the function's main
scope, not the scope of the above compound statement.
This test makes sure that the address and contents
of c did not change during the execution of the
compound statement. */
printf("after set_a(&c), c == '%c'\n", c);
printf("get_literal() = %s\n", get_literal());
/* Pointer indexing via array example. */
printf("get_literal()[3] = %c\n", get_literal()[3]);
{
/* Test of building a string using assignment via array indexing
of a char pointer. The buffer is dynamically allocated.
This char *c again shadows the char c of main's scope. */
char *c;
c = malloc(30);
c[0] = 'h';
c[1] = 'i';
/* Terminate the string. */
c[2] = 0;
printf("array-built string is: %s\n", c);
free(c);
}
return 0;
}
/*******************************************************************
* foo_lib.c
* Atul Varma - 5/24/2004
* CS Independent Study
* $Id: foo_lib.c,v 1.1 2004/05/27 16:25:14 varmaa Exp $
*
* Contains external library functions/variables for foo.c.
*******************************************************************
*/
/* Test global variable. */
int stuff_count;
/* fib: Test of static function definition, to make sure it
   doesn't conflict with fib() defined in foo.c.
   Increments the global stuff_count by one and returns its
   new value. */
static int fib()
{
return stuff_count += 1;
}
/* increment_stuff_count: Increments the global stuff_count by one,
   by calling this file's static fib(). Always returns 0. */
int increment_stuff_count()
{
fib();
return 0;
}