Franck Pommereau

ported from Python 2

*~
*.pyc
*.pyo
__pycache__
,*
+*
.*.swp
.#*
#*
cctlib/parser.out
#!/usr/bin/env python3
# Command-line launcher: hands control straight to the entry point that
# lives in cctlib.main.
from cctlib.main import main

main()
File mode changed
# ---------------------------------------------------------------
# clex.py
#
# Atul Varma
# Python C Compiler - Lexical Analyzer
# $Id: clex.py,v 1.2 2004/06/02 21:05:45 varmaa Exp $
# ---------------------------------------------------------------
import ply.lex as lex
import re
# ---------------------------------------------------------------
# TOKEN LIST
# ---------------------------------------------------------------
tokens = (
    # Reserved words
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO',
    'DOUBLE', 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF',
    'INT', 'LONG', 'REGISTER', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF',
    'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
    'VOLATILE', 'WHILE',

    # Special characters: separators and grouping
    'COMMA', 'COLON', 'SEMICOLON',
    'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', 'LBRACE', 'RBRACE',
    # Special characters: assignment and comparison
    'ASSIGN', 'GREATER', 'LESS', 'EQ', 'NOT_EQ', 'GREATER_EQ', 'LESS_EQ',
    # Special characters: increment/decrement and arithmetic
    'DOUBLE_PLUS', 'DOUBLE_MINUS', 'PLUS', 'MINUS', 'TIMES', 'DIV', 'MODULO',
    # Special characters: logical, bitwise and miscellaneous
    'DOUBLE_AMPERSAND', 'DOUBLE_PIPE', 'EXCLAMATION', 'AMPERSAND', 'PIPE',
    'CARET', 'ASTERISK', 'QUESTION', 'TILDE', 'POUND',
    # Special characters: member access, varargs and shifts
    'DOT', 'ELLIPSIS', 'ARROW', 'SHIFT_LEFT', 'SHIFT_RIGHT',
    # Special characters: compound assignment
    'EQ_PLUS', 'EQ_MINUS', 'EQ_TIMES', 'EQ_DIV', 'EQ_MODULO', 'EQ_PIPE',
    'EQ_AMPERSAND', 'EQ_CARET', 'EQ_SHIFT_LEFT', 'EQ_SHIFT_RIGHT',

    # Complex tokens
    'ID', 'FNUMBER', 'INUMBER', 'STRING', 'CHARACTER',
)
# ---------------------------------------------------------------
# RESERVED WORDS
# ---------------------------------------------------------------
# Maps each C keyword, as spelled in source code, to its token type.
# Every token type is simply the upper-cased keyword.
reserved_words = {
    keyword: keyword.upper()
    for keyword in (
        'auto', 'break', 'case', 'char', 'const', 'continue', 'default',
        'do', 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto',
        'if', 'int', 'long', 'register', 'return', 'short', 'signed',
        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
        'unsigned', 'void', 'volatile', 'while',
    )
}
# ---------------------------------------------------------------
# SPECIAL CHARACTERS
# ---------------------------------------------------------------
# Each t_NAME string below is the regular expression for token NAME from
# the `tokens` tuple.
# NOTE(review): several patterns are prefixes of others ('=' / '==',
# '<' / '<<' / '<<='). PLY is documented to try string-defined rules in
# order of decreasing pattern length, which is what lets the longer
# operator win -- confirm against the PLY version in use.

# Separators and grouping
t_COMMA = r','
t_COLON = r':'
t_SEMICOLON = r';'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'{'
t_RBRACE = r'}'
# Assignment and comparison
t_ASSIGN = r'='
t_GREATER = r'>'
t_LESS = r'<'
t_EQ = r'=='
t_NOT_EQ = r'!='
t_GREATER_EQ = r'>='
t_LESS_EQ = r'<='
# Increment/decrement and arithmetic
t_DOUBLE_PLUS = r'\+\+'
t_DOUBLE_MINUS = r'--'
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
# A '/' that is not followed by '*', so the opening of a comment is never
# lexed as a division operator.
t_DIV = r'/(?!\*)'
t_MODULO = r'%'
# Logical and bitwise operators
t_DOUBLE_AMPERSAND = r'&&'
t_DOUBLE_PIPE = r'\|\|'
t_EXCLAMATION = r'!'
t_AMPERSAND = r'&'
t_PIPE = r'\|'
t_CARET = r'\^'
# NOTE(review): identical pattern to t_TIMES above; presumably only one of
# the two token types is ever produced -- verify against the parser.
t_ASTERISK = r'\*'
t_QUESTION = r'\?'
t_TILDE = r'~'
t_POUND = r'\#'
# Member access, varargs and shifts
t_ELLIPSIS = r'\.\.\.'
t_DOT = r'\.'
t_ARROW = r'->'
t_SHIFT_LEFT = r'<<'
t_SHIFT_RIGHT = r'>>'
# Compound assignment
t_EQ_PLUS = r'\+='
t_EQ_MINUS = r'-='
t_EQ_TIMES = r'\*='
t_EQ_DIV = r'/='
t_EQ_MODULO = r'%='
t_EQ_PIPE = r'\|='
t_EQ_AMPERSAND = r'&='
t_EQ_CARET = r'\^='
t_EQ_SHIFT_LEFT = r'<<='
t_EQ_SHIFT_RIGHT = r'>>='
# ---------------------------------------------------------------
# COMPLEX TOKENS
# ---------------------------------------------------------------
def t_ID(t):
    r'[A-Za-z_][\w]*'
    # Keywords match the identifier pattern too, so they are lexed here and
    # re-tagged with their own token type; anything else keeps the type the
    # lexer assigned.  (The docstring above is the rule's regular
    # expression and must stay the first statement.)
    keyword_type = reserved_words.get(t.value)
    if keyword_type is not None:
        t.type = keyword_type
    return t
# Floating point literal.  The docstring is the rule's regular expression
# (so it must remain the first statement): an integer part that is either
# a lone 0 or a non-zero digit followed by more digits, then either a
# fraction with an optional exponent or an exponent on its own.  Only a
# lowercase 'e' introduces the exponent.
# NOTE(review): this rule is defined before t_INUMBER; PLY is documented to
# try function rules in definition order, which presumably is what keeps
# "1.5" from being lexed as the integer 1 -- confirm.
def t_FNUMBER(t):
    r'((0(?!\d))|([1-9]\d*))((\.\d+(e[+-]?\d+)?)|(e[+-]?\d+))'
    return t
def t_malformed_fnumber(t):
    r'(0\d+)((\.\d+(e[+-]?\d+)?)|(e[+-]?\d+))'
    # A float whose integer part has a leading zero followed by more digits.
    # It is reported and nothing is returned, so no token is produced.
    message = "Line %d. Malformed floating point number '%s'" % (t.lineno, t.value)
    print(message)
# Decimal integer literal: either a lone 0 (not followed by another digit)
# or a non-zero digit followed by any number of digits.  The docstring is
# the rule's regular expression and must remain the first statement.
def t_INUMBER(t):
    r'0(?!\d)|([1-9]\d*)'
    return t
def t_malformed_inumber(t):
    r'0\d+'
    # An integer written with a leading zero followed by more digits.  It is
    # reported and nothing is returned, so no token is produced.
    message = "Line %d. Malformed integer '%s'" % (t.lineno, t.value)
    print(message)
# Character constant: exactly one word character (\w) between single
# quotes.  The pattern has no provision for escape sequences, spaces or
# punctuation inside the quotes.  The docstring is the rule's regular
# expression and must remain the first statement.
def t_CHARACTER(t):
    r"'\w'"
    return t
def t_STRING(t):
    r'"[^\n]*?(?<!\\)"'
    # String literal on a single line, ending at the first double quote
    # that is not preceded by a backslash.  (The docstring above is the
    # rule's regular expression and must stay the first statement.)
    #
    # Only the escapes \n, \" and \\ are accepted.  Escaped backslashes are
    # removed before searching so that the second backslash of a \\ pair is
    # not mistaken for the start of another escape.
    temp_str = t.value.replace(r'\\', '')
    m = re.search(r'\\[^n"]', temp_str)
    if m is not None:
        print("Line %d. Unsupported character escape %s in string literal."
              % (t.lineno, m.group(0)))
        # Returning None means the offending literal yields no token.
        return None
    return t
# ---------------------------------------------------------------
# IGNORED TOKENS
# ---------------------------------------------------------------
def t_WHITESPACE(t):
    r'[ \t]+'
    # Runs of spaces and tabs are matched and dropped: nothing is returned,
    # so no token is produced for them.
def t_NEWLINE(t):
    r'\n+'
    # Newlines produce no token; they only advance the line counter.
    # The counter lives on the lexer object: a token's own `lineno` is just
    # a snapshot taken when the token was created, so bumping it (as this
    # rule used to) never reaches the tokens that follow.
    t.lexer.lineno += len(t.value)
def t_COMMENT(t):
    r'/\*[\w\W]*?\*/'
    # A /* ... */ comment, possibly spanning several lines, matched
    # non-greedily up to the first closing marker.  It produces no token,
    # but the newlines it contains must still be counted.  The count goes
    # to the lexer's line counter; the token's own `lineno` is only a
    # snapshot and updating it would be lost.
    t.lexer.lineno += t.value.count('\n')
# ---------------------------------------------------------------
# ERROR HANDLING
# ---------------------------------------------------------------
def t_error(t):
    # Lexer error handler.  t.value holds the remaining input starting at
    # the offending character.  Prints a one-line diagnostic and, where
    # recovery is possible, skips past the offending text.
    #
    # The line prefix and the message belong on the same output line, so
    # the prefix is printed with a trailing space instead of a newline.
    print("Line %d." % (t.lineno,), end=" ")
    if t.value[0] == '"':
        print("Unterminated string literal.")
        # Resume at the end of the line, if there is one.
        if '\n' in t.value:
            t.lexer.skip(t.value.index('\n'))
    elif t.value[0:2] == '/*':
        # Nothing is skipped here, so scanning cannot continue past an
        # unterminated comment.
        print("Unterminated comment.")
    else:
        print("Illegal character '%s'" % t.value[0])
        t.lexer.skip(1)
# ---------------------------------------------------------------
# MAIN LEXER FUNCTIONALITY
# ---------------------------------------------------------------
def run_lexer():
    """Debugging aid: tokenize the file named by sys.argv[1] and print
    one "(type,'value',lineno)" line per token.

    Only invoked when this module is run as a script."""
    import sys

    # Read the whole source in one go; the context manager closes the file
    # even if reading fails.
    with open(sys.argv[1]) as source_file:
        source = source_file.read()

    lex.input(source)
    while True:
        token = lex.token()  # Get a token
        if not token:
            break  # No more tokens
        print("(%s,'%s',%d)" % (token.type, token.value, token.lineno))
# Build the lexer as soon as this module is imported.
# NOTE(review): presumably PLY collects the `tokens` tuple and the t_* rules
# defined above from this module -- confirm against the PLY documentation.
lex.lex()
# When executed directly as a script, dump the tokens of the file named on
# the command line.
if __name__ == '__main__':
    run_lexer()
# ---------------------------------------------------------------
# End of clex.py
# ---------------------------------------------------------------
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
import argparse, sys, os.path
import ply.yacc as yacc
from . import cparse, cvisitors, cx86
class CompileError(Exception):
    """Raised to abort compilation once a phase has reported errors."""
class Compiler(object):
    """Front end of the compiler.

    Acts as a facade over the real work: it parses the source, runs each
    compilation phase (a visitor) over the resulting syntax tree, and keeps
    running totals of the errors and warnings those phases report."""

    def __init__(self):
        self.total_warnings = 0
        self.total_errors = 0

    def _parse(self):
        """Parse ``self.code`` and store the resulting tree in ``self.ast``."""
        self.ast = yacc.parse(self.code)

    def _compile_phase(self, visitor):
        """Run one visitor over the syntax tree and add its error and
        warning counts to the totals.

        Raises CompileError if the visitor reports that it has errors."""
        visitor.visit(self.ast)
        self.total_errors += visitor.errors
        self.total_warnings += visitor.warnings
        if not visitor.has_errors():
            return
        raise CompileError()

    def _do_compile(self, outfile, ast_file):
        """Parse the code and run every phase in order, handing `outfile`
        to the code generator.

        When `ast_file` is not None, a final phase runs the AST printer
        with it."""
        self._parse()

        def phases():
            # A generator, so that each visitor is only constructed once
            # the previous phase has completed without raising.
            yield cvisitors.SymtabVisitor()
            yield cvisitors.TypeCheckVisitor()
            yield cvisitors.FlowControlVisitor()
            yield cx86.CodeGenVisitor(outfile)
            if ast_file is not None:
                yield cvisitors.ASTPrinterVisitor(ast_file)

        for visitor in phases():
            self._compile_phase(visitor)

    def _print_stats(self):
        """Print the accumulated error and warning totals."""
        print("%d errors, %d warnings." % (self.total_errors, self.total_warnings))

    def compile(self, code, outfile, show_ast):
        """Compile the `code` string, passing `outfile` to code generation.

        Despite its name, `show_ast` is forwarded unchanged as the AST
        output file: pass None to skip the AST dump.

        Returns 0 on success and 1 if parsing or any phase failed."""
        self.code = code
        try:
            self._do_compile(outfile, show_ast)
        except cparse.ParseError:
            # No totals are printed for a parse failure.
            print("Errors encountered, bailing.")
            return 1
        except CompileError:
            self._print_stats()
            print("Errors encountered, bailing.")
            return 1
        else:
            self._print_stats()
            print("Compile successful.")
            return 0
def main(args=None):
    """Command-line entry point: compile each C source file given.

    `args` is the argument list to parse; None makes argparse fall back to
    the process command line.  Output is written to the file given with
    -o (standard output by default).  With --ast, the AST of each source
    is dumped next to it in a file with the extension replaced by ".ast".

    Exits the process with the compiler's status as soon as one source
    fails to compile."""
    parser = argparse.ArgumentParser(prog="cct")
    parser.add_argument("-o", action="store", metavar="PATH",
                        type=argparse.FileType('w'), default=sys.stdout,
                        help="write output to PATH")
    parser.add_argument("--ast", action="store_true", default=False,
                        help="dump AST for each C file")
    parser.add_argument("source", nargs="+", metavar="PATH",
                        help="C source files(s) to compile")
    args = parser.parse_args(args)
    for src in args.source:
        # Read the source first: if it cannot be opened, no stray (empty)
        # .ast file is left behind, and the handle is closed right away.
        with open(src) as source_file:
            code = source_file.read()
        ast_file = None
        if args.ast:
            ast_file = open(os.path.splitext(src)[0] + ".ast", "w")
        try:
            retval = Compiler().compile(code, args.o, ast_file)
        finally:
            # Close the AST dump even if compilation raised.
            if ast_file is not None:
                ast_file.close()
        if retval != 0:
            sys.exit(retval)
This diff is collapsed. Click to expand it.
/*******************************************************************
* foo.c
* Atul Varma - 5/24/2004
* CS Independent Study
* $Id: foo.c,v 1.1 2004/05/27 16:25:14 varmaa Exp $
*
* This is a simple C file that should be compiled by my mini-C
* compiler.
*******************************************************************
*/
/* Prototypes for some standard C library functions (the code
calls these directly). */
extern int printf(char *str, ...);
extern char *malloc(int size);
extern int free(char *ptr);
/* Test of extern variable. How many times we've called
a printf() function. */
extern int stuff_count;
/* Increments this global variable. */
extern int increment_stuff_count();
/* Test of global variable. How many times we've called
the fib() function. */
int fib_count;
/* fibonacci function: Test of basic branching and recursion.
   Returns 1 for i == 1 and 0 for i == 0; any other value recurses on
   i-1 and i-2 (there is no guard for negative i).  Every call, the
   recursive ones included, adds 1 to the global fib_count. */
static int fib(int i)
{
  fib_count += 1;
  if (i == 1) {
    return 1;
  } else {
    if (i == 0) {
      return 0;
    } else {
      return fib(i-1) + fib(i-2);
    }
  }
}
/* Just a wrapper to easily show the results of a
   call to fib(): prints "fib(i) is <result>." via printf.
   Always returns 0. */
static int show_fib(int i)
{
  printf("fib(%d) is %d.\n", i, fib(i));
  return 0;
}
/* Test of pointer indirection and char type.
   Stores the character 'a' through the pointer c.  Always returns 0. */
static int set_a(char *c)
{
  *c = 'a';
  return 0;
}
/* Test of string literals and returning char *'s.
   Returns a pointer to the literal "blah\n". */
static char *get_literal()
{
  return "blah\n";
}
/* Main program that runs the tests.
   Each section below exercises one language feature and reports its
   result through printf.  Always returns 0. */
int main(int argc, char **argv) {
  char c;
  int i;

  c = 'h';

  /* Test of multiple assignment. */
  fib_count = stuff_count = 0;

  /* Test of command-line argument passing, pointer
     indirection/array indexing, for looping. */
  printf("My executable name is %s.\n", *argv);

  /* increment_stuff_count() is called once per argument. */
  for (i = 0; i < argc; i += 1) {
    printf(" argv[%d] is: %s "
           "argv[%d][0] is: %c\n", i, argv[i], i, argv[i][0]);
    increment_stuff_count();
  }

  /* Test of while looping with break/continue.
     show_fib() is called for i = 0 through 5. */
  i = 0;
  while (1) {
    show_fib(i);
    i += 1;
    if (i > 5)
      break;
    else
      continue;
  }

  /* Double the count accumulated in the for loop above. */
  stuff_count = stuff_count * 2;

  printf("fib_count is %d.\n", fib_count);
  printf("stuff_count is %d.\n", stuff_count);

  printf("before set_a(&c), c == '%c'\n", c);

  /* Test of address-of (&) operator. */
  set_a(&c);

  {
    /* Test of char-int and int-char type coercion. */
    int a;
    char b;
    int c;

    /* Note that in two's complement arithmetic, this is
       a 32-bit int consisting of all 1's.
       (This is also a test of the '-' unary operator.) */
    a = -1;

    /* The following line will raise a warning from the
       compiler, because a signed 32-bit int is being truncated
       to an unsigned 8-bit char. */
    b = a;
    c = b;

    printf(" a = %d\n", a);
    printf(" b = %d\n", b);
    printf(" c = %d\n", c);
  }

  /* Note now that the scope of c is in the function's main
     scope, not the scope of the above compound statement.
     This test makes sure that the address and contents
     of c did not change during the execution of the
     compound statement. */
  printf("after set_a(&c), c == '%c'\n", c);

  printf("get_literal() = %s\n", get_literal());

  /* Pointer indexing via array example. */
  printf("get_literal()[3] = %c\n", get_literal()[3]);

  {
    /* Test of building a string using assignment via array indexing
       of a char pointer. The buffer is dynamically allocated. */
    char *c;

    c = malloc(30);
    c[0] = 'h';
    c[1] = 'i';
    /* Terminate the string. */
    c[2] = 0;
    printf("array-built string is: %s\n", c);
    free(c);
  }

  return 0;
}
/*******************************************************************
* foo_lib.c
* Atul Varma - 5/24/2004
* CS Independent Study
* $Id: foo_lib.c,v 1.1 2004/05/27 16:25:14 varmaa Exp $
*
* Contains external library functions/variables for foo.c.
*******************************************************************
*/
/* Test global variable. */
int stuff_count;
/* Test of static function definition, to make sure it
   doesn't conflict with fib() defined in foo.c.
   Adds 1 to the global stuff_count and returns the new value. */
static int fib()
{
  return stuff_count += 1;
}
/* Increment global variable.
   Does so by calling this file's static fib(), whose return value is
   discarded.  Always returns 0. */
int increment_stuff_count()
{
  fib();
  return 0;
}