Franck Pommereau

ported from Python 2

1 +*~
2 +*.pyc
3 +*.pyo
4 +__pycache__
5 +,*
6 ++*
7 +.*.swp
8 +.#*
9 +#*
10 +cctlib/parser.out
1 +#!/usr/bin/env python3
2 +import cctlib.main
3 +cctlib.main.main()
File mode changed
1 +# ---------------------------------------------------------------
2 +# clex.py
3 +#
4 +# Atul Varma
5 +# Python C Compiler - Lexical Analyzer
6 +# $Id: clex.py,v 1.2 2004/06/02 21:05:45 varmaa Exp $
7 +# ---------------------------------------------------------------
8 +
9 +import ply.lex as lex
10 +import re
11 +
12 +# ---------------------------------------------------------------
13 +# TOKEN LIST
14 +# ---------------------------------------------------------------
15 +
# Names of every token type this lexer can emit.  PLY reads this
# module-level tuple when the lexer is built.
tokens = (
    # Reserved words
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO',
    'DOUBLE', 'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF',
    'INT', 'LONG', 'REGISTER', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF',
    'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID',
    'VOLATILE', 'WHILE',

    # Special characters
    'COMMA', 'COLON', 'SEMICOLON',
    'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', 'LBRACE', 'RBRACE',
    'ASSIGN', 'GREATER', 'LESS', 'EQ', 'NOT_EQ', 'GREATER_EQ', 'LESS_EQ',
    'DOUBLE_PLUS', 'DOUBLE_MINUS', 'PLUS', 'MINUS', 'TIMES', 'DIV', 'MODULO',
    'DOUBLE_AMPERSAND', 'DOUBLE_PIPE', 'EXCLAMATION',
    'AMPERSAND', 'PIPE', 'CARET', 'ASTERISK', 'QUESTION', 'TILDE', 'POUND',
    'DOT', 'ELLIPSIS', 'ARROW', 'SHIFT_LEFT', 'SHIFT_RIGHT',
    'EQ_PLUS', 'EQ_MINUS', 'EQ_TIMES', 'EQ_DIV', 'EQ_MODULO',
    'EQ_PIPE', 'EQ_AMPERSAND', 'EQ_CARET', 'EQ_SHIFT_LEFT', 'EQ_SHIFT_RIGHT',

    # Complex tokens
    'ID', 'FNUMBER', 'INUMBER', 'STRING', 'CHARACTER',
)
108 +
109 +# ---------------------------------------------------------------
110 +# RESERVED WORDS
111 +# ---------------------------------------------------------------
112 +
# Maps each C keyword to its token type, which is the keyword spelled in
# upper case.  t_ID looks identifiers up here to retag keywords.
reserved_words = {
    keyword: keyword.upper()
    for keyword in (
        'auto', 'break', 'case', 'char', 'const', 'continue', 'default',
        'do', 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto',
        'if', 'int', 'long', 'register', 'return', 'short', 'signed',
        'sizeof', 'static', 'struct', 'switch', 'typedef', 'union',
        'unsigned', 'void', 'volatile', 'while',
    )
}
147 +
148 +# ---------------------------------------------------------------
149 +# SPECIAL CHARACTERS
150 +# ---------------------------------------------------------------
151 +
# Patterns for the fixed-spelling tokens.  PLY adds string rules to its
# master regex in order of decreasing pattern length, so a long pattern is
# tried before a short one regardless of the order written here.  Patterns
# are compiled in verbose mode by default, which is why '#' is escaped.

# Punctuation and brackets
t_COMMA = r','
t_COLON = r':'
t_SEMICOLON = r';'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'{'
t_RBRACE = r'}'

# Assignment and comparison
t_ASSIGN = r'='
t_GREATER = r'>'
t_LESS = r'<'
t_EQ = r'=='
t_NOT_EQ = r'!='
t_GREATER_EQ = r'>='
t_LESS_EQ = r'<='

# Arithmetic
t_DOUBLE_PLUS = r'\+\+'
t_DOUBLE_MINUS = r'--'
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
# The lookahead keeps '/' from being split off two longer lexemes:
#   '/*'  an unterminated comment must reach t_error, not lex as DIV TIMES;
#   '/='  this pattern is longer than r'/=' and so is tried first; without
#         the '=' exclusion EQ_DIV could never be produced.
t_DIV = r'/(?![*=])'
t_MODULO = r'%'

# Logical and bitwise
t_DOUBLE_AMPERSAND = r'&&'
t_DOUBLE_PIPE = r'\|\|'
t_EXCLAMATION = r'!'
t_AMPERSAND = r'&'
t_PIPE = r'\|'
t_CARET = r'\^'
# NOTE(review): same pattern as t_TIMES, so only one of the two token types
# can ever be emitted for '*' -- confirm which one the grammar relies on.
t_ASTERISK = r'\*'
t_QUESTION = r'\?'
t_TILDE = r'~'
t_POUND = r'\#'

# Member access and shifts
t_ELLIPSIS = r'\.\.\.'
t_DOT = r'\.'
t_ARROW = r'->'
t_SHIFT_LEFT = r'<<'
t_SHIFT_RIGHT = r'>>'

# Compound assignment
t_EQ_PLUS = r'\+='
t_EQ_MINUS = r'-='
t_EQ_TIMES = r'\*='
t_EQ_DIV = r'/='
t_EQ_MODULO = r'%='
t_EQ_PIPE = r'\|='
t_EQ_AMPERSAND = r'&='
t_EQ_CARET = r'\^='
t_EQ_SHIFT_LEFT = r'<<='
t_EQ_SHIFT_RIGHT = r'>>='
200 +
201 +# ---------------------------------------------------------------
202 +# COMPLEX TOKENS
203 +# ---------------------------------------------------------------
204 +
# Identifier rule.  The string that opens the body is the PLY token regex,
# not documentation, and must stay the first statement.
def t_ID(t):
    r'[A-Za-z_][\w]*'
    # Keywords also match the identifier pattern, so retag them with their
    # own token type; anything else keeps the type it arrived with.
    t.type = reserved_words.get(t.value, t.type)
    return t
210 +
# Floating point literal: an integer part with no leading zeros, followed
# by a fraction (optionally with a lower-case 'e' exponent) or by an
# exponent alone.  Defined ahead of t_INUMBER so that it is tried first.
def t_FNUMBER(t):
    r'((0(?!\d))|([1-9]\d*))((\.\d+(e[+-]?\d+)?)|(e[+-]?\d+))'
    # The lexeme is passed on unchanged, as text.
    return t
214 +
# Floating point literal whose integer part has a leading zero.  The
# problem is reported and, because nothing is returned, no token is
# produced for the lexeme.
def t_malformed_fnumber(t):
    r'(0\d+)((\.\d+(e[+-]?\d+)?)|(e[+-]?\d+))'
    complaint = "Line %d. Malformed floating point number '%s'" % (
        t.lineno, t.value)
    print(complaint)
218 +
# Decimal integer literal: a lone 0, or a non-zero digit followed by any
# number of digits.
def t_INUMBER(t):
    r'0(?!\d)|([1-9]\d*)'
    # The lexeme is passed on unchanged, as text.
    return t
222 +
# Integer literal with a leading zero.  The problem is reported and,
# because nothing is returned, no token is produced for the lexeme.
def t_malformed_inumber(t):
    r'0\d+'
    complaint = "Line %d. Malformed integer '%s'" % (t.lineno, t.value)
    print(complaint)
226 +
# Character literal: exactly one word character between single quotes.
# Escapes and non-word characters are not matched by this pattern.
def t_CHARACTER(t):
    r"'\w'"
    # The lexeme keeps its surrounding quotes.
    return t
230 +
# String literal on a single line.  The body of the literal is a run of
# either a backslash followed by any character, or any character other
# than a quote, a backslash or a newline.  Consuming escapes as pairs lets
# a literal end in an escaped backslash ("\\"), which a plain "quote not
# preceded by a backslash" test would refuse to close.
def t_STRING(t):
    r'"(?:\\.|[^"\\\n])*"'
    # Drop escaped backslashes first so the second character of '\\' is not
    # mistaken for the start of another escape.
    without_escaped_backslashes = t.value.replace(r'\\', '')
    # Only \n and \" remain supported; anything else is reported and the
    # literal is dropped (returning None produces no token).
    unsupported = re.search(r'\\[^n"]', without_escaped_backslashes)
    if unsupported is not None:
        print("Line %d. Unsupported character escape %s in string literal."
              % (t.lineno, unsupported.group(0)))
        return None
    return t
240 +
241 +# ---------------------------------------------------------------
242 +# IGNORED TOKENS
243 +# ---------------------------------------------------------------
244 +
# Runs of spaces and tabs.  Nothing is returned, so they never become
# tokens.
def t_WHITESPACE(t):
    r'[ \t]+'
    return None
248 +
# Runs of newlines.  They produce no token but advance the line counter.
def t_NEWLINE(t):
    r'\n+'
    # The counter lives on the lexer: each new token copies its lineno
    # from there, so bumping t.lineno on this discarded token would be
    # lost and every later token would report the same line.
    t.lexer.lineno += len(t.value)
252 +
# Block comment, possibly spanning lines.  It produces no token.
def t_COMMENT(t):
    r'/\*[\w\W]*?\*/'
    # Account for the lines the comment covers.  The counter lives on the
    # lexer; a change to t.lineno on this discarded token would be lost.
    t.lexer.lineno += t.value.count('\n')
257 +
258 +# ---------------------------------------------------------------
259 +# ERROR HANDLING
260 +# ---------------------------------------------------------------
261 +
# Lexer error hook, called when no rule matches at the current position.
# t.value holds the remaining input, so its first characters identify what
# went wrong.
def t_error(t):
    # The message is built from two prints on one line: the Python 2
    # original ended this statement with a trailing comma, and end=" "
    # reproduces that.
    print("Line %d." % t.lineno, end=" ")
    if t.value[0] == '"':
        print("Unterminated string literal.")
        # Resume lexing at the end of the offending line, if there is one.
        if '\n' in t.value:
            t.lexer.skip(t.value.index('\n'))
    elif t.value[0:2] == '/*':
        # Nothing is skipped here, so the lexer position does not advance.
        # NOTE(review): PLY is expected to treat that as fatal -- confirm
        # that aborting on an unterminated comment is intended.
        print("Unterminated comment.")
    else:
        print("Illegal character '%s'" % t.value[0])
        t.lexer.skip(1)
273 +
274 +# ---------------------------------------------------------------
275 +# MAIN LEXER FUNCTIONALITY
276 +# ---------------------------------------------------------------
277 +
def run_lexer():
    """Print every token of the file named by sys.argv[1].

    This is a debugging aid that runs only when the module is executed as
    a script.  One "(type,'value',line)" line is printed per token.
    """
    import sys

    # Read the whole source in one go; the context manager closes the
    # file even if reading fails.
    with open(sys.argv[1]) as source_file:
        lex.input(source_file.read())

    while True:
        token = lex.token()  # Get a token
        if not token:  # No more tokens
            break
        print("(%s,'%s',%d)" % (token.type, token.value, token.lineno))
294 +
295 +lex.lex()
296 +
297 +if __name__ == '__main__':
298 + run_lexer()
299 +
300 +# ---------------------------------------------------------------
301 +# End of clex.py
302 +# ---------------------------------------------------------------
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
1 +import argparse, sys, os.path
2 +
3 +import ply.yacc as yacc
4 +from . import cparse, cvisitors, cx86
5 +
class CompileError(Exception):
    """Signals that a compilation phase reported one or more errors."""
9 +
class Compiler(object):
    """Front end of the compiler.

    A facade over the parser and the visitor passes: callers hand source
    text to compile() and receive an exit status, while running totals of
    errors and warnings are kept on the instance.
    """

    def __init__(self):
        self.total_errors = 0
        self.total_warnings = 0

    def _parse(self):
        """Parse self.code and keep the resulting tree in self.ast."""
        self.ast = yacc.parse(self.code)

    def _compile_phase(self, visitor):
        """Run one visitor over the tree and add up its diagnostics.

        Raises CompileError when the visitor reports errors, which keeps
        any later phase from running.
        """
        visitor.visit(self.ast)
        self.total_errors += visitor.errors
        self.total_warnings += visitor.warnings
        if visitor.has_errors():
            raise CompileError()

    def _do_compile(self, outfile, ast_file):
        """Parse the code and run every phase, writing output to outfile.

        When ast_file is not None, the abstract syntax tree is printed to
        it once code generation has finished.
        """
        self._parse()
        self._compile_phase(cvisitors.SymtabVisitor())
        self._compile_phase(cvisitors.TypeCheckVisitor())
        self._compile_phase(cvisitors.FlowControlVisitor())
        self._compile_phase(cx86.CodeGenVisitor(outfile))
        if ast_file is not None:
            self._compile_phase(cvisitors.ASTPrinterVisitor(ast_file))

    def _print_stats(self):
        """Print the accumulated error and warning counts."""
        print("%d errors, %d warnings." % (self.total_errors, self.total_warnings))

    def compile(self, code, outfile, show_ast):
        """Compile the code string, writing the result to outfile.

        show_ast is passed straight through as the AST output file: either
        an open file object or None.  Returns 0 on success and 1 when
        parsing or any phase failed.
        """
        self.code = code
        try:
            self._do_compile(outfile, show_ast)
        except (cparse.ParseError, CompileError) as failure:
            # Totals are reported for phase failures only; a parse error
            # bails out without them.
            if not isinstance(failure, cparse.ParseError):
                self._print_stats()
            print("Errors encountered, bailing.")
            return 1
        else:
            self._print_stats()
            print("Compile successful.")
            return 0
55 +
def main(args=None):
    """Command-line entry point: compile each C source file in turn.

    args is the argument list to parse; None makes argparse read
    sys.argv.  All output goes to the file given with -o (standard output
    by default).  With --ast, the tree of each source is also dumped next
    to it, with the extension replaced by ".ast".  The process exits with
    the first non-zero status returned by a compilation.
    """
    parser = argparse.ArgumentParser(prog="cct")
    parser.add_argument("-o", action="store", metavar="PATH",
                        type=argparse.FileType('w'), default=sys.stdout,
                        help="write output to PATH")
    parser.add_argument("--ast", action="store_true", default=False,
                        help="dump AST for each C file")
    parser.add_argument("source", nargs="+", metavar="PATH",
                        help="C source file(s) to compile")
    args = parser.parse_args(args)
    for src in args.source:
        # Read the source before creating the .ast file, so an unreadable
        # source does not leave an empty dump behind.
        with open(src) as src_file:
            code = src_file.read()
        ast_file = None
        if args.ast:
            ast_file = open(os.path.splitext(src)[0] + ".ast", "w")
        try:
            retval = Compiler().compile(code, args.o, ast_file)
        finally:
            # Close the dump even if compilation raised.
            if ast_file is not None:
                ast_file.close()
        if retval != 0:
            sys.exit(retval)
This diff is collapsed. Click to expand it.
1 +/*******************************************************************
2 + * foo.c
3 + * Atul Varma - 5/24/2004
4 + * CS Independent Study
5 + * $Id: foo.c,v 1.1 2004/05/27 16:25:14 varmaa Exp $
6 + *
7 + * This is a simple C file that should be compiled by my mini-C
8 + * compiler.
9 + *******************************************************************
10 +*/
11 +
12 +/* Prototypes for some standard C library functions (the code
13 + calls these directly). */
14 +extern int printf(char *str, ...);
15 +extern char *malloc(int size);
16 +extern int free(char *ptr);
17 +
18 +/* Test of extern variable. How many times we've called
19 + a printf() function. */
20 +extern int stuff_count;
21 +
22 +/* Increments this global variable. */
23 +extern int increment_stuff_count();
24 +
25 +/* Test of global variable. How many times we've called
26 + the fib() function. */
27 +int fib_count;
28 +
/* fibonacci function: Test of basic branching and recursion.

   Returns 1 when i is 1 and 0 when i is 0; for any other i it returns
   fib(i-1) + fib(i-2).  Every call, recursive ones included, adds 1 to
   the global fib_count.  Nothing stops the recursion for a negative i,
   so callers must pass i >= 0. */
static int fib(int i)
{
    fib_count += 1;
    if (i == 1) {
        return 1;
    } else {
        if (i == 0) {
            return 0;
        } else {
            return fib(i-1) + fib(i-2);
        }
    }
}
43 +
/* Just a wrapper to easily show the results of a
   call to fib().  Prints "fib(i) is <result>." on its own line and
   always returns 0. */
static int show_fib(int i)
{
    printf("fib(%d) is %d.\n", i, fib(i));
    return 0;
}
51 +
/* Test of pointer indirection and char type.  Stores the character 'a'
   in the char that c points to and always returns 0. */
static int set_a(char *c)
{
    *c = 'a';
    return 0;
}
58 +
/* Test of string literals and returning char *'s.  Returns a pointer to
   the literal "blah" followed by a newline. */
static char *get_literal()
{
    return "blah\n";
}
64 +
/* Main program that runs the tests.

   Prints the command-line arguments, the first six Fibonacci numbers,
   the two global counters, and the results of the char/int coercion,
   pointer and string tests below.  Always returns 0. */
int main(int argc, char **argv) {
    char c;
    int i;

    c = 'h';

    /* Test of multiple assignment. */
    fib_count = stuff_count = 0;

    /* Test of command-line argument passing, pointer
       indirection/array indexing, for looping. */
    printf("My executable name is %s.\n", *argv);
    for (i = 0; i < argc; i += 1) {
        printf(" argv[%d] is: %s "
               "argv[%d][0] is: %c\n", i, argv[i], i, argv[i][0]);
        increment_stuff_count();
    }

    /* Test of while looping with break/continue.  The loop shows
       fib(0) through fib(5) and leaves once i exceeds 5. */
    i = 0;
    while (1) {
        show_fib(i);
        i += 1;
        if (i > 5)
            break;
        else
            continue;
    }
    stuff_count = stuff_count * 2;

    printf("fib_count is %d.\n", fib_count);
    printf("stuff_count is %d.\n", stuff_count);

    printf("before set_a(&c), c == '%c'\n", c);

    /* Test of address-of (&) operator. */
    set_a(&c);

    {
        /* Test of char-int and int-char type coercion.  Note that the
           c declared here hides the char c of the enclosing scope. */
        int a;
        char b;
        int c;

        /* Note that in two's complement arithmetic, this is
           a 32-bit int consisting of all 1's.

           (This is also a test of the '-' unary operator.) */
        a = -1;

        /* The following line will raise a warning from the
           compiler, because a signed 32-bit int is being truncated
           to an unsigned 8-bit char. */
        b = a;

        c = b;

        printf(" a = %d\n", a);
        printf(" b = %d\n", b);
        printf(" c = %d\n", c);
    }

    /* Note now that the scope of c is in the function's main
       scope, not the scope of the above compound statement.
       This test makes sure that the address and contents
       of c did not change during the execution of the
       compound statement. */
    printf("after set_a(&c), c == '%c'\n", c);

    printf("get_literal() = %s\n", get_literal());

    /* Pointer indexing via array example. */
    printf("get_literal()[3] = %c\n", get_literal()[3]);

    {
        /* Test of building a string using assignment via array indexing
           of a char pointer. The buffer is dynamically allocated. */
        char *c;

        c = malloc(30);
        c[0] = 'h';
        c[1] = 'i';
        /* Terminate the string with a zero byte. */
        c[2] = 0;
        printf("array-built string is: %s\n", c);
        free(c);
    }
    return 0;
}
1 +/*******************************************************************
2 + * foo_lib.c
3 + * Atul Varma - 5/24/2004
4 + * CS Independent Study
5 + * $Id: foo_lib.c,v 1.1 2004/05/27 16:25:14 varmaa Exp $
6 + *
7 + * Contains external library functions/variables for foo.c.
8 + *******************************************************************
9 +*/
10 +
11 +/* Test global variable. */
12 +int stuff_count;
13 +
/* Test of static function definition, to make sure it
   doesn't conflict with fib() defined in foo.c.

   Adds 1 to the global stuff_count and returns the new value. */
static int fib()
{
    return stuff_count += 1;
}
20 +
/* Increment global variable.  Adds 1 to stuff_count by calling this
   file's static fib() and always returns 0. */
int increment_stuff_count()
{
    fib();
    return 0;
}
26 +}