ACSE 2.0.2
Advanced Compiler System for Education (basic documentation)
Loading...
Searching...
No Matches
parser.y
Go to the documentation of this file.
1/// @file parser.y
2/// @brief The bison grammar file that describes the LANCE language and
3/// the semantic actions used to translate it to assembly code.
4
5%{
6#include <stdio.h>
7#include <stdlib.h>
8#include <limits.h>
9#include "errors.h"
10#include "list.h"
11#include "codegen.h"
12#include "scanner.h"
13#include "parser.h"
14
15/*
16 * Global variables
17 */
18
19// The program currently being compiled.
20static t_program *program;
21
22void yyerror(const char *msg)
23{
24 emitError(curFileLoc, "%s", msg);
25}
26
27%}
28
29/*
30 * Axiom declaration
31 *
32 * The starting non-terminal of the grammar will be `program'.
33 */
34
35%start program
36
37/*
38 * Union declaration
39 *
40 * Specifies the set of types available for the semantic values of terminal
41 * and non-terminal symbols.
42 */
43
44%union {
45 int integer;
46 char *string;
47 t_regID reg;
48 t_symbol *var;
49 t_listNode *list;
50 t_label *label;
51 t_ifStmt ifStmt;
52 t_whileStmt whileStmt;
53}
54
55/*
56 * Terminal symbol declaration
57 *
58 * Here we declare all the token symbols that can be produced by the scanner.
59 * Bison will automatically produce a #define that assigns a number (or token
60 * identifier) to each one of these tokens.
61 * We also declare the type for the semantic values of some of these tokens.
62 */
63
64%token EOF_TOK // End of file.
65%token LPAR RPAR LSQUARE RSQUARE LBRACE RBRACE
66%token COMMA SEMI PLUS MINUS MUL_OP DIV_OP MOD_OP
67%token AND_OP XOR_OP OR_OP NOT_OP
68%token ASSIGN LT GT SHL_OP SHR_OP EQ NOTEQ LTEQ GTEQ
69%token ANDAND OROR
70%token TYPE
71%token RETURN
72%token READ WRITE ELSE
73
74// These are the tokens with a semantic value.
75%token <ifStmt> IF
76%token <whileStmt> WHILE
77%token <label> DO
78%token <string> IDENTIFIER
79%token <integer> NUMBER
80
81/*
82 * Non-terminal symbol semantic value type declarations
83 *
84 * Here we declare the type of the semantic values of non-terminal symbols.
85 * We only declare the type of non-terminal symbols of which we actually use
86 * their semantic value.
87 */
88
89%type <var> var_id
90%type <reg> exp
91
92/*
93 * Operator precedence and associativity
94 *
95 * Precedence is given by the declaration order. Associativity is given by the
96 * specific keyword used (%left, %right).
97 */
98
99%left OROR
100%left ANDAND
101%left OR_OP
102%left XOR_OP
103%left AND_OP
104%left EQ NOTEQ
105%left LT GT LTEQ GTEQ
106%left SHL_OP SHR_OP
107%left PLUS MINUS
108%left MUL_OP DIV_OP MOD_OP
109%right NOT_OP
110
111/*
112 * Grammar and semantic actions
113 *
114 * The grammar of the language follows. The semantic actions are the pieces of
115 * C code enclosed in {} brackets: they are executed when the rule has been
116 * parsed and recognized up to the point where the semantic action appears.
117 */
118
119%%
120
121/* `program' is the starting non-terminal of the grammar.
122 * A program is composed by:
123 * 1. Declarations (zero or more),
124 * 2. A list of instructions (zero or more). */
125program
126 : var_declarations statements EOF_TOK
127 {
128 // Generate the epilog of the program, that is, a call to the
129 // `exit' syscall.
130 genEpilog(program);
131 // Return from yyparse().
132 YYACCEPT;
133 }
134;
135
136/* This non-terminal appears at the beginning of the program and represents
137 * all the declarations. */
138var_declarations
139 : var_declarations var_declaration
140 | /* empty */
141;
142
143/* Each declaration consists of a type, a list of declarators, and a
144 * terminating semicolon. */
145var_declaration
146 : TYPE declarator_list SEMI
147;
148
149declarator_list
150 : declarator_list COMMA declarator
151 | declarator
152;
153
154/* A declarator specifies either a scalar variable name or an array name
155 * and size. */
156declarator
157 : IDENTIFIER
158 {
159 createSymbol(program, $1, TYPE_INT, 0);
160 }
161 | IDENTIFIER LSQUARE NUMBER RSQUARE
162 {
163 createSymbol(program, $1, TYPE_INT_ARRAY, $3);
164 }
165;
166
167/* A block of code is a list of statements enclosed between braces. */
168code_block
169 : LBRACE statements RBRACE
170;
171
172statements
173 : statements statement
174 | /* empty */
175;
176
177statement
178 : assign_statement SEMI
179 | if_statement
180 | while_statement
181 | do_while_statement SEMI
182 | return_statement SEMI
183 | read_statement SEMI
184 | write_statement SEMI
185 | SEMI
186;
187
188/* An assignment statement stores the value of an expression in the memory
189 * location of a given scalar variable or array element. */
190assign_statement
191 : var_id ASSIGN exp
192 {
193 genStoreRegisterToVariable(program, $1, $3);
194 }
195 | var_id LSQUARE exp RSQUARE ASSIGN exp
196 {
197 genStoreRegisterToArrayElement(program, $1, $3, $6);
198 }
199;
200
201/* An if statements first computes the expression, then jumps to the `else' part
202 * if the expression is equal to zero. Otherwise the `then' part is executed.
203 * After the `then' part the `else' part needs to be jumped over. */
204if_statement
205 : IF LPAR exp RPAR
206 {
207 // Generate a jump to the else part if the expression is equal to zero.
208 $1.lElse = createLabel(program);
209 genBEQ(program, $3, REG_0, $1.lElse);
210 }
211 code_block
212 {
213 // After the `then' part, generate a jump to the end of the statement.
214 $1.lExit = createLabel(program);
215 genJ(program, $1.lExit);
216 // Assign the label which points to the first instruction of the else part.
217 assignLabel(program, $1.lElse);
218 }
219 else_part
220 {
221 // Assign the label to the end of the statement.
222 assignLabel(program, $1.lExit);
223 }
224;
225
226/* The `else' part may be missing. */
227else_part
228 : ELSE code_block
229 | /* empty */
230;
231
232/* A while statement repeats the execution of its code block as long as the
233 * expression is different than zero. The expression is computed at the
234 * beginning of each loop iteration. */
235while_statement
236 : WHILE
237 {
238 // Assign a label at the beginning of the loop for the back-edge.
239 $1.lLoop = createLabel(program);
240 assignLabel(program, $1.lLoop);
241 }
242 LPAR exp RPAR
243 {
244 // Generate a jump out of the loop if the condition is equal to zero.
245 $1.lExit = createLabel(program);
246 genBEQ(program, $4, REG_0, $1.lExit);
247 }
248 code_block
249 {
250 // Generate a jump back to the beginning of the loop after its body.
251 genJ(program, $1.lLoop);
252 // Assign the label to the end of the loop.
253 assignLabel(program, $1.lExit);
254 }
255;
256
257/* A do-while statement repeats the execution of its code block as long as the
258 * expression is different than zero. The expression is computed at the
259 * end of each loop iteration. */
260do_while_statement
261 : DO
262 {
263 // Assign a label at the beginning of the loop for the back-edge.
264 $1 = createLabel(program);
265 assignLabel(program, $1);
266 }
267 code_block WHILE LPAR exp RPAR
268 {
269 // Generate a jump to the beginning of the loop to repeat the code block
270 // if the condition is not equal to zero.
271 genBNE(program, $6, REG_0, $1);
272 }
273;
274
275/* A return statement simply exits from the program, and hence translates to a
276 * call to the `exit' syscall. */
277return_statement
278 : RETURN
279 {
280 genExit0Syscall(program);
281 }
282;
283
284/* A read statement translates to a ReadInt syscall. The value it returns is
285 * then stored in the appropriate variable. */
286read_statement
287 : READ LPAR var_id RPAR
288 {
289 t_regID rTmp = getNewRegister(program);
290 genReadIntSyscall(program, rTmp);
291 genStoreRegisterToVariable(program, $3, rTmp);
292 }
293;
294
295/* A write statement translates to a PrintInt syscall, followed by a PrintChar
296 * syscall which prints a newline. */
297write_statement
298 : WRITE LPAR exp RPAR
299 {
300 // Generate a call to the PrintInt syscall.
301 genPrintIntSyscall(program, $3);
302 // Also generate code to print a newline after the integer.
303 t_regID rTmp = getNewRegister(program);
304 genLI(program, rTmp, '\n');
305 genPrintCharSyscall(program, rTmp);
306 }
307;
308
309/* The exp rule represents the syntax of expressions. The semantic value of
310 * the rule is the register ID that will contain the value of the expression
311 * at runtime. */
312exp
313 : NUMBER
314 {
315 $$ = getNewRegister(program);
316 genLI(program, $$, $1);
317 }
318 | var_id
319 {
320 $$ = genLoadVariable(program, $1);
321 }
322 | var_id LSQUARE exp RSQUARE
323 {
324 $$ = genLoadArrayElement(program, $1, $3);
325 }
326 | LPAR exp RPAR
327 {
328 $$ = $2;
329 }
330 | MINUS exp
331 {
332 $$ = getNewRegister(program);
333 genSUB(program, $$, REG_0, $2);
334 }
335 | exp PLUS exp
336 {
337 $$ = getNewRegister(program);
338 genADD(program, $$, $1, $3);
339 }
340 | exp MINUS exp
341 {
342 $$ = getNewRegister(program);
343 genSUB(program, $$, $1, $3);
344 }
345 | exp MUL_OP exp
346 {
347 $$ = getNewRegister(program);
348 genMUL(program, $$, $1, $3);
349 }
350 | exp DIV_OP exp
351 {
352 $$ = getNewRegister(program);
353 genDIV(program, $$, $1, $3);
354 }
355 | exp MOD_OP exp
356 {
357 $$ = getNewRegister(program);
358 genREM(program, $$, $1, $3);
359 }
360 | exp AND_OP exp
361 {
362 $$ = getNewRegister(program);
363 genAND(program, $$, $1, $3);
364 }
365 | exp XOR_OP exp
366 {
367 $$ = getNewRegister(program);
368 genXOR(program, $$, $1, $3);
369 }
370 | exp OR_OP exp
371 {
372 $$ = getNewRegister(program);
373 genOR(program, $$, $1, $3);
374 }
375 | exp SHL_OP exp
376 {
377 $$ = getNewRegister(program);
378 genSLL(program, $$, $1, $3);
379 }
380 | exp SHR_OP exp
381 {
382 $$ = getNewRegister(program);
383 genSRA(program, $$, $1, $3);
384 }
385 | exp LT exp
386 {
387 $$ = getNewRegister(program);
388 genSLT(program, $$, $1, $3);
389 }
390 | exp GT exp
391 {
392 $$ = getNewRegister(program);
393 genSGT(program, $$, $1, $3);
394 }
395 | exp EQ exp
396 {
397 $$ = getNewRegister(program);
398 genSEQ(program, $$, $1, $3);
399 }
400 | exp NOTEQ exp
401 {
402 $$ = getNewRegister(program);
403 genSNE(program, $$, $1, $3);
404 }
405 | exp LTEQ exp
406 {
407 $$ = getNewRegister(program);
408 genSLE(program, $$, $1, $3);
409 }
410 | exp GTEQ exp
411 {
412 $$ = getNewRegister(program);
413 genSGE(program, $$, $1, $3);
414 }
415 | NOT_OP exp
416 {
417 $$ = getNewRegister(program);
418 genSEQ(program, $$, $2, REG_0);
419 }
420 | exp ANDAND exp
421 {
422 t_regID rNormalizedOp1 = getNewRegister(program);
423 genSNE(program, rNormalizedOp1, $1, REG_0);
424 t_regID rNormalizedOp2 = getNewRegister(program);
425 genSNE(program, rNormalizedOp2, $3, REG_0);
426 $$ = getNewRegister(program);
427 genAND(program, $$, rNormalizedOp1, rNormalizedOp2);
428 }
429 | exp OROR exp
430 {
431 t_regID rNormalizedOp1 = getNewRegister(program);
432 genSNE(program, rNormalizedOp1, $1, REG_0);
433 t_regID rNormalizedOp2 = getNewRegister(program);
434 genSNE(program, rNormalizedOp2, $3, REG_0);
435 $$ = getNewRegister(program);
436 genOR(program, $$, rNormalizedOp1, rNormalizedOp2);
437 }
438;
439
440var_id
441 : IDENTIFIER
442 {
443 t_symbol *var = getSymbol(program, $1);
444 if (var == NULL) {
445 yyerror("variable not declared");
446 YYERROR;
447 }
448 $$ = var;
449 free($1);
450 }
451;
452
453%%
454
455t_program *parseProgram(char *fn)
456{
457 FILE *fp = fopen(fn, "r");
458 if (!fp) {
459 emitError(nullFileLocation, "could not open input file");
460 return NULL;
461 }
462
463 program = newProgram();
464 curFileLoc.file = fn;
465 curFileLoc.row = 0;
466 numErrors = 0;
467 yyin = fp;
468 yyparse();
469
470 if (numErrors > 0) {
471 fprintf(stderr, "%d error(s) generated.\n", numErrors);
472 fclose(fp);
473 deleteProgram(program);
474 return NULL;
475 }
476
477 fclose(fp);
478 return program;
479}