diff --git a/Rakefile b/Rakefile index b199aff..e6dc163 100644 --- a/Rakefile +++ b/Rakefile @@ -6,7 +6,6 @@ require 'open3' require 'rake/clean' -CROSS_GCC = '../eugenios/build/rootfs/bin/riscv32-unknown-linux-gnu-gcc' SYSROOT = '../eugenios/build/sysroot' QEMU = 'qemu-riscv32' STAGES = Dir.glob('boot/stage*.elna').collect { |stage| File.basename stage, '.elna' }.sort @@ -16,6 +15,10 @@ CLEAN.include 'build/boot', 'build/valid' directory 'build/boot' directory 'build/valid' +def compile(input, output) + sh ENV.fetch('CC', 'gcc'), '-nostdlib', '-o', output, input +end + task default: :boot desc 'Final stage' @@ -47,7 +50,7 @@ end rule /^build\/[[:alpha:]]+\/stage[[:digit:]]+$/ => ->(match) { "#{match}.s" } do |t| - sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites + compile(*t.prerequisites, t.name) end STAGES.each do |stage| @@ -91,5 +94,5 @@ end file 'build/boot/stage1' => ['build/boot', 'boot/stage1.s'] do |t| source = t.prerequisites.select { |prerequisite| prerequisite.end_with? '.s' } - sh CROSS_GCC, '-nostdlib', '-o', t.name, *source + compile(*source, t.name) end diff --git a/boot/stage5.elna b/boot/stage5.elna index a8d64bd..8fa670f 100644 --- a/boot/stage5.elna +++ b/boot/stage5.elna @@ -4,11 +4,13 @@ # Stage4 compiler. # -# - Taking value of local and global variables. Variables that doesn't begin -# with "v" are considered global. -# - Simple variable assignment, e.g. v0 := 5 or v0 := global_variable; -# 7 words on the stack, 28 - 56, are reversed for procedure arguments (caller side). -# - Take address unary operation "@". +# - Stack size increased to 128 bytes per procedure. +# 7 words on the stack, 92 - 120, are reserved for procedure arguments (caller side). +# 7 words on the stack, 64 - 92, are reserved for procedure parameters (callee side). +# The first parameter is in 88, the second in 84 and so forth. +# - Unary negate operation, e.g. -5. +# - Unary logical not operation "~". +# - Binary addition "+". 
.section .rodata @@ -37,10 +39,10 @@ keyword_begin: .ascii "begin" keyword_var: .ascii "var" .type asm_prologue, @object -asm_prologue: .string "\taddi sp, sp, -64\n\tsw ra, 60(sp)\n\tsw s0, 56(sp)\n\taddi s0, sp, 64\n" +asm_prologue: .string "\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n" .type asm_epilogue, @object -asm_epilogue: .string "\tlw ra, 60(sp)\n\tlw s0, 56(sp)\n\taddi sp, sp, 64\n\tret\n" +asm_epilogue: .string "\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n" .type asm_type_directive, @object asm_type_directive: .string ".type " @@ -72,11 +74,14 @@ asm_sw: .string "\tsw " .type asm_addi, @object asm_addi: .string "\taddi " -.type asm_t0, @object -asm_t0: .string "t0" +.type asm_add, @object +asm_add: .string "\tadd " -.type asm_t1, @object -asm_t1: .string "t1" +.type asm_neg, @object +asm_neg: .string "\tneg " + +.type asm_not, @object +asm_not: .string "\tnot " .type asm_comma, @object asm_comma: .string ", " @@ -486,7 +491,7 @@ end; proc _compile_integer_literal(); begin _write_z(@asm_li); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _read_token(); @@ -499,7 +504,7 @@ end; proc _compile_character_literal(); begin _write_z(@asm_li); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _write_c('\''); @@ -532,11 +537,11 @@ begin _compile_designator(); _write_z(@asm_lw); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _write_c('('); - _write_z(@asm_t0); + _write_register('t', 0); _write_c(')'); _write_c('\n'); @@ -551,7 +556,39 @@ begin end; -proc _compile_expression(); +proc _compile_negate_expression(); +begin + # Skip the "-" sign. + _advance_token(1); + _compile_term(); + + _write_z(@asm_neg); + _write_register('t', 0); + + _write_z(@asm_comma); + _write_register('t', 0); + + _write_c('\n'); + +end; + +proc _compile_not_expression(); +begin + # Skip the "~" sign. 
+ _advance_token(1); + _compile_term(); + + _write_z(@asm_not); + _write_register('t', 0); + + _write_z(@asm_comma); + _write_register('t', 0); + + _write_c('\n'); + +end; + +proc _compile_term(); begin la t0, source_code_position lw t0, (t0) @@ -559,31 +596,95 @@ begin sw a0, 0(sp) li t1, '\'' - beq a0, t1, .compile_expression_character_literal + beq a0, t1, .compile_term_character_literal li t1, '@' - beq a0, t1, .compile_expression_address + beq a0, t1, .compile_term_address + + li t1, '-' + beq a0, t1, .compile_term_negation + + li t1, '~' + beq a0, t1, .compile_term_not _is_digit(v0); - bnez a0, .compile_expression_integer_literal + bnez a0, .compile_term_integer_literal - goto .compile_expression_variable; + goto .compile_term_variable; -.compile_expression_character_literal: +.compile_term_character_literal: _compile_character_literal(); - goto .compile_expression_end; + goto .compile_term_end; -.compile_expression_integer_literal: +.compile_term_integer_literal: _compile_integer_literal(); - goto .compile_expression_end; + goto .compile_term_end; -.compile_expression_address: +.compile_term_address: _compile_address_expression(); - goto .compile_expression_end; + goto .compile_term_end; -.compile_expression_variable: +.compile_term_negation: + _compile_negate_expression(); + goto .compile_term_end; + +.compile_term_not: + _compile_not_expression(); + goto .compile_term_end; + +.compile_term_variable: _compile_variable_expression(); - goto .compile_expression_end; + goto .compile_term_end; + +.compile_term_end: +end; + +proc _compile_expression(); +begin + _compile_term(); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, ' ' + bne a0, t1, .compile_expression_end + + # It is a binary expression. + + # Save the value of the left expression on the stack. 
+ _write_z(@asm_sw); + _write_register('t', 0); + _write_z(@asm_comma); + _write_i(24); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + # Skip the operator and surrounding whitespaces. + _advance_token(3); + + _compile_term(); + + # Load the left expression from the stack; + _write_z(@asm_lw); + _write_register('t', 1); + _write_z(@asm_comma); + _write_i(24); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + # Execute the operation. + _write_z(@asm_add); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); .compile_expression_end: end; @@ -597,7 +698,7 @@ begin _read_token(); sw a0, 0(sp) - v4 := source_code_position + v4 := source_code_position; sw zero, 8(sp) @@ -617,14 +718,14 @@ begin # Save the argument on the stack. _write_z(@asm_sw); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); - # Calculate the stack offset: 52 - (4 * argument_counter) + # Calculate the stack offset: 116 - (4 * argument_counter) lw t0, 8(sp) li t1, 4 mul t0, t0, t1 - li t1, 52 + li t1, 116 sub a0, t1, t0 _write_i(); @@ -668,11 +769,11 @@ begin _write_z(@asm_comma); - # Calculate the stack offset: 52 - (4 * argument_counter) + # Calculate the stack offset: 116 - (4 * argument_counter) lw t0, 8(sp) li t1, 4 mul t0, t0, t1 - li t1, 52 + li t1, 116 sub a0, t1, t0 _write_i(); @@ -712,7 +813,7 @@ begin _advance_token(1); _write_z(@asm_addi); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _write_z(@asm_sp); _write_z(@asm_comma); @@ -733,7 +834,7 @@ end; proc _compile_global_designator(); begin _write_z(@asm_la); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _read_token(); @@ -772,7 +873,7 @@ begin # Save the assignee address on the stack. 
_write_z(@asm_sw); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _write_i(20); @@ -787,7 +888,7 @@ begin _compile_expression(); _write_z(@asm_lw); - _write_z(@asm_t1); + _write_register('t', 1); _write_z(@asm_comma); _write_i(20); @@ -797,11 +898,11 @@ begin _write_c('\n'); _write_z(@asm_sw); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _write_c('('); - _write_z(@asm_t1); + _write_register('t', 1); _write_c(')'); end; @@ -855,11 +956,8 @@ end; proc _compile_procedure_body(); begin .compile_procedure_body_loop: - la a0, source_code_position - lw a0, (a0) - la a1, keyword_end - li a2, 3 # "end" length. - _memcmp(); + # 3 is "end" length. + _memcmp(source_code_position, @keyword_end, 3); beqz a0, .compile_procedure_body_epilogue @@ -869,6 +967,60 @@ begin .compile_procedure_body_epilogue: end; +# Writes a register name to the standard output. +# +# Parameters: +# a0 - Register character. +# a1 - Register number. +proc _write_register(); +begin + sw a0, 0(sp) + sw a1, 4(sp) + + _write_c(); + + lw a0, 4(sp) + li t0, '0' + add a0, a0, t0 + _write_c(); +end; + +proc _compile_procedure_prologue(); +begin + _write_z(@asm_prologue); + + v0 := 0; + +.compile_procedure_prologue_loop: + _write_z(@asm_sw); + + li a0, 'a' + lw a1, 0(sp) + _write_register(); + + _write_z(@asm_comma); + + # Calculate the stack offset: 88 - (4 * parameter_counter) + lw t0, 0(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 88 + sub a0, t1, t0 + _write_i(); + + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + lw a0, 0(sp) + addi a0, a0, 1 + sw a0, 0(sp) + + li t0, 8 + bne a0, t0, .compile_procedure_prologue_loop +end; + proc _compile_procedure(); begin # Skip "proc ". 
@@ -883,8 +1035,7 @@ begin _write_token(v0); - la a0, asm_type_function - _write_z(); + _write_z(@asm_type_function); # Write procedure label, _procedure_name: _write_token(v0); @@ -896,8 +1047,7 @@ begin addi a0, a0, 10 _advance_token(); - _write_z(@asm_prologue); - + _compile_procedure_prologue(); _compile_procedure_body(); # Write the epilogue. diff --git a/boot/stage6.elna b/boot/stage6.elna index a8d64bd..8fa670f 100644 --- a/boot/stage6.elna +++ b/boot/stage6.elna @@ -4,11 +4,13 @@ # Stage4 compiler. # -# - Taking value of local and global variables. Variables that doesn't begin -# with "v" are considered global. -# - Simple variable assignment, e.g. v0 := 5 or v0 := global_variable; -# 7 words on the stack, 28 - 56, are reversed for procedure arguments (caller side). -# - Take address unary operation "@". +# - Stack size increased to 128 bytes per procedure. +# 7 words on the stack, 92 - 120, are reserved for procedure arguments (caller side). +# 7 words on the stack, 64 - 92, are reserved for procedure parameters (callee side). +# The first parameter is in 88, the second in 84 and so forth. +# - Unary negate operation, e.g. -5. +# - Unary logical not operation "~". +# - Binary addition "+". 
.section .rodata @@ -37,10 +39,10 @@ keyword_begin: .ascii "begin" keyword_var: .ascii "var" .type asm_prologue, @object -asm_prologue: .string "\taddi sp, sp, -64\n\tsw ra, 60(sp)\n\tsw s0, 56(sp)\n\taddi s0, sp, 64\n" +asm_prologue: .string "\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n" .type asm_epilogue, @object -asm_epilogue: .string "\tlw ra, 60(sp)\n\tlw s0, 56(sp)\n\taddi sp, sp, 64\n\tret\n" +asm_epilogue: .string "\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n" .type asm_type_directive, @object asm_type_directive: .string ".type " @@ -72,11 +74,14 @@ asm_sw: .string "\tsw " .type asm_addi, @object asm_addi: .string "\taddi " -.type asm_t0, @object -asm_t0: .string "t0" +.type asm_add, @object +asm_add: .string "\tadd " -.type asm_t1, @object -asm_t1: .string "t1" +.type asm_neg, @object +asm_neg: .string "\tneg " + +.type asm_not, @object +asm_not: .string "\tnot " .type asm_comma, @object asm_comma: .string ", " @@ -486,7 +491,7 @@ end; proc _compile_integer_literal(); begin _write_z(@asm_li); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _read_token(); @@ -499,7 +504,7 @@ end; proc _compile_character_literal(); begin _write_z(@asm_li); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _write_c('\''); @@ -532,11 +537,11 @@ begin _compile_designator(); _write_z(@asm_lw); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _write_c('('); - _write_z(@asm_t0); + _write_register('t', 0); _write_c(')'); _write_c('\n'); @@ -551,7 +556,39 @@ begin end; -proc _compile_expression(); +proc _compile_negate_expression(); +begin + # Skip the "-" sign. + _advance_token(1); + _compile_term(); + + _write_z(@asm_neg); + _write_register('t', 0); + + _write_z(@asm_comma); + _write_register('t', 0); + + _write_c('\n'); + +end; + +proc _compile_not_expression(); +begin + # Skip the "~" sign. 
+ _advance_token(1); + _compile_term(); + + _write_z(@asm_not); + _write_register('t', 0); + + _write_z(@asm_comma); + _write_register('t', 0); + + _write_c('\n'); + +end; + +proc _compile_term(); begin la t0, source_code_position lw t0, (t0) @@ -559,31 +596,95 @@ begin sw a0, 0(sp) li t1, '\'' - beq a0, t1, .compile_expression_character_literal + beq a0, t1, .compile_term_character_literal li t1, '@' - beq a0, t1, .compile_expression_address + beq a0, t1, .compile_term_address + + li t1, '-' + beq a0, t1, .compile_term_negation + + li t1, '~' + beq a0, t1, .compile_term_not _is_digit(v0); - bnez a0, .compile_expression_integer_literal + bnez a0, .compile_term_integer_literal - goto .compile_expression_variable; + goto .compile_term_variable; -.compile_expression_character_literal: +.compile_term_character_literal: _compile_character_literal(); - goto .compile_expression_end; + goto .compile_term_end; -.compile_expression_integer_literal: +.compile_term_integer_literal: _compile_integer_literal(); - goto .compile_expression_end; + goto .compile_term_end; -.compile_expression_address: +.compile_term_address: _compile_address_expression(); - goto .compile_expression_end; + goto .compile_term_end; -.compile_expression_variable: +.compile_term_negation: + _compile_negate_expression(); + goto .compile_term_end; + +.compile_term_not: + _compile_not_expression(); + goto .compile_term_end; + +.compile_term_variable: _compile_variable_expression(); - goto .compile_expression_end; + goto .compile_term_end; + +.compile_term_end: +end; + +proc _compile_expression(); +begin + _compile_term(); + + la t0, source_code_position + lw t0, (t0) + lb a0, (t0) + + li t1, ' ' + bne a0, t1, .compile_expression_end + + # It is a binary expression. + + # Save the value of the left expression on the stack. 
+ _write_z(@asm_sw); + _write_register('t', 0); + _write_z(@asm_comma); + _write_i(24); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + # Skip the operator and surrounding whitespaces. + _advance_token(3); + + _compile_term(); + + # Load the left expression from the stack; + _write_z(@asm_lw); + _write_register('t', 1); + _write_z(@asm_comma); + _write_i(24); + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + # Execute the operation. + _write_z(@asm_add); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 0); + _write_z(@asm_comma); + _write_register('t', 1); + _write_c('\n'); .compile_expression_end: end; @@ -597,7 +698,7 @@ begin _read_token(); sw a0, 0(sp) - v4 := source_code_position + v4 := source_code_position; sw zero, 8(sp) @@ -617,14 +718,14 @@ begin # Save the argument on the stack. _write_z(@asm_sw); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); - # Calculate the stack offset: 52 - (4 * argument_counter) + # Calculate the stack offset: 116 - (4 * argument_counter) lw t0, 8(sp) li t1, 4 mul t0, t0, t1 - li t1, 52 + li t1, 116 sub a0, t1, t0 _write_i(); @@ -668,11 +769,11 @@ begin _write_z(@asm_comma); - # Calculate the stack offset: 52 - (4 * argument_counter) + # Calculate the stack offset: 116 - (4 * argument_counter) lw t0, 8(sp) li t1, 4 mul t0, t0, t1 - li t1, 52 + li t1, 116 sub a0, t1, t0 _write_i(); @@ -712,7 +813,7 @@ begin _advance_token(1); _write_z(@asm_addi); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _write_z(@asm_sp); _write_z(@asm_comma); @@ -733,7 +834,7 @@ end; proc _compile_global_designator(); begin _write_z(@asm_la); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _read_token(); @@ -772,7 +873,7 @@ begin # Save the assignee address on the stack. 
_write_z(@asm_sw); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _write_i(20); @@ -787,7 +888,7 @@ begin _compile_expression(); _write_z(@asm_lw); - _write_z(@asm_t1); + _write_register('t', 1); _write_z(@asm_comma); _write_i(20); @@ -797,11 +898,11 @@ begin _write_c('\n'); _write_z(@asm_sw); - _write_z(@asm_t0); + _write_register('t', 0); _write_z(@asm_comma); _write_c('('); - _write_z(@asm_t1); + _write_register('t', 1); _write_c(')'); end; @@ -855,11 +956,8 @@ end; proc _compile_procedure_body(); begin .compile_procedure_body_loop: - la a0, source_code_position - lw a0, (a0) - la a1, keyword_end - li a2, 3 # "end" length. - _memcmp(); + # 3 is "end" length. + _memcmp(source_code_position, @keyword_end, 3); beqz a0, .compile_procedure_body_epilogue @@ -869,6 +967,60 @@ begin .compile_procedure_body_epilogue: end; +# Writes a register name to the standard output. +# +# Parameters: +# a0 - Register character. +# a1 - Register number. +proc _write_register(); +begin + sw a0, 0(sp) + sw a1, 4(sp) + + _write_c(); + + lw a0, 4(sp) + li t0, '0' + add a0, a0, t0 + _write_c(); +end; + +proc _compile_procedure_prologue(); +begin + _write_z(@asm_prologue); + + v0 := 0; + +.compile_procedure_prologue_loop: + _write_z(@asm_sw); + + li a0, 'a' + lw a1, 0(sp) + _write_register(); + + _write_z(@asm_comma); + + # Calculate the stack offset: 88 - (4 * parameter_counter) + lw t0, 0(sp) + li t1, 4 + mul t0, t0, t1 + li t1, 88 + sub a0, t1, t0 + _write_i(); + + _write_c('('); + _write_z(@asm_sp); + _write_c(')'); + _write_c('\n'); + + lw a0, 0(sp) + addi a0, a0, 1 + sw a0, 0(sp) + + li t0, 8 + bne a0, t0, .compile_procedure_prologue_loop +end; + proc _compile_procedure(); begin # Skip "proc ". 
@@ -883,8 +1035,7 @@ begin _write_token(v0); - la a0, asm_type_function - _write_z(); + _write_z(@asm_type_function); # Write procedure label, _procedure_name: _write_token(v0); @@ -896,8 +1047,7 @@ begin addi a0, a0, 10 _advance_token(); - _write_z(@asm_prologue); - + _compile_procedure_prologue(); _compile_procedure_body(); # Write the epilogue.