diff --git a/Rakefile b/Rakefile index 3fa065e..ef74301 100644 --- a/Rakefile +++ b/Rakefile @@ -37,7 +37,7 @@ end directory 'build' desc 'Initial stage' -file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'build'] do |t| +file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'boot/tokenizer.s', 'build'] do |t| source = t.prerequisites.filter { |prerequisite| prerequisite.end_with? '.s' } sh CROSS_GCC, '-nostdlib', '-o', t.name, *source diff --git a/boot/common-boot.s b/boot/common-boot.s index e8eba52..e5796f1 100644 --- a/boot/common-boot.s +++ b/boot/common-boot.s @@ -1,5 +1,6 @@ .global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower -.global _write_out, _read_file, _memcmp, _write_error, _put_char, _printi +.global _write_out, _read_file, _write_error, _put_char, _printi +.global _get, _memcmp .global _divide_by_zero_error, _exit .section .rodata @@ -187,7 +188,9 @@ _write_out: # a0 - Buffer pointer. # a1 - Buffer size. # -# Returns the result in a0. +# Sets s1 to the buffer passed in a0. +# +# Returns the amount of bytes written in a0. .type _read_file, @function _read_file: # Prologue. @@ -196,9 +199,11 @@ _read_file: sw s0, 0(sp) addi s0, sp, 8 - mv a2, a1 - mv a1, a0 + mv s1, a0 + li a0, STDIN + mv a2, a1 + mv a1, s1 li a7, SYS_READ ecall @@ -293,3 +298,11 @@ _put_char: lw s0, 8(sp) add sp, sp, 16 ret + +# a0 - Pointer to an array to get the first element. +# +# Dereferences a pointer and returns what is on the address in a0. +.type _get, @function +_get: + lw a0, (a0) + ret diff --git a/boot/stage1.s b/boot/stage1.s index 61dcdec..7137edc 100644 --- a/boot/stage1.s +++ b/boot/stage1.s @@ -207,6 +207,130 @@ _build_binary_expression: addi sp, sp, 32 ret +# Parameters: +# a0 - Identifier length. +# a1 - Register number as character. +.type _build_identifier_expression, @function +_build_identifier_expression: + # Prologue. + addi sp, sp, -32 + sw ra, 28(sp) + sw s0, 24(sp) + addi s0, sp, 32 + + sw a0, 20(sp) # Identifier length. + sw a1, 16(sp) # Register number as character. + + li t0, 0x61636f6c # loca + sw t0, 12(sp) + mv a0, s1 + addi a1, sp, 12 + li a2, 4 + call _memcmp + beqz a0, .Lbuild_identifier_expression_local + + lbu a0, (s1) + lw t0, 20(sp) + addi t0, t0, -2 + seqz t0, t0 + addi t1, a0, -'s' + seqz t1, t1 + and t0, t0, t1 + bnez t0, .Lbuild_identifier_expression_saved + + # Global identifier. + lw t1, 16(sp) + li t0, 0x00202c00 # \0,_ + or t0, t0, t1 + sw t0, 12(sp) + li t0, 0x6120616c # la a + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 7 + call _write_out + + mv a0, s1 + lw a1, 20(sp) + call _write_out + + li a0, '\n' + call _put_char + + lbu a0, (s1) + call _is_upper + beqz a0, .Lbuild_identifier_expression_end + + lw t1, 16(sp) + li t0, 0x0a290061 # a\0)\n + sll t2, t1, 8 + or t0, t0, t2 + sw t0, 12(sp) + li t0, 0x28202c00 # \0, ( + or t0, t0, t1 + sw t0, 8(sp) + li t0, 0x6120776c # lw a + sw t0, 4(sp) + addi a0, sp, 4 + li a1, 12 + call _write_out + + j .Lbuild_identifier_expression_end + +.Lbuild_identifier_expression_saved: + li t0, 0x00202c00 # \0,_ + lw t1, 16(sp) + or t0, t0, t1 + sw t0, 12(sp) + li t0, 0x6120766d # mv a + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 7 + call _write_out + + mv a0, s1 + lw a1, 20(sp) + call _write_out + + li a0, '\n' + call _put_char + + j .Lbuild_identifier_expression_end + +.Lbuild_identifier_expression_local: + lw t1, 16(sp) + li t0, 0x00202c00 # \0,_ + or t0, t0, t1 + sw t0, 12(sp) + li t0, 0x6120776c # lw a + sw t0, 8(sp) + addi a0, sp, 8 + li a1, 7 + call _write_out + + mv a0, s1 + lw a1, 20(sp) + addi a0, a0, 4 # Skip the "loca" variable prefix. + addi a1, a1, -4 # Skip the "loca" variable prefix. + call _write_out + + li t0, 0x29707328 # (sp) + sw t0, 12(sp) + addi a0, sp, 12 + li a1, 4 + call _write_out + li a0, '\n' + call _put_char + + j .Lbuild_identifier_expression_end + +.Lbuild_identifier_expression_end: + + # Epilogue. + lw ra, 28(sp) + lw s0, 24(sp) + addi sp, sp, 32 + ret + # Evalutes an expression and saves the result in a0. # # a0 - X in aX, the register number to save the result. @@ -242,31 +366,9 @@ _build_expression: li t0, '_' beq a0, t0, .Lbuild_expression_call - li t0, 0x61636f6c # loca - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 4 - call _memcmp - beqz a0, .Lbuild_expression_identifier - - # Named identifier. - lw t1, 28(sp) - li t0, 0x00202c00 # \0,_ - or t0, t0, t1 - sw t0, 8(sp) - li t0, 0x6120616c # la a - sw t0, 4(sp) - addi a0, sp, 4 - li a1, 7 - call _write_out - - lw a0, 24(sp) - lw a1, 20(sp) - call _write_out - - li a0, '\n' - call _put_char + lw a0, 20(sp) + lw a1, 28(sp) + call _build_identifier_expression j .Lbuild_expression_advance @@ -313,33 +415,6 @@ _build_expression: j .Lbuild_expression_advance -.Lbuild_expression_identifier: - lw t1, 28(sp) - li t0, 0x00202c00 # \0,_ - or t0, t0, t1 - sw t0, 16(sp) - li t0, 0x6120776c # lw a - sw t0, 12(sp) - addi a0, sp, 12 - li a1, 7 - call _write_out - - lw a0, 24(sp) - lw a1, 20(sp) - addi a0, a0, 4 # Skip the "loca" variable prefix. - addi a1, a1, -4 # Skip the "loca" variable prefix. - call _write_out - - li t0, '\n' - sw t0, 16(sp) - li t0, 0x29707328 # (sp) - sw t0, 12(sp) - addi a0, sp, 12 - li a1, 5 - call _write_out - - j .Lbuild_expression_advance - .Lbuild_expression_call: lw a0, 24(sp) lw a1, 20(sp) @@ -683,24 +758,6 @@ _skip_spaces: .Lspace_loop_end: ret -# Skips tabs at the line beginning. -.type _skip_indentation, @function -_skip_indentation: -.Lskip_indentation_do: - lbu t0, (s1) - - li t1, '\t' - beq t0, t1, .Lskip_indentation_skip - - j .Lskip_indentation_end - -.Lskip_indentation_skip: - addi s1, s1, 1 - j .Lskip_indentation_do - -.Lskip_indentation_end: - ret - # Parameters: # a0 - Line length. .type _skip_comment, @function @@ -1134,7 +1191,7 @@ _compile_procedure: # Generate the body of the procedure. .Lcompile_procedure_body: - call _skip_indentation + call _skip_spaces call _read_line sw a0, 12(sp) li t0, 0x0a646e65 # end\n @@ -1427,23 +1484,19 @@ _compile_line: li t1, '(' beq t0, t1, .Lcompile_line_comment - li t0, 0x0a6d6172 # ram\n - sw t0, 12(sp) li t0, 0x676f7270 # prog - sw t0, 8(sp) + sw t0, 12(sp) mv a0, s1 - addi a1, sp, 8 - li a2, 8 + addi a1, sp, 12 + li a2, 4 call _memcmp beqz a0, .Lcompile_line_program - li t0, 0x0a74 # t\n - sw t0, 12(sp) li t0, 0x736e6f63 # cons - sw t0, 8(sp) + sw t0, 12(sp) mv a0, s1 - addi a1, sp, 8 - li a2, 6 + addi a1, sp, 12 + li a2, 4 call _memcmp beqz a0, .Lcompile_line_const @@ -1455,23 +1508,19 @@ _compile_line: call _memcmp beqz a0, .Lcompile_line_var - li t0, 0x20 # _ - sw t0, 12(sp) li t0, 0x636f7270 # proc - sw t0, 8(sp) + sw t0, 12(sp) mv a0, s1 - addi a1, sp, 8 - li a2, 5 + addi a1, sp, 12 + li a2, 4 call _memcmp beqz a0, .Lcompile_line_procedure - li t0, 0x0a6e # n\n - sw t0, 12(sp) li t0, 0x69676562 # begi - sw t0, 8(sp) + sw t0, 12(sp) mv a0, s1 - addi a1, sp, 8 - li a2, 6 + addi a1, sp, 12 + li a2, 4 call _memcmp beqz a0, .Lcompile_line_begin @@ -1491,13 +1540,11 @@ _compile_line: call _memcmp beqz a0, .Lcompile_line_identifier - li t0, 0x7472 # rt - sw t0, 12(sp) li t0, 0x6f706d69 # impo - sw t0, 8(sp) + sw t0, 12(sp) mv a0, s1 - addi a1, sp, 8 - li a2, 6 + addi a1, sp, 12 + li a2, 4 call _memcmp beqz a0, .Lcompile_line_import @@ -1509,13 +1556,11 @@ _compile_line: call _memcmp beqz a0, .Lcompile_line_goto - li t0, 0x6e72 # rn - sw t0, 12(sp) li t0, 0x75746572 # retu - sw t0, 8(sp) + sw t0, 12(sp) mv a0, s1 - addi a1, sp, 8 - li a2, 6 + addi a1, sp, 12 + li a2, 4 call _memcmp beqz a0, .Lcompile_line_return @@ -1716,7 +1761,7 @@ _compile: lbu t0, (s1) # t0 = Current character. beqz t0, .Lcompile_end # Exit the loop on the NUL character. - call _skip_indentation + call _skip_spaces call _read_line lw a1, 4(sp) call _compile_line @@ -1736,16 +1781,35 @@ _compile: addi sp, sp, 16 ret -# Entry point. -.type _start, @function -_start: +.type _main, @function +_main: + # Prologue. + addi sp, sp, -8 + sw ra, 4(sp) + sw s0, 0(sp) + addi s0, sp, 8 + # Read the source from the standard input. la a0, source_code li a1, SOURCE_BUFFER_SIZE # Buffer size. call _read_file li s2, 1 - la s1, source_code # s1 = Source code position. + + # Epilogue. + lw ra, 4(sp) + lw s0, 0(sp) + addi sp, sp, 8 + ret + +# Entry point. +.type _start, @function +_start: + call _tokenizer_initialize + li a1, 50 + call _write_error + + call _main call _compile # Call exit. diff --git a/boot/stage2.elna b/boot/stage2.elna index 0eb713c..3cb2deb 100644 --- a/boot/stage2.elna +++ b/boot/stage2.elna @@ -39,7 +39,10 @@ begin end proc _build_binary_expression() -var loca0, loca4, loca8, loca20: Word +var + loca0, loca4, loca8, loca16, loca20: Word + loca12: ^Byte + loca24: Bool begin _build_expression(0) @@ -48,60 +51,51 @@ begin _skip_spaces() loca20 := _read_token() + loca12 := _current() - li t0, 0x26 # & - sw t0, 16(sp) - mv a0, s1 - lw a1, 20(sp) - addi a2, sp, 16 - call _token_compare - beqz a0, .L_build_binary_expression_and + loca16 := 0x26 (* & *) + loca24 := _token_compare(loca12, loca20, @loca16) + if loca24 = 0 then + goto .L_build_binary_expression_and + end - li t0, 0x726f # or - sw t0, 16(sp) - mv a0, s1 - lw a1, 20(sp) - addi a2, sp, 16 - call _token_compare - beqz a0, .L_build_binary_expression_or + loca16 := 0x726f (* or *) + loca24 := _token_compare(loca12, loca20, @loca16) + if loca24 = 0 then + goto .L_build_binary_expression_or + end - li t0, 0x3d # = - sw t0, 16(sp) - mv a0, s1 - lw a1, 20(sp) - addi a2, sp, 16 - call _token_compare - beqz a0, .L_build_binary_expression_equal + loca16 := 0x3d (* = *) + loca24 := _token_compare(loca12, loca20, @loca16) + if loca24 = 0 then + goto .L_build_binary_expression_equal + end - li t0, 0x2b # + - sw t0, 16(sp) - mv a0, s1 - lw a1, 20(sp) - addi a2, sp, 16 - call _token_compare - beqz a0, .L_build_binary_expression_plus + loca16 := 0x2b (* + *) + loca24 := _token_compare(loca12, loca20, @loca16) + if loca24 = 0 then + goto .L_build_binary_expression_plus + end - li t0, 0x2d # - - sw t0, 16(sp) - mv a0, s1 - lw a1, 20(sp) - addi a2, sp, 16 - call _token_compare - beqz a0, .L_build_binary_expression_minus + loca16 := 0x2d (* - *) + loca24 := _token_compare(loca12, loca20, @loca16) + if loca24 = 0 then + goto .L_build_binary_expression_minus + end - li t0, 0x2a # * - sw t0, 16(sp) - mv a0, s1 - lw a1, 20(sp) - addi a2, sp, 16 - call _token_compare - beqz a0, .L_build_binary_expression_product + loca16 := 0x2a (* * *) + loca24 := _token_compare(loca12, loca20, @loca16) + if loca24 = 0 then + goto .L_build_binary_expression_product + end goto .Lbuild_binary_expression_end .L_build_binary_expression_equal _advance(1) (* Skip =. *) _build_expression(1) + + loca0 := 0x627573(* sub *) _write_out(@loca0, 3) _write_out(@loca4, 4) _write_out(@loca4, 4) @@ -110,7 +104,8 @@ begin loca0 := 0x7a716573 (* seqz *) _write_out(@loca0, 4) _write_out(@loca4, 4) - _write_out(@loca4, 4) + _write_out(@loca4, 3) + _put_char(0x0a) (* \n *) goto .Lbuild_binary_expression_end @@ -172,6 +167,67 @@ begin .Lbuild_binary_expression_end end +proc _build_identifier_expression(loca84: Word, loca80: Byte) +begin + loca24 := _current() + loca0 := 0x61636f6c (* loca *) + loca0 := _memcmp(@loca0, loca24, 4) + + if loca0 = 0 then + loca8 := 0x6120776c (* lw a *) + _write_out(@loca8, 4) + loca8 := 0x00202c00 or loca80 (* \0,_ *) + _write_out(@loca8, 3) + + loca4 := loca24 + 4 + loca0 := loca84 - 4 + _write_out(loca4, loca0) (* Skip the "loca" variable prefix. *) + + loca8 := 0x29707328 (* (sp) *) + _write_out(@loca8, 4) + _put_char(0x0a) (* \n *) + + goto .Lbuild_identifier_expression_end + end + loca0 := _front(loca24) + loca8 := loca84 = 2 + loca12 := loca0 = 0x73 + if loca8 & loca12 then + loca8 := 0x6120766d (* mv a *) + _write_out(@loca8, 4) + loca8 := 0x00202c00 or loca80 (* \0,_ *) + _write_out(@loca8, 3) + _write_out(loca24, loca84) + _put_char(0x0a) (* \n *) + + goto .Lbuild_identifier_expression_end + end + + (* Global identifier. *) + loca8 := 0x6120616c (* la a *) + _write_out(@loca8, 4) + loca8 := 0x00202c00 or loca80 + _write_out(@loca8, 3) + + _write_out(loca24, loca84) + _put_char(0x0a) + + if _is_upper(loca0) then + loca8 := 0x6120776c (* lw a *) + _write_out(@loca8, 4) + loca8 := 0x28202c00 or loca28 (* \0, ( *) + _write_out(@loca8, 4) + _put_char(0x61) (* a *) + _put_char(loca28) + _put_char(0x29) (* ) *) + _put_char(0x0a) (* \n *) + + goto .Lbuild_identifier_expression_end + end + + .Lbuild_identifier_expression_end +end + (* Evalutes an expression and saves the result in a0. @@ -188,41 +244,28 @@ begin _skip_spaces() loca20 := _read_token() loca24 := _current() + loca0 := _front(loca24) - lbu a0, (s1) - li t0, '-' - beq a0, t0, .Lbuild_expression_negate + (* - *) + if loca0 = 0x2d then + goto .Lbuild_expression_negate + end - lbu a0, (s1) - li t0, '@' - beq a0, t0, .Lbuild_expression_address + (* @ *) + if loca0 = 0x40 then + goto .Lbuild_expression_address + end - loca0 := _front() if _is_digit(loca0) then goto .Lbuild_expression_literal end - lbu a0, (s1) - li t0, '_' - beq a0, t0, .Lbuild_expression_call - - li t0, 0x61636f6c # loca - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 4 - call _memcmp - beqz a0, .Lbuild_expression_identifier - - (* Named identifier. *) - loca8 := 0x6120616c (* la a *) - _write_out(@loca8, 4) - loca8 := 0x00202c00 or loca28 - _write_out(@loca8, 3) - - _write_out(loca24, loca20) - _put_char(0x0a) + (* _ *) + if loca0 = 0x5f then + goto .Lbuild_expression_call + end + _build_identifier_expression(loca20, loca28); goto .Lbuild_expression_advance .Lbuild_expression_negate @@ -263,22 +306,6 @@ begin goto .Lbuild_expression_advance - .Lbuild_expression_identifier - loca8 := 0x6120776c (* lw a *) - _write_out(@loca8, 4) - loca8 := 0x00202c00 or loca28 # \0,_ - _write_out(@loca8, 3) - - loca4 := loca24 + 4 - loca0 := loca20 - 4 - _write_out(loca4, loca0) (* Skip the "loca" variable prefix. *) - - loca8 := 0x29707328 (* (sp) *) - _write_out(@loca8, 4) - _put_char(0x0a) (* \n *) - - goto .Lbuild_expression_advance - .Lbuild_expression_call _advance(loca20) _advance(1) @@ -313,6 +340,7 @@ proc _compile_identifier() var loca0, loca16, loca8: Word loca20, loca12: ^Byte + loca4: Bool begin (* Save the pointer to the identifier and its length on the stack. *) loca20 := _current() @@ -325,21 +353,18 @@ begin loca12 := _current() loca8 := _read_token() - _advance(loca8) # Skip that token. + _advance(loca8) (* Skip that token. *) _skip_spaces() - li t0, 0x3d3a # := - sw t0, 4(sp) - lw a0, 12(sp) - lw a1, 8(sp) - addi a2, sp, 4 - call _token_compare - beqz a0, .Lcompile_identifier_assign + loca0 := 0x3d3a (* := *) + loca4 := _token_compare(loca12, loca8, @loca0) + if loca4 = 0 then + goto .Lcompile_identifier_assign + end - lw t0, 12(sp) - lbu t0, (t0) - li t1, 0x28 # ( - beq t0, t1, .Lcompile_identifier_call + if _front(loca12) = 0x28 then + goto .Lcompile_identifier_call + end goto .Lcompile_identifier_end @@ -379,14 +404,16 @@ Returns the procedure result in a0. proc _compile_call(loca84: ^Byte, loca80: Word) var loca0, loca4, loca12: Word + loca8: ^Byte begin loca12 := 0 (* Argument count for a procedure call. *) .Lcompile_call_paren _skip_spaces() - lbu t0, (s1) - li t1, 0x29 # ) - beq t0, t1, .Lcompile_call_complete + loca8 := _current() + if _front(loca8) = 0x29 then + goto .Lcompile_call_complete + end .Lcompile_call_argument _build_expression(0) @@ -409,9 +436,11 @@ begin _put_char(0x0a) (* \n *) _skip_spaces() - lbu t0, (s1) - li t1, ',' - bne t0, t1, .Lcompile_call_paren + loca8 := _current() + loca0 := _front(loca8) = 0x2c + if loca0 = 0 then + goto .Lcompile_call_paren + end loca12 := loca12 + 1 (* Argument count for a procedure call. *) @@ -497,64 +526,97 @@ Reads a token and returns its length in a0. _read_token doesn't change s1, it finds the length of the token s1 is pointing to. *) proc _read_token() -var loca4: Word +var + loca0, loca4: Word + loca8: ^Byte begin - lbu t0, (s1) # t0 = Current character. + loca8 := _current() + loca0 := _front(loca8) (* t0 = Current character. *) loca4 := 0 - li t1, '.' - beq t0, t1, .Ltoken_character_single + (* . *) + if loca0 = 0x2e then + goto .Ltoken_character_single + end - li t1, ',' - beq t0, t1, .Ltoken_character_single + (* , *) + if loca0 = 0x2c then + goto .Ltoken_character_single + end - li t1, ':' - beq t0, t1, .Ltoken_character_colon + (* : *) + if loca0 = 0x3a then + goto .Ltoken_character_colon + end - li t1, ';' - beq t0, t1, .Ltoken_character_single + (* ; *) + if loca0 = 0x3b then + goto .Ltoken_character_single + end - li t1, '(' - beq t0, t1, .Ltoken_character_single + (* ( *) + if loca0 = 0x28 then + goto .Ltoken_character_single + end - li t1, ')' - beq t0, t1, .Ltoken_character_single + (* ) *) + if loca0 = 0x29 then + goto .Ltoken_character_single + end - li t1, '[' - beq t0, t1, .Ltoken_character_single + (* [ *) + if loca0 = 0x5b then + goto .Ltoken_character_single + end - li t1, ']' - beq t0, t1, .Ltoken_character_single + (* ] *) + if loca0 = 0x5d then + goto .Ltoken_character_single + end - li t1, '^' - beq t0, t1, .Ltoken_character_single + (* ^ *) + if loca0 = 0x5e then + goto .Ltoken_character_single + end - li t1, '&' - beq t0, t1, .Ltoken_character_single + (* & *) + if loca0 = 0x26 then + goto .Ltoken_character_single + end - li t1, '=' - beq t0, t1, .Ltoken_character_single + (* = *) + if loca0 = 0x3d then + goto .Ltoken_character_single + end - li t1, '+' - beq t0, t1, .Ltoken_character_single + (* + *) + if loca0 = 0x2b then + goto .Ltoken_character_single + end - li t1, '-' - beq t0, t1, .Ltoken_character_single + (* - *) + if loca0 = 0x2d then + goto .Ltoken_character_single + end - li t1, '*' - beq t0, t1, .Ltoken_character_single + (* * *) + if loca0 = 0x2a then + goto .Ltoken_character_single + end + + (* @ *) + if loca0 = 0x40 then + goto .Ltoken_character_single + end - li t1, '@' - beq t0, t1, .Ltoken_character_single (* Expect an identifier or a number. *) .Ltoken_character_loop_do - lw t6, 4(sp) - add t1, s1, t6 - lbu a0, (t1) # a0 = Current character. + loca0 := loca8 + loca4 + loca0 := _front(loca0) - call _is_alnum - - beqz a0, .Ltoken_character_end + if _is_alnum(loca0) = 0 then + goto .Ltoken_character_end + end loca4 := loca4 + 1 goto .Ltoken_character_loop_do @@ -563,31 +625,46 @@ begin goto .Ltoken_character_end .Ltoken_character_colon - lbu t0, 1(s1) # t0 = The character after the colon. + loca0 := loca8 + 1 + loca0 := _front(loca0) (* t0 = The character after the colon. *) loca4 := loca4 + 1 - li t1, '=' - beq t0, t1, .Ltoken_character_single + (* = *) + if loca0 = 0x3d then + goto .Ltoken_character_single + end goto .Ltoken_character_end .Ltoken_character_end - lw a0, 4(sp) + return loca4 end (* Skips the spaces till the next non space character. *) proc _skip_spaces() +var + loca0: Byte + loca4: ^Byte begin .Lspace_loop_do - lbu t0, (s1) # t0 = Current character. + loca4 := _current() + loca0 := _front(loca4) (* t0 = Current character. *) - li t1, ' ' - beq t0, t1, .Lspace_loop_repeat - li t1, '\t' - beq t0, t1, .Lspace_loop_repeat - li t1, '\n' - beq t0, t1, .Lspace_loop_repeat - li t1, '\r' - beq t0, t1, .Lspace_loop_repeat + (* _ *) + if loca0 = 0x20 then + goto .Lspace_loop_repeat + end + (* \t *) + if loca0 = 0x09 then + goto .Lspace_loop_repeat + end + (* \n *) + if loca0 = 0x0a then + goto .Lspace_loop_repeat + end + (* \r *) + if loca0 = 0x0d then + goto .Lspace_loop_repeat + end goto .Lspace_loop_end .Lspace_loop_repeat @@ -597,50 +674,37 @@ begin .Lspace_loop_end end -(* Skips tabs at the line beginning. *) -proc _skip_indentation() -begin - .Lskip_indentation_do - lbu t0, (s1) - - li t1, '\t' - beq t0, t1, .Lskip_indentation_skip - - goto .Lskip_indentation_end - - .Lskip_indentation_skip - _advance(1) - goto .Lskip_indentation_do - - .Lskip_indentation_end -end - (* Parameters: a0 - Line length. *) proc _skip_comment(loca84: Word) +var + loca0: ^Byte + loca4: Word + loca8: Int begin - (* Check whether this is a comment. *) - li t0, 0x2a28 # (* - sw t0, 4(sp) - addi a0, sp, 4 - mv a1, s1 - li a2, 2 - call _memcmp - bnez a0, .Lskip_comment_end + loca0 := _current() + (* Check whether this is a comment. *) + loca4 := 0x2a28 (* ( and * *) + loca8 := _memcmp(loca0, @loca4, 2) + if loca8 = 0 then + goto .Lskip_comment_continue + end + goto .Lskip_comment_end + + .Lskip_comment_continue _advance(2) (* Skip (*. *) - li t0, 0x292a # *) - sw t0, 4(sp) + loca4 := 0x292a (* ( and * *) .Lskip_comment_loop - addi a0, sp, 4 - mv a1, s1 - li a2, 2 - call _memcmp - beqz a0, .Lskip_comment_close + loca0 := _current() + loca8 := _memcmp(loca0, @loca4, 2) + if loca8 = 0 then + goto .Lskip_comment_close + end _advance(1) @@ -687,7 +751,9 @@ begin end proc _compile_constant_section() -var loca0: Word +var + loca0: Word + loca4: ^Byte begin (* .section .rodata *) loca0 := 0x6365732e (* .sec *) @@ -705,9 +771,11 @@ begin .Lcompile_constant_section_item _skip_spaces() - lbu a0, (s1) - call _is_upper - beqz a0, .Lcompile_constant_section_end + loca4 := _current() + loca0 := _front(loca4) + if _is_upper(loca0) = 0 then + goto .Lcompile_constant_section_end + end _compile_constant() goto .Lcompile_constant_section_item @@ -746,7 +814,9 @@ begin end proc _compile_variable_section() -var loca0: Word +var + loca0: Word + loca4: ^Byte begin (* .section .bss *) loca0 := 0x6365732e (* .sec *) @@ -762,10 +832,12 @@ begin .Lcompile_variable_section_item _skip_spaces() - lbu a0, (s1) - call _is_lower - beqz a0, .Lcompile_variable_section_end + loca4 := _current() + loca0 := _front(loca4) + if _is_lower(loca0) = 0 then + goto .Lcompile_variable_section_end + end _compile_variable() goto .Lcompile_variable_section_item @@ -846,7 +918,7 @@ end proc _compile_procedure() var loca0, loca4, loca8, loca12, loca16: Word - loca20: ^Char + loca20, loca24: ^Byte begin _advance(5) (* Skip proc_ *) loca16 := _read_token() @@ -887,15 +959,15 @@ begin *) .Lcompile_procedure_begin _skip_spaces() - call _read_token + loca0 := _read_token() - mv a1, a0 - mv a0, s1 - addi a2, sp, 8 - add s1, s1, a1 - call _token_compare + loca24 := _current() + _advance(loca0) + loca0 := _token_compare(loca24, loca0, @loca8) - bnez a0, .Lcompile_procedure_begin + if loca0 = 1 then + goto .Lcompile_procedure_begin + end (* Generate the procedure prologue with a predefined stack size. *) loca0 := 0x69646461 (* addi *) @@ -982,33 +1054,30 @@ begin (* Generate the body of the procedure. *) .Lcompile_procedure_body - _skip_indentation() - call _read_line - sw a0, 12(sp) - li t0, 0x0a646e65 # end\n - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 4 - call _memcmp + _skip_spaces() + loca12 := _read_line() + loca8 := 0x0a646e65 (* end\n *) + loca24 := _current() + loca8 := _memcmp(loca24, @loca8, 4) - beqz a0, .Lcompile_procedure_end + if loca8 = 0 then + goto .Lcompile_procedure_end + end - lw a0, 12(sp) - call _compile_line + _compile_line(loca12) goto .Lcompile_procedure_body .Lcompile_procedure_end _advance(4) (* Skip end\n. *) (* Generate the procedure epilogue with a predefined stack size. *) - loca0 := 0x7220776c # lw r + loca0 := 0x7220776c (* lw r *) _write_out(@loca0, 4) - loca0 := 0x39202c61 # a, 9 + loca0 := 0x39202c61 (* a, 9 *) _write_out(@loca0, 4) - loca0 := 0x70732832 # 2(sp + loca0 := 0x70732832 (* 2(sp *) _write_out(@loca0, 4) - loca0 := 0x0a29 # )\n + loca0 := 0x0a29 (* )\n *) _write_out(@loca0, 2) loca0 := 0x7320776c (* lw s *) @@ -1043,41 +1112,51 @@ Compares two string, which of one has a length, the other one is null-terminated If the strings match sets a0 to 0, otherwise sets it to 1. *) -proc _token_compare() +proc _token_compare(loca84: ^Byte, loca80: Word, loca76: ^Byte) +var + loca0: Bool + loca4, loca12: Byte + loca8: Word begin - addi t0, a0, 0 - addi t1, a1, 0 - addi t2, a2, 0 - .Ltoken_compare_loop - lbu t3, (t2) + loca4 := _front(loca76) (* Will only be 0 if the current character in the null terminated string is \0 and the remaining length of the another string is 0. *) - or t4, t3, t1 - beqz t4, .Ltoken_compare_equal + loca8 := loca4 or loca80 + if loca8 = 0 then + goto .Ltoken_compare_equal + end + if loca80 = 0 then + goto .Ltoken_compare_not_equal + end + if loca4 = 0 then + goto .Ltoken_compare_not_equal + end + loca12 := _front(loca84) + if loca4 = loca12 then + goto .Ltoken_compare_continue + end + goto .Ltoken_compare_not_equal - beqz t1, .Ltoken_compare_not_equal - beqz t3, .Ltoken_compare_not_equal + .Ltoken_compare_continue - lbu t4, (t0) - bne t3, t4, .Ltoken_compare_not_equal - - addi t0, t0, 1 - addi t1, t1, -1 - addi t2, t2, 1 + loca84 := loca84 + 1 + loca80 := loca80 - 1 + loca76 := loca76 + 1 goto .Ltoken_compare_loop .Ltoken_compare_not_equal - li a0, 1 + loca0 := 1 goto .Ltoken_compare_end .Ltoken_compare_equal - li a0, 0 + loca0 := 0 .Ltoken_compare_end + return loca0 end proc _compile_goto() @@ -1112,15 +1191,13 @@ begin loca0 := _current() _write_out(loca0, loca84) - lw t0, 84(sp) # Line length. - mv t1, s1 # Line start. + loca0 := loca0 + loca84 + loca0 := loca0 - 1 (* Last character on the line. *) - add t1, t1, t0 - addi t1, t1, -1 # Last character on the line. - - lbu t1, (t1) - li t2, ':' - beq t1, t2, .Lcompile_label_colon + loca0 := _front(loca0) + if loca0 = 0x3a then + goto .Lcompile_label_colon + end _put_char(0x3a) (* : *) @@ -1161,8 +1238,7 @@ begin (* Write the label *) _write_out(@loca16, 4) - mv a0, s2 - call _printi + _printi(s2) _put_char(0x0a) (* \n *) @@ -1182,13 +1258,13 @@ begin (* Write the label *) _write_out(@loca16, 4) - mv a0, s2 - call _printi + _printi(s2) loca12 := 0x0a3a0a3a (* :\n:\n *) _write_out(@loca12, 2) - addi s2, s2, 1 # Increment the label counter. + (* Increment the label counter. *) + addi s2, s2, 1 _advance(4) (* Skip the end with newline. *) end @@ -1202,119 +1278,99 @@ Returns 1 in a0 if the parsed line contained a text section element such a procedure or the program entry point. Otherwise sets a0 to 0. *) proc _compile_line(loca84: Word, loca80: Bool) +var + loca0: Char + loca4: Int + loca8: Bool + loca12: Word + loca16: ^Byte begin - beqz a0, .Lcompile_line_empty # Skip an empty line. + if loca84 = 0 then + goto .Lcompile_line_empty (* Skip an empty line. *) + end - lbu t0, (s1) - li t1, '(' - beq t0, t1, .Lcompile_line_comment + loca16 := _current() + loca0 := _front(loca16) + (* ( *) + if loca0 = 0x28 then + goto .Lcompile_line_comment + end + loca16 := _current() - li t0, 0x0a6d6172 # ram\n - sw t0, 12(sp) - li t0, 0x676f7270 # prog - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 8 - call _memcmp - beqz a0, .Lcompile_line_program + loca12 := 0x676f7270 (* prog *) + loca4 := _memcmp(loca16, @loca12, 4) + if loca4 = 0 then + goto .Lcompile_line_program + end - li t0, 0x0a74 # t\n - sw t0, 12(sp) - li t0, 0x736e6f63 # cons - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 6 - call _memcmp - beqz a0, .Lcompile_line_const + loca12 := 0x736e6f63 (* cons *) + loca4 := _memcmp(loca16, @loca12, 4) + if loca4 = 0 then + goto .Lcompile_line_const + end - li t0, 0x0a726176 # var\n - sw t0, 12(sp) - mv a0, s1 - addi a1, sp, 12 - li a2, 4 - call _memcmp - beqz a0, .Lcompile_line_var + loca12 := 0x0a726176 (* var\n *) + loca4 := _memcmp(loca16, @loca12, 4) + if loca4 = 0 then + goto .Lcompile_line_var + end - li t0, 0x20 # _ - sw t0, 12(sp) - li t0, 0x636f7270 # proc - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 5 - call _memcmp - beqz a0, .Lcompile_line_procedure + loca12 := 0x636f7270 (* proc *) + loca4 := _memcmp(loca16, @loca12, 4) + if loca4 = 0 then + goto .Lcompile_line_procedure + end - li t0, 0x0a6e # n\n - sw t0, 12(sp) - li t0, 0x69676562 # begi - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 6 - call _memcmp - beqz a0, .Lcompile_line_begin + loca12 := 0x69676562 (* begi *) + loca4 := _memcmp(loca16, @loca12, 4) + if loca4 = 0 then + goto .Lcompile_line_begin + end - li t0, 0x2e646e65 # end. - sw t0, 12(sp) - mv a0, s1 - addi a1, sp, 12 - li a2, 4 - call _memcmp - beqz a0, .Lcompile_line_exit + loca12 := 0x2e646e65 (* end. *) + loca4 := _memcmp(loca16, @loca12, 4) + if loca4 = 0 then + goto .Lcompile_line_exit + end - li t0, 0x61636f6c # loca - sw t0, 12(sp) - mv a0, s1 - addi a1, sp, 12 - li a2, 4 - call _memcmp - beqz a0, .Lcompile_line_identifier + loca12 := 0x61636f6c (* loca *) + loca4 := _memcmp(loca16, @loca12, 4) + if loca4 = 0 then + goto .Lcompile_line_identifier + end - li t0, 0x7472 # rt - sw t0, 12(sp) - li t0, 0x6f706d69 # impo - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 6 - call _memcmp - beqz a0, .Lcompile_line_import + loca12 := 0x6f706d69 (* impo *) + loca4 := _memcmp(loca16, @loca12, 4) + if loca4 = 0 then + goto .Lcompile_line_import + end - li t0, 0x6f746f67 # goto - sw t0, 12(sp) - mv a0, s1 - addi a1, sp, 12 - li a2, 4 - call _memcmp - beqz a0, .Lcompile_line_goto + loca12 := 0x6f746f67 (* goto *) + loca4 := _memcmp(loca16, @loca12, 4) + if loca4 = 0 then + goto .Lcompile_line_goto + end - li t0, 0x6e72 # rn - sw t0, 12(sp) - li t0, 0x75746572 # retu - sw t0, 8(sp) - mv a0, s1 - addi a1, sp, 8 - li a2, 6 - call _memcmp - beqz a0, .Lcompile_line_return + loca12 := 0x75746572 (* retu *) + loca4 := _memcmp(loca16, @loca12, 4) + if loca4 = 0 then + goto .Lcompile_line_return + end - li t0, 0x6669 # if - sw t0, 12(sp) - mv a0, s1 - addi a1, sp, 12 - li a2, 2 - call _memcmp - beqz a0, .Lcompile_line_if - - lbu t0, (s1) - li t1, '.' - beq t0, t1, .Lcompile_line_label - li t1, '_' - beq t0, t1, .Lcompile_line_identifier + loca12 := 0x6669 (* if *) + loca4 := _memcmp(loca16, @loca12, 2) + if loca4 = 0 then + goto .Lcompile_line_if + end + (* . *) + if loca0 = 0x2e then + goto .Lcompile_line_label + end + (* _ *) + if loca0 = 0x5f then + goto .Lcompile_line_identifier + end goto .Lcompile_line_unchanged (* Else. *) .Lcompile_line_if: @@ -1326,10 +1382,6 @@ begin goto .Lcompile_line_section .Lcompile_line_return - (* DEBUG - mv a0, s1 - li a1, 8 - call _write_error *) _compile_return() goto .Lcompile_line_section @@ -1350,12 +1402,14 @@ begin goto .Lcompile_line_section .Lcompile_line_begin - lw a1, 80(sp) - bnez a1, .Lcompile_line_compile_entry + + if loca80 = 1 then + goto .Lcompile_line_compile_entry + end _compile_text_section() .Lcompile_line_compile_entry _compile_entry_point() - li a0, 1 + loca8 := 1 goto .Lcompile_line_end .Lcompile_line_const @@ -1363,12 +1417,13 @@ begin goto .Lcompile_line_section .Lcompile_line_procedure - lw a1, 80(sp) - bnez a1, .Lcompile_line_compile_procedure + if loca80 = 1 then + goto .Lcompile_line_compile_procedure + end _compile_text_section() .Lcompile_line_compile_procedure _compile_procedure() - li a0, 1 + loca8 := 1 goto .Lcompile_line_end .Lcompile_line_var @@ -1392,13 +1447,13 @@ begin goto .Lcompile_line_section .Lcompile_line_section - mv a0, zero + loca8 := 0 .Lcompile_line_end - sw a0, 12(sp) _skip_spaces() _skip_comment() - lw a0, 12(sp) + + return loca8 end (* Prints ".section .text" and exits. *) @@ -1443,11 +1498,9 @@ end proc _compile_exit() var loca0: Word begin - (* - li a0, 0 - li a7, SYS_EXIT - ecall - *) + (* li a0, 0 *) + (* li a7, SYS_EXIT *) + (* ecall *) loca0 := 0x6120696c (* li a *) _write_out(@loca0, 4) loca0 := 0x30202c30 (* 0, 0 *) @@ -1469,16 +1522,20 @@ end (* Finds the end of the line and returns its length in a0. *) proc _read_line() +var + loca0: ^Byte + loca4: Byte begin loca0 := _current() (* Local position in the source text. *) .Lread_line_do - lw t0, 0(sp) - lbu t1, (t0) # t1 = Current character. - beqz t1, .Lread_line_end # Exit the loop on the NUL character. - li t2, '\n' - beq t1, t2, .Lread_line_end # Exit the loop on the new line. - + loca4 := _front(loca0) (* t1 = Current character. *) + if loca4 = 0 then + goto .Lread_line_end (* Exit the loop on the NUL character. *) + end + if loca4 = 0x0a then + goto .Lread_line_end (* Exit the loop on the new line. *) + end loca0 := loca0 + 1 goto .Lread_line_do @@ -1491,18 +1548,26 @@ proc _compile() var loca0, loca4: Word loca8: Bool + loca12: Char + loca16: ^Byte begin loca4 := 0 (* Whether the text section header was already emitted. *) .Lcompile_do - lbu t0, (s1) # t0 = Current character. - beqz t0, .Lcompile_end # Exit the loop on the NUL character. + loca16 := _current() + loca12 := _front(loca16) (* t0 = Current character. *) - _skip_indentation() + if loca12 = 0 then + goto .Lcompile_end (* Exit the loop on the NUL character. *) + end + + _skip_spaces() loca0 := _read_line() loca8 := _compile_line(loca0, loca4) - beqz a0, .Lcompile_do + if loca8 = 0 then + goto .Lcompile_do + end (* Update whether the text section header was already emitted. *) loca4 := loca4 or loca8 @@ -1513,30 +1578,35 @@ end (* Returns the pointer to the current position in the source text in a0. *) proc _current() begin - mv a0, s1 + return s1 end (* a0 is the number of bytes to advance in the source text. *) -proc _advance() +proc _advance(loca84: Word) begin add s1, s1, a0 end -(* Returns the first character in the remaining source text. *) -proc _front() +(* +a0 - Pointer to an array to get the first element. + +Returns the first character in the remaining source text. +*) +proc _front(loca84: ^Word) begin - lbu a0, (s1) + return _get(loca84) & 0xff +end + +proc _main() +begin + (* Read the source from the standard input. *) + _read_file(source_code, SOURCE_BUFFER_SIZE) + + addi s2, zero, 1 end (* Entry point. *) begin - (* Read the source from the standard input. *) - la a0, source_code - la a1, SOURCE_BUFFER_SIZE # Buffer size. - lw a1, (a1) - call _read_file - - la s1, source_code # s1 = Source code position. - li s2, 1 + _main() _compile() end. diff --git a/boot/states.txt b/boot/states.txt new file mode 100644 index 0000000..20d5966 --- /dev/null +++ b/boot/states.txt @@ -0,0 +1,20 @@ +- start +digit: integer +upper: identifier +lower: identifier +space: start +invalid: error + +- identifier +digit: identifier +upper: identifier +lower: identifier +space: end +invalid: end + +- integer: +digit: integer +upper: end +lower: end +space: end +invalid: end diff --git a/boot/tokenizer.s b/boot/tokenizer.s new file mode 100644 index 0000000..2057c2e --- /dev/null +++ b/boot/tokenizer.s @@ -0,0 +1,181 @@ +.global _tokenizer_initialize + +.section .rodata + +raw_classes: + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "space\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "digit\n" + .ascii "digit\n" + .ascii "digit\n" + .ascii "digit\n" + .ascii "digit\n" + .ascii "digit\n" + .ascii "digit\n" + .ascii "digit\n" + .ascii "digit\n" + .ascii "digit\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "upper\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "lower\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + .ascii "invalid\n" + +.section .bss +.type class_names, @object +.size class_names, 1024 +class_names: .zero 1024 + +.section .data +.type classes, @object +.size classes, 512 # 128 characters * 4 byte. +classes: + +.section .text + +# Initializes the classification table. +# +# Paramaters: +# a0 - Raw input for the classification table. +.type _tokenizer_classes, @function +_tokenizer_classes: + # Prologue. + addi sp, sp, -8 + sw ra, 4(sp) + sw s0, 0(sp) + addi s0, sp, 8 + + # Epilogue. + lw ra, 4(sp) + lw s0, 0(sp) + addi sp, sp, 8 + ret + +# Initializes the lookup tables. +.type _tokenizer_initialize, @function +_tokenizer_initialize: + # Prologue. + addi sp, sp, -8 + sw ra, 4(sp) + sw s0, 0(sp) + addi s0, sp, 8 + + la a0, raw_classes + call _tokenizer_classes + + # Epilogue. + lw ra, 4(sp) + lw s0, 0(sp) + addi sp, sp, 8 + ret