Support preserved registers as identifiers

… in expressions
This commit is contained in:
Eugen Wissner 2025-04-29 23:08:46 +02:00
parent 9c66cec171
commit 23b7a1ab30
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
6 changed files with 851 additions and 503 deletions

View File

@ -37,7 +37,7 @@ end
directory 'build' directory 'build'
desc 'Initial stage' desc 'Initial stage'
file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'build'] do |t| file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'boot/tokenizer.s', 'build'] do |t|
source = t.prerequisites.filter { |prerequisite| prerequisite.end_with? '.s' } source = t.prerequisites.filter { |prerequisite| prerequisite.end_with? '.s' }
sh CROSS_GCC, '-nostdlib', '-o', t.name, *source sh CROSS_GCC, '-nostdlib', '-o', t.name, *source

View File

@ -1,5 +1,6 @@
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower .global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
.global _write_out, _read_file, _memcmp, _write_error, _put_char, _printi .global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp
.global _divide_by_zero_error, _exit .global _divide_by_zero_error, _exit
.section .rodata .section .rodata
@ -187,7 +188,9 @@ _write_out:
# a0 - Buffer pointer. # a0 - Buffer pointer.
# a1 - Buffer size. # a1 - Buffer size.
# #
# Returns the result in a0. # Sets s1 to the buffer passed in a0.
#
# Returns the amount of bytes written in a0.
.type _read_file, @function .type _read_file, @function
_read_file: _read_file:
# Prologue. # Prologue.
@ -196,9 +199,11 @@ _read_file:
sw s0, 0(sp) sw s0, 0(sp)
addi s0, sp, 8 addi s0, sp, 8
mv a2, a1 mv s1, a0
mv a1, a0
li a0, STDIN li a0, STDIN
mv a2, a1
mv a1, s1
li a7, SYS_READ li a7, SYS_READ
ecall ecall
@ -293,3 +298,11 @@ _put_char:
lw s0, 8(sp) lw s0, 8(sp)
add sp, sp, 16 add sp, sp, 16
ret ret
# Loads the 32-bit word stored at the address passed in a0.
#
# Parameters:
# a0 - Address to read from, e.g. the start of an array of words.
#
# Returns the loaded word in a0.
.type _get, @function
_get:
	lw a0, 0(a0)
	ret

View File

@ -207,6 +207,130 @@ _build_binary_expression:
addi sp, sp, 32 addi sp, sp, 32
ret ret
# Writes the 7 byte text "<op> aX, " with _write_out, where "<op> a" is a
# four character word and X is the register number character.
#
# Parameters:
# a0 - The four characters "<op> a" packed into a word, first character in
#      the lowest byte (e.g. 0x6120776c for "lw a").
# a1 - Register number as character.
.type _build_identifier_expression_prefix, @function
_build_identifier_expression_prefix:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	sw a0, 0(sp) # "<op> a"
	li t0, 0x00202c00 # \0,_ - the lowest byte receives the register number.
	or t0, t0, a1
	sw t0, 4(sp) # "X, "

	mv a0, sp
	li a1, 7
	call _write_out

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret

# Generates the instructions that bring the value named by an identifier
# into an argument register. The generated text is passed to _write_out and
# _put_char.
#
# Three kinds of identifiers are distinguished:
# - Names beginning with "loca": "lw aX, <name without loca>(sp)".
# - Two character names beginning with 's': "mv aX, <name>".
# - Everything else: "la aX, <name>", followed by "lw aX, (aX)" if the
#   first character of the name is reported as upper case by _is_upper.
#
# Parameters:
# a0 - Identifier length.
# a1 - Register number as character.
#
# s1 is expected to point at the first character of the identifier; it is
# read but not changed.
.type _build_identifier_expression, @function
_build_identifier_expression:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	sw a1, 16(sp) # Register number as character.
	sw a0, 20(sp) # Identifier length.

	# Compare the first four characters of the identifier with "loca".
	li t0, 0x61636f6c # loca
	sw t0, 12(sp)
	mv a0, s1
	addi a1, sp, 12
	li a2, 4
	call _memcmp
	beqz a0, .Lbuild_identifier_expression_local

	# A name of exactly two characters starting with 's' is handled as a
	# register name.
	lw t0, 20(sp)
	li t1, 2
	bne t0, t1, .Lbuild_identifier_expression_global
	lbu t0, (s1)
	li t1, 's'
	beq t0, t1, .Lbuild_identifier_expression_saved

.Lbuild_identifier_expression_global:
	# la aX, <name>\n
	li a0, 0x6120616c # la a
	lw a1, 16(sp)
	call _build_identifier_expression_prefix

	mv a0, s1
	lw a1, 20(sp)
	call _write_out

	li a0, '\n'
	call _put_char

	# Only names starting with an upper case letter are dereferenced.
	lbu a0, (s1)
	call _is_upper
	beqz a0, .Lbuild_identifier_expression_end

	# lw aX, (aX)\n - assembled in three words at 4(sp)..15(sp).
	lw t1, 16(sp)

	li t0, 0x6120776c # lw a
	sw t0, 4(sp)

	li t0, 0x28202c00 # \0, (
	or t0, t0, t1
	sw t0, 8(sp)

	slli t1, t1, 8 # Move the register number into the second byte.
	li t0, 0x0a290061 # a\0)\n
	or t0, t0, t1
	sw t0, 12(sp)

	addi a0, sp, 4
	li a1, 12
	call _write_out

	j .Lbuild_identifier_expression_end

.Lbuild_identifier_expression_saved:
	# mv aX, <name>\n
	li a0, 0x6120766d # mv a
	lw a1, 16(sp)
	call _build_identifier_expression_prefix

	mv a0, s1
	lw a1, 20(sp)
	call _write_out

	li a0, '\n'
	call _put_char

	j .Lbuild_identifier_expression_end

.Lbuild_identifier_expression_local:
	# lw aX, <name without "loca">(sp)\n
	li a0, 0x6120776c # lw a
	lw a1, 16(sp)
	call _build_identifier_expression_prefix

	lw a1, 20(sp)
	addi a0, s1, 4 # Skip the "loca" variable prefix.
	addi a1, a1, -4 # The prefix does not count towards the length either.
	call _write_out

	li t0, 0x29707328 # (sp)
	sw t0, 12(sp)
	addi a0, sp, 12
	li a1, 4
	call _write_out

	li a0, '\n'
	call _put_char

.Lbuild_identifier_expression_end:
	# Epilogue.
	lw ra, 28(sp)
	lw s0, 24(sp)
	addi sp, sp, 32
	ret
# Evaluates an expression and saves the result in a0. # Evaluates an expression and saves the result in a0.
# #
# a0 - X in aX, the register number to save the result. # a0 - X in aX, the register number to save the result.
@ -242,31 +366,9 @@ _build_expression:
li t0, '_' li t0, '_'
beq a0, t0, .Lbuild_expression_call beq a0, t0, .Lbuild_expression_call
li t0, 0x61636f6c # loca lw a0, 20(sp)
sw t0, 8(sp) lw a1, 28(sp)
mv a0, s1 call _build_identifier_expression
addi a1, sp, 8
li a2, 4
call _memcmp
beqz a0, .Lbuild_expression_identifier
# Named identifier.
lw t1, 28(sp)
li t0, 0x00202c00 # \0,_
or t0, t0, t1
sw t0, 8(sp)
li t0, 0x6120616c # la a
sw t0, 4(sp)
addi a0, sp, 4
li a1, 7
call _write_out
lw a0, 24(sp)
lw a1, 20(sp)
call _write_out
li a0, '\n'
call _put_char
j .Lbuild_expression_advance j .Lbuild_expression_advance
@ -313,33 +415,6 @@ _build_expression:
j .Lbuild_expression_advance j .Lbuild_expression_advance
.Lbuild_expression_identifier:
lw t1, 28(sp)
li t0, 0x00202c00 # \0,_
or t0, t0, t1
sw t0, 16(sp)
li t0, 0x6120776c # lw a
sw t0, 12(sp)
addi a0, sp, 12
li a1, 7
call _write_out
lw a0, 24(sp)
lw a1, 20(sp)
addi a0, a0, 4 # Skip the "loca" variable prefix.
addi a1, a1, -4 # Skip the "loca" variable prefix.
call _write_out
li t0, '\n'
sw t0, 16(sp)
li t0, 0x29707328 # (sp)
sw t0, 12(sp)
addi a0, sp, 12
li a1, 5
call _write_out
j .Lbuild_expression_advance
.Lbuild_expression_call: .Lbuild_expression_call:
lw a0, 24(sp) lw a0, 24(sp)
lw a1, 20(sp) lw a1, 20(sp)
@ -683,24 +758,6 @@ _skip_spaces:
.Lspace_loop_end: .Lspace_loop_end:
ret ret
# Moves s1 forward over every tab character it currently points at and
# stops at the first byte that is not a tab.
#
# Takes no arguments. Uses t0 and t1 as scratch registers.
.type _skip_indentation, @function
_skip_indentation:
	li t1, '\t'

.Lskip_indentation_do:
	lbu t0, (s1)
	bne t0, t1, .Lskip_indentation_end # Not a tab, nothing more to skip.

	addi s1, s1, 1
	j .Lskip_indentation_do

.Lskip_indentation_end:
	ret
# Parameters: # Parameters:
# a0 - Line length. # a0 - Line length.
.type _skip_comment, @function .type _skip_comment, @function
@ -1134,7 +1191,7 @@ _compile_procedure:
# Generate the body of the procedure. # Generate the body of the procedure.
.Lcompile_procedure_body: .Lcompile_procedure_body:
call _skip_indentation call _skip_spaces
call _read_line call _read_line
sw a0, 12(sp) sw a0, 12(sp)
li t0, 0x0a646e65 # end\n li t0, 0x0a646e65 # end\n
@ -1427,23 +1484,19 @@ _compile_line:
li t1, '(' li t1, '('
beq t0, t1, .Lcompile_line_comment beq t0, t1, .Lcompile_line_comment
li t0, 0x0a6d6172 # ram\n
sw t0, 12(sp)
li t0, 0x676f7270 # prog li t0, 0x676f7270 # prog
sw t0, 8(sp) sw t0, 12(sp)
mv a0, s1 mv a0, s1
addi a1, sp, 8 addi a1, sp, 12
li a2, 8 li a2, 4
call _memcmp call _memcmp
beqz a0, .Lcompile_line_program beqz a0, .Lcompile_line_program
li t0, 0x0a74 # t\n
sw t0, 12(sp)
li t0, 0x736e6f63 # cons li t0, 0x736e6f63 # cons
sw t0, 8(sp) sw t0, 12(sp)
mv a0, s1 mv a0, s1
addi a1, sp, 8 addi a1, sp, 12
li a2, 6 li a2, 4
call _memcmp call _memcmp
beqz a0, .Lcompile_line_const beqz a0, .Lcompile_line_const
@ -1455,23 +1508,19 @@ _compile_line:
call _memcmp call _memcmp
beqz a0, .Lcompile_line_var beqz a0, .Lcompile_line_var
li t0, 0x20 # _
sw t0, 12(sp)
li t0, 0x636f7270 # proc li t0, 0x636f7270 # proc
sw t0, 8(sp) sw t0, 12(sp)
mv a0, s1 mv a0, s1
addi a1, sp, 8 addi a1, sp, 12
li a2, 5 li a2, 4
call _memcmp call _memcmp
beqz a0, .Lcompile_line_procedure beqz a0, .Lcompile_line_procedure
li t0, 0x0a6e # n\n
sw t0, 12(sp)
li t0, 0x69676562 # begi li t0, 0x69676562 # begi
sw t0, 8(sp) sw t0, 12(sp)
mv a0, s1 mv a0, s1
addi a1, sp, 8 addi a1, sp, 12
li a2, 6 li a2, 4
call _memcmp call _memcmp
beqz a0, .Lcompile_line_begin beqz a0, .Lcompile_line_begin
@ -1491,13 +1540,11 @@ _compile_line:
call _memcmp call _memcmp
beqz a0, .Lcompile_line_identifier beqz a0, .Lcompile_line_identifier
li t0, 0x7472 # rt
sw t0, 12(sp)
li t0, 0x6f706d69 # impo li t0, 0x6f706d69 # impo
sw t0, 8(sp) sw t0, 12(sp)
mv a0, s1 mv a0, s1
addi a1, sp, 8 addi a1, sp, 12
li a2, 6 li a2, 4
call _memcmp call _memcmp
beqz a0, .Lcompile_line_import beqz a0, .Lcompile_line_import
@ -1509,13 +1556,11 @@ _compile_line:
call _memcmp call _memcmp
beqz a0, .Lcompile_line_goto beqz a0, .Lcompile_line_goto
li t0, 0x6e72 # rn
sw t0, 12(sp)
li t0, 0x75746572 # retu li t0, 0x75746572 # retu
sw t0, 8(sp) sw t0, 12(sp)
mv a0, s1 mv a0, s1
addi a1, sp, 8 addi a1, sp, 12
li a2, 6 li a2, 4
call _memcmp call _memcmp
beqz a0, .Lcompile_line_return beqz a0, .Lcompile_line_return
@ -1716,7 +1761,7 @@ _compile:
lbu t0, (s1) # t0 = Current character. lbu t0, (s1) # t0 = Current character.
beqz t0, .Lcompile_end # Exit the loop on the NUL character. beqz t0, .Lcompile_end # Exit the loop on the NUL character.
call _skip_indentation call _skip_spaces
call _read_line call _read_line
lw a1, 4(sp) lw a1, 4(sp)
call _compile_line call _compile_line
@ -1736,16 +1781,35 @@ _compile:
addi sp, sp, 16 addi sp, sp, 16
ret ret
# Entry point. .type _main, @function
.type _start, @function _main:
_start: # Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
# Read the source from the standard input. # Read the source from the standard input.
la a0, source_code la a0, source_code
li a1, SOURCE_BUFFER_SIZE # Buffer size. li a1, SOURCE_BUFFER_SIZE # Buffer size.
call _read_file call _read_file
li s2, 1 li s2, 1
la s1, source_code # s1 = Source code position.
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Entry point.
.type _start, @function
_start:
call _tokenizer_initialize
li a1, 50
call _write_error
call _main
call _compile call _compile
# Call exit. # Call exit.

File diff suppressed because it is too large Load Diff

20
boot/states.txt Normal file
View File

@ -0,0 +1,20 @@
- start
digit: integer
upper: identifier
lower: identifier
space: start
invalid: error
- identifier
digit: identifier
upper: identifier
lower: identifier
space: end
invalid: end
- integer
digit: integer
upper: end
lower: end
space: end
invalid: end

181
boot/tokenizer.s Normal file
View File

@ -0,0 +1,181 @@
.global _tokenizer_initialize
.section .rodata

# Class name for each of the 128 ASCII codes, listed in code order. Every
# entry is the class name followed by a newline, so the entries have
# different lengths.
raw_classes:
	# 0x00 - 0x1f
	.rept 32
	.ascii "invalid\n"
	.endr

	# 0x20, the space character.
	.ascii "space\n"

	# 0x21 - 0x2f
	.rept 15
	.ascii "invalid\n"
	.endr

	# 0x30 - 0x39, '0' to '9'.
	.rept 10
	.ascii "digit\n"
	.endr

	# 0x3a - 0x40
	.rept 7
	.ascii "invalid\n"
	.endr

	# 0x41 - 0x5a, 'A' to 'Z'.
	.rept 26
	.ascii "upper\n"
	.endr

	# 0x5b - 0x60
	.rept 6
	.ascii "invalid\n"
	.endr

	# 0x61 - 0x7a, 'a' to 'z'.
	.rept 26
	.ascii "lower\n"
	.endr

	# 0x7b - 0x7f
	.rept 5
	.ascii "invalid\n"
	.endr
.section .bss

# 1024 zero-initialized bytes.
# NOTE(review): presumably meant to hold the distinct class names; nothing in
# the visible code reads or writes it yet - confirm.
.type class_names, @object
class_names:
	.skip 1024
.size class_names, . - class_names
.section .data

# Classification table with one 4 byte entry for each of the 128 ASCII
# codes. The storage is reserved explicitly: without it the label would
# sit at the end of the section while .size still claimed 512 bytes, and
# any access through it would touch unrelated memory.
# NOTE(review): nothing in the visible code fills or reads the table yet.
.type classes, @object
.size classes, 512 # 128 characters * 4 byte.
classes: .zero 512
.section .text
# Initializes the classification table.
#
# Parameters:
# a0 - Raw input for the classification table.
#
# NOTE(review): the body is still a stub. It only sets up and tears down
# its stack frame; a0 is not read and no table is written.
.type _tokenizer_classes, @function
_tokenizer_classes:
	# Prologue. The frame size is a multiple of 16 bytes because the RISC-V
	# calling convention requires sp to stay 16 byte aligned.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret
# Initializes the lookup tables.
#
# Takes no arguments. Passes the address of raw_classes to
# _tokenizer_classes.
.type _tokenizer_initialize, @function
_tokenizer_initialize:
	# Prologue. The frame size is a multiple of 16 bytes because the RISC-V
	# calling convention requires sp to stay 16 byte aligned, and this
	# function makes a call while the frame is active.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	la a0, raw_classes
	call _tokenizer_classes

	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret