Support preserved registers as identifiers

… in expressions
This commit is contained in:
Eugen Wissner 2025-04-29 23:08:46 +02:00
parent 9c66cec171
commit 23b7a1ab30
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
6 changed files with 851 additions and 503 deletions

View File

@ -37,7 +37,7 @@ end
directory 'build'
desc 'Initial stage'
file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'build'] do |t|
file 'build/stage1' => ['boot/stage1.s', 'boot/common-boot.s', 'boot/tokenizer.s', 'build'] do |t|
source = t.prerequisites.filter { |prerequisite| prerequisite.end_with? '.s' }
sh CROSS_GCC, '-nostdlib', '-o', t.name, *source

View File

@ -1,5 +1,6 @@
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
.global _write_out, _read_file, _memcmp, _write_error, _put_char, _printi
.global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp
.global _divide_by_zero_error, _exit
.section .rodata
@ -187,7 +188,9 @@ _write_out:
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Returns the result in a0.
# Sets s1 to the buffer passed in a0.
#
# Returns the amount of bytes written in a0.
.type _read_file, @function
_read_file:
# Prologue.
@ -196,9 +199,11 @@ _read_file:
sw s0, 0(sp)
addi s0, sp, 8
mv a2, a1
mv a1, a0
mv s1, a0
li a0, STDIN
mv a2, a1
mv a1, s1
li a7, SYS_READ
ecall
@ -293,3 +298,11 @@ _put_char:
lw s0, 8(sp)
add sp, sp, 16
ret
# Loads the 32-bit word stored at the given address.
#
# Parameters:
# a0 - Address to read from, e.g. a pointer to the first element of an array.
#
# Returns the loaded word in a0.
	.type _get, @function
_get:
	lw a0, 0(a0)
	ret

View File

@ -207,6 +207,130 @@ _build_binary_expression:
addi sp, sp, 32
ret
# Emits the instruction(s) that load the value named by the identifier at the
# current source position (s1) into the argument register aX.
#
# The identifier is classified as follows:
# - It starts with "loca": local variable, emits "lw aX, <rest>(sp)" where
#   <rest> is the identifier without the 4 character "loca" prefix.
# - It is 2 characters long and starts with 's': saved register, emits
#   "mv aX, <identifier>".
# - Anything else: global symbol, emits "la aX, <identifier>". If _is_upper
#   returns non-zero for the first character, "lw aX, (aX)" is emitted as well.
#
# Parameters:
# a0 - Identifier length.
# a1 - Register number as character.
#
# Stack layout:
# 20(sp) - Identifier length.
# 16(sp) - Register number as character.
# 4(sp) to 15(sp) - Scratch space for the text being written.
	.type _build_identifier_expression, @function
_build_identifier_expression:
	# Prologue.
	addi sp, sp, -32
	sw s0, 24(sp)
	sw ra, 28(sp)
	addi s0, sp, 32

	sw a1, 16(sp) # Register number as character.
	sw a0, 20(sp) # Identifier length.

	# Compare the first 4 characters of the identifier with "loca".
	li t0, 0x61636f6c # loca
	sw t0, 12(sp)
	li a2, 4
	addi a1, sp, 12
	mv a0, s1
	call _memcmp
	bnez a0, .Lbuild_identifier_expression_not_local

	# Local variable: "lw aX, ".
	li t0, 0x6120776c # lw a
	sw t0, 8(sp)
	lw t1, 16(sp)
	li t0, 0x00202c00 # "X, " once the register character is OR-ed in.
	or t0, t0, t1
	sw t0, 12(sp)
	li a1, 7
	addi a0, sp, 8
	call _write_out

	# The identifier without the "loca" variable prefix.
	lw a1, 20(sp)
	addi a1, a1, -4
	addi a0, s1, 4
	call _write_out

	li t0, 0x29707328 # (sp)
	sw t0, 12(sp)
	li a1, 4
	addi a0, sp, 12
	call _write_out

	li a0, '\n'
	call _put_char
	j .Lbuild_identifier_expression_end

.Lbuild_identifier_expression_not_local:
	# A saved register is exactly 2 characters long and starts with 's'.
	lw t0, 20(sp)
	li t1, 2
	bne t0, t1, .Lbuild_identifier_expression_global
	lbu t0, (s1)
	li t1, 's'
	bne t0, t1, .Lbuild_identifier_expression_global

	# Saved register: "mv aX, ".
	li t0, 0x6120766d # mv a
	sw t0, 8(sp)
	lw t1, 16(sp)
	li t0, 0x00202c00 # "X, " once the register character is OR-ed in.
	or t0, t0, t1
	sw t0, 12(sp)
	li a1, 7
	addi a0, sp, 8
	call _write_out

	lw a1, 20(sp)
	mv a0, s1
	call _write_out

	li a0, '\n'
	call _put_char
	j .Lbuild_identifier_expression_end

.Lbuild_identifier_expression_global:
	# Global identifier: "la aX, ".
	li t0, 0x6120616c # la a
	sw t0, 8(sp)
	lw t1, 16(sp)
	li t0, 0x00202c00 # "X, " once the register character is OR-ed in.
	or t0, t0, t1
	sw t0, 12(sp)
	li a1, 7
	addi a0, sp, 8
	call _write_out

	lw a1, 20(sp)
	mv a0, s1
	call _write_out

	li a0, '\n'
	call _put_char

	# Only identifiers whose first character passes _is_upper are dereferenced.
	lbu a0, (s1)
	call _is_upper
	beqz a0, .Lbuild_identifier_expression_end

	# "lw aX, (aX)\n", assembled in 4(sp) to 15(sp).
	lw t1, 16(sp)
	li t0, 0x6120776c # lw a
	sw t0, 4(sp)
	li t0, 0x28202c00 # "X, (" once the register character is OR-ed in.
	or t0, t0, t1
	sw t0, 8(sp)
	slli t2, t1, 8 # Move the register character into the second byte.
	li t0, 0x0a290061 # "aX)\n" once the register character is OR-ed in.
	or t0, t0, t2
	sw t0, 12(sp)
	li a1, 12
	addi a0, sp, 4
	call _write_out

.Lbuild_identifier_expression_end:
	# Epilogue.
	lw s0, 24(sp)
	lw ra, 28(sp)
	addi sp, sp, 32
	ret
# Evaluates an expression and saves the result in a0.
#
# a0 - X in aX, the register number to save the result.
@ -242,31 +366,9 @@ _build_expression:
li t0, '_'
beq a0, t0, .Lbuild_expression_call
li t0, 0x61636f6c # loca
sw t0, 8(sp)
mv a0, s1
addi a1, sp, 8
li a2, 4
call _memcmp
beqz a0, .Lbuild_expression_identifier
# Named identifier.
lw t1, 28(sp)
li t0, 0x00202c00 # \0,_
or t0, t0, t1
sw t0, 8(sp)
li t0, 0x6120616c # la a
sw t0, 4(sp)
addi a0, sp, 4
li a1, 7
call _write_out
lw a0, 24(sp)
lw a1, 20(sp)
call _write_out
li a0, '\n'
call _put_char
lw a0, 20(sp)
lw a1, 28(sp)
call _build_identifier_expression
j .Lbuild_expression_advance
@ -313,33 +415,6 @@ _build_expression:
j .Lbuild_expression_advance
.Lbuild_expression_identifier:
lw t1, 28(sp)
li t0, 0x00202c00 # \0,_
or t0, t0, t1
sw t0, 16(sp)
li t0, 0x6120776c # lw a
sw t0, 12(sp)
addi a0, sp, 12
li a1, 7
call _write_out
lw a0, 24(sp)
lw a1, 20(sp)
addi a0, a0, 4 # Skip the "loca" variable prefix.
addi a1, a1, -4 # Skip the "loca" variable prefix.
call _write_out
li t0, '\n'
sw t0, 16(sp)
li t0, 0x29707328 # (sp)
sw t0, 12(sp)
addi a0, sp, 12
li a1, 5
call _write_out
j .Lbuild_expression_advance
.Lbuild_expression_call:
lw a0, 24(sp)
lw a1, 20(sp)
@ -683,24 +758,6 @@ _skip_spaces:
.Lspace_loop_end:
ret
# Advances the source position in s1 past every tab character it currently
# points at, stopping at the first byte that is not a tab.
#
# Uses t0 (current character) and t1 (the tab character) as scratch registers.
	.type _skip_indentation, @function
_skip_indentation:
	li t1, '\t'

.Lskip_indentation_do:
	lbu t0, (s1)
	bne t0, t1, .Lskip_indentation_end
	addi s1, s1, 1
	j .Lskip_indentation_do

.Lskip_indentation_end:
	ret
# Parameters:
# a0 - Line length.
.type _skip_comment, @function
@ -1134,7 +1191,7 @@ _compile_procedure:
# Generate the body of the procedure.
.Lcompile_procedure_body:
call _skip_indentation
call _skip_spaces
call _read_line
sw a0, 12(sp)
li t0, 0x0a646e65 # end\n
@ -1427,23 +1484,19 @@ _compile_line:
li t1, '('
beq t0, t1, .Lcompile_line_comment
li t0, 0x0a6d6172 # ram\n
sw t0, 12(sp)
li t0, 0x676f7270 # prog
sw t0, 8(sp)
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 8
li a2, 8
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_program
li t0, 0x0a74 # t\n
sw t0, 12(sp)
li t0, 0x736e6f63 # cons
sw t0, 8(sp)
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 8
li a2, 6
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_const
@ -1455,23 +1508,19 @@ _compile_line:
call _memcmp
beqz a0, .Lcompile_line_var
li t0, 0x20 # _
sw t0, 12(sp)
li t0, 0x636f7270 # proc
sw t0, 8(sp)
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 8
li a2, 5
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_procedure
li t0, 0x0a6e # n\n
sw t0, 12(sp)
li t0, 0x69676562 # begi
sw t0, 8(sp)
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 8
li a2, 6
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_begin
@ -1491,13 +1540,11 @@ _compile_line:
call _memcmp
beqz a0, .Lcompile_line_identifier
li t0, 0x7472 # rt
sw t0, 12(sp)
li t0, 0x6f706d69 # impo
sw t0, 8(sp)
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 8
li a2, 6
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_import
@ -1509,13 +1556,11 @@ _compile_line:
call _memcmp
beqz a0, .Lcompile_line_goto
li t0, 0x6e72 # rn
sw t0, 12(sp)
li t0, 0x75746572 # retu
sw t0, 8(sp)
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 8
li a2, 6
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_return
@ -1716,7 +1761,7 @@ _compile:
lbu t0, (s1) # t0 = Current character.
beqz t0, .Lcompile_end # Exit the loop on the NUL character.
call _skip_indentation
call _skip_spaces
call _read_line
lw a1, 4(sp)
call _compile_line
@ -1736,16 +1781,35 @@ _compile:
addi sp, sp, 16
ret
# Entry point.
.type _start, @function
_start:
# Reads the source from the standard input into source_code and sets up the
# registers used while compiling.
#
# On return:
# s1 - Source code position, pointing at the beginning of source_code.
# s2 - Set to 1. NOTE(review): the meaning of s2 is not visible here, confirm
#      against its users.
	.type _main, @function
_main:
	# Prologue.
	addi sp, sp, -8
	sw s0, 0(sp)
	sw ra, 4(sp)
	addi s0, sp, 8

	# Read the source from the standard input.
	li a1, SOURCE_BUFFER_SIZE # Buffer size.
	la a0, source_code # Buffer pointer.
	call _read_file

	la s1, source_code # s1 = Source code position.
	li s2, 1

	# Epilogue.
	lw s0, 0(sp)
	lw ra, 4(sp)
	addi sp, sp, 8
	ret
# Entry point.
.type _start, @function
_start:
call _tokenizer_initialize
li a1, 50
call _write_error
call _main
call _compile
# Call exit.

File diff suppressed because it is too large Load Diff

20
boot/states.txt Normal file
View File

@ -0,0 +1,20 @@
- start
digit: integer
upper: identifier
lower: identifier
space: start
invalid: error
- identifier
digit: identifier
upper: identifier
lower: identifier
space: end
invalid: end
- integer
digit: integer
upper: end
lower: end
space: end
invalid: end

181
boot/tokenizer.s Normal file
View File

@ -0,0 +1,181 @@
.global _tokenizer_initialize
.section .rodata
# Character class names for the 128 ASCII codes, in code order. Every entry is
# the class name followed by a line feed, so the entries have variable length.
raw_classes:
	# 0x00 - 0x1f: control characters. Tab and line feed are in this range and
	# are classified as invalid.
	.rept 32
	.ascii "invalid\n"
	.endr

	# 0x20: space.
	.ascii "space\n"

	# 0x21 - 0x2f: punctuation.
	.rept 15
	.ascii "invalid\n"
	.endr

	# 0x30 - 0x39: '0' - '9'.
	.rept 10
	.ascii "digit\n"
	.endr

	# 0x3a - 0x40: punctuation.
	.rept 7
	.ascii "invalid\n"
	.endr

	# 0x41 - 0x5a: 'A' - 'Z'.
	.rept 26
	.ascii "upper\n"
	.endr

	# 0x5b - 0x60: punctuation.
	.rept 6
	.ascii "invalid\n"
	.endr

	# 0x61 - 0x7a: 'a' - 'z'.
	.rept 26
	.ascii "lower\n"
	.endr

	# 0x7b - 0x7f: punctuation and DEL.
	.rept 5
	.ascii "invalid\n"
	.endr
.section .bss

# 1024 zero-initialized bytes.
# NOTE(review): presumably storage for the class names parsed from
# raw_classes; nothing in the visible code reads or writes it yet.
class_names:
	.space 1024
	.type class_names, @object
	.size class_names, 1024
.section .data

# Classification table with one 4 byte entry per ASCII character.
#
# The storage is reserved explicitly: the label used to be followed directly by
# the next section, so the symbol claimed 512 bytes without owning any and
# every access would have touched whatever the linker placed after it.
# The table is aligned because its entries are 4 byte words.
	.balign 4
	.type classes, @object
	.size classes, 512 # 128 characters * 4 byte.
classes:
	.zero 512
.section .text
# Initializes the classification table.
#
# The procedure is currently a stub: it only sets up and tears down its stack
# frame and neither reads its argument nor writes any table.
#
# Parameters:
# a0 - Raw input for the classification table.
	.type _tokenizer_classes, @function
_tokenizer_classes:
	# Prologue.
	addi sp, sp, -8
	sw s0, 0(sp)
	sw ra, 4(sp)
	addi s0, sp, 8

	# Epilogue.
	lw s0, 0(sp)
	lw ra, 4(sp)
	addi sp, sp, 8
	ret
# Initializes the lookup tables of the tokenizer by passing the raw class
# list (raw_classes) to _tokenizer_classes.
#
# Takes no parameters.
	.type _tokenizer_initialize, @function
_tokenizer_initialize:
	# Prologue.
	addi sp, sp, -8
	sw s0, 0(sp)
	sw ra, 4(sp)
	addi s0, sp, 8

	# Build the character classification table from its textual description.
	la a0, raw_classes
	call _tokenizer_classes

	# Epilogue.
	lw s0, 0(sp)
	lw ra, 4(sp)
	addi sp, sp, 8
	ret