Wrap the mmap2 syscall

This commit is contained in:
Eugen Wissner 2025-05-09 18:42:46 +02:00
parent 92f50fff5f
commit 890486532c
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
5 changed files with 264 additions and 137 deletions

View File

@ -5,17 +5,22 @@
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower .global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
.global _write_out, _read_file, _write_error, _put_char, _printi .global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp, _memchr, _memmem, _memcpy .global _get, _memcmp, _memchr, _memmem, _memcpy
.global _divide_by_zero_error, _exit .global _divide_by_zero_error, _exit, _mmap
.global _strings_index .global _strings_index, _string_equal
.section .rodata .section .rodata
.equ SYS_READ, 63 .equ SYS_READ, 63
.equ SYS_WRITE, 64 .equ SYS_WRITE, 64
.equ SYS_EXIT, 93 .equ SYS_EXIT, 93
.equ SYS_MMAP2, 222
.equ STDIN, 0 .equ STDIN, 0
.equ STDOUT, 1 .equ STDOUT, 1
.equ STDERR, 2 .equ STDERR, 2
.equ PROT_READ, 0x1
.equ PROT_WRITE, 0x2
.equ MAP_PRIVATE, 0x02
.equ MAP_ANONYMOUS, 0x20
new_line: .ascii "\n" new_line: .ascii "\n"
@ -500,3 +505,58 @@ _strings_index:
lw s0, 24(sp) lw s0, 24(sp)
add sp, sp, 32 add sp, sp, 32
ret ret
# Tests whether two strings, each given as a (length, pointer) pair, are equal.
#
# Parameters:
# a0 - Length of the first string.
# a1 - Pointer to the first string.
# a2 - Length of the second string.
# a3 - Pointer to the second string.
#
# Sets a0 to 1 if the strings are equal, to 0 if not.
.type _string_equal, @function
_string_equal:
	# Prologue.
	addi sp, sp, -32
	sw ra, 28(sp)
	sw s0, 24(sp)
	addi s0, sp, 32

	# Strings of different lengths cannot be equal, so the content
	# comparison is skipped entirely in that case.
	bne a0, a2, .Lstring_equal_differ

	# The lengths match. Rearrange the arguments for _memcmp: both pointers
	# go into a0 and a1, a2 already holds the common length.
	mv a0, a1
	mv a1, a3
	call _memcmp

	# A zero result from _memcmp is treated as "equal": turn it into 1,
	# and any non-zero result into 0.
	seqz a0, a0
	j .Lstring_equal_return

.Lstring_equal_differ:
	li a0, 0

.Lstring_equal_return:
	# Epilogue.
	lw s0, 24(sp)
	lw ra, 28(sp)
	addi sp, sp, 32
	ret
# Requests a 4096 byte, readable and writable, private anonymous memory
# mapping from the kernel through the SYS_MMAP2 system call.
#
# Takes no parameters; every system call argument is fixed below.
#
# Sets a0 to the mapping address. The value left in a0 by the system call is
# returned as is, it is not checked for an error here.
.type _mmap, @function
_mmap:
	li a7, SYS_MMAP2

	mv a0, zero # Address at which to create the mapping.
	li a1, 4096 # The length of the mapping.
	li a2, PROT_WRITE | PROT_READ # Protection flags.
	li a3, MAP_PRIVATE | MAP_ANONYMOUS # The mapping is not backed by a file.
	li a4, -1 # File descriptor.
	mv a5, zero # Page offset.

	ecall
	ret

View File

@ -60,4 +60,5 @@
# #
# Symbols. # Symbols.
# #
.equ TYPE_PRIMITIVE, 1 .equ TYPE_PRIMITIVE, 0x01
.equ TYPE_POINTER, 0x02

View File

@ -8,6 +8,7 @@
# Registers used as global variables: # Registers used as global variables:
# s1 - Contains the current position in the source text. # s1 - Contains the current position in the source text.
# s2 - Label counter. # s2 - Label counter.
# s3 - Dynamic memory region.
# #
# - The compiler expects valid input, otherwise it will generate invalid # - The compiler expects valid input, otherwise it will generate invalid
# assembly or hang. There is no error checking, no semantic analysis, no # assembly or hang. There is no error checking, no semantic analysis, no
@ -95,13 +96,13 @@ _compile_import:
.Lcompile_import_loop: .Lcompile_import_loop:
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next call lex_next
li t0, TOKEN_IMPORT li t0, TOKEN_IMPORT
lw t1, 0(sp) lw t1, 0(sp)
bne t0, t1, .Lcompile_import_end bne t0, t1, .Lcompile_import_end
# a0 is set from the previous _tokenize_next call. Skip the module name. # a0 is set from the previous lex_next call. Skip the module name.
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
j .Lcompile_import_loop j .Lcompile_import_loop
@ -113,8 +114,8 @@ _compile_import:
addi sp, sp, 24 addi sp, sp, 24
ret ret
.type _build_binary_expression, @function .type compile_binary_expression, @function
_build_binary_expression: compile_binary_expression:
# Prologue. # Prologue.
addi sp, sp, -32 addi sp, sp, -32
sw ra, 28(sp) sw ra, 28(sp)
@ -126,30 +127,30 @@ _build_binary_expression:
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
lw t0, 12(sp) lw t0, 12(sp)
li t1, TOKEN_AND li t1, TOKEN_AND
beq t0, t1, .Lbuild_binary_expression_and beq t0, t1, .Lcompile_binary_expression_and
li t1, TOKEN_OR li t1, TOKEN_OR
beq t0, t1, .Lbuild_binary_expression_or beq t0, t1, .Lcompile_binary_expression_or
li t1, TOKEN_PLUS li t1, TOKEN_PLUS
beq t0, t1, .Lbuild_binary_expression_plus beq t0, t1, .Lcompile_binary_expression_plus
li t1, TOKEN_EQUALS li t1, TOKEN_EQUALS
beq t0, t1, .Lbuild_binary_expression_equal beq t0, t1, .Lcompile_binary_expression_equal
li t1, TOKEN_ASTERISK li t1, TOKEN_ASTERISK
beq t0, t1, .Lbuild_binary_expression_product beq t0, t1, .Lcompile_binary_expression_product
li t1, TOKEN_MINUS li t1, TOKEN_MINUS
beq t0, t1, .Lbuild_binary_expression_minus beq t0, t1, .Lcompile_binary_expression_minus
j .Lbuild_binary_expression_end j .Lcompile_binary_expression_end
.Lbuild_binary_expression_equal: .Lcompile_binary_expression_equal:
mv s1, a0 # Skip =. mv s1, a0 # Skip =.
li a0, 1 li a0, 1
call _build_expression call _build_expression
@ -161,9 +162,9 @@ _build_binary_expression:
li a1, ASM_SEQZ_A0_SIZE li a1, ASM_SEQZ_A0_SIZE
call _write_out call _write_out
j .Lbuild_binary_expression_end j .Lcompile_binary_expression_end
.Lbuild_binary_expression_and: .Lcompile_binary_expression_and:
mv s1, a0 # Skip &. mv s1, a0 # Skip &.
li a0, 1 li a0, 1
call _build_expression call _build_expression
@ -171,9 +172,9 @@ _build_binary_expression:
li a1, ASM_AND_A0_A1_SIZE li a1, ASM_AND_A0_A1_SIZE
call _write_out call _write_out
j .Lbuild_binary_expression_end j .Lcompile_binary_expression_end
.Lbuild_binary_expression_or: .Lcompile_binary_expression_or:
mv s1, a0 # Skip or. mv s1, a0 # Skip or.
li a0, 1 li a0, 1
call _build_expression call _build_expression
@ -181,9 +182,9 @@ _build_binary_expression:
li a1, ASM_OR_A0_A1_SIZE li a1, ASM_OR_A0_A1_SIZE
call _write_out call _write_out
j .Lbuild_binary_expression_end j .Lcompile_binary_expression_end
.Lbuild_binary_expression_plus: .Lcompile_binary_expression_plus:
mv s1, a0 # Skip +. mv s1, a0 # Skip +.
li a0, 1 li a0, 1
call _build_expression call _build_expression
@ -191,9 +192,9 @@ _build_binary_expression:
li a1, ASM_ADD_A0_A1_SIZE li a1, ASM_ADD_A0_A1_SIZE
call _write_out call _write_out
j .Lbuild_binary_expression_end j .Lcompile_binary_expression_end
.Lbuild_binary_expression_minus: .Lcompile_binary_expression_minus:
mv s1, a0 # Skip -. mv s1, a0 # Skip -.
li a0, 1 li a0, 1
call _build_expression call _build_expression
@ -201,9 +202,9 @@ _build_binary_expression:
li a1, ASM_SUB_A0_A1_SIZE li a1, ASM_SUB_A0_A1_SIZE
call _write_out call _write_out
j .Lbuild_binary_expression_end j .Lcompile_binary_expression_end
.Lbuild_binary_expression_product: .Lcompile_binary_expression_product:
mv s1, a0 # Skip *. mv s1, a0 # Skip *.
li a0, 1 li a0, 1
call _build_expression call _build_expression
@ -211,9 +212,9 @@ _build_binary_expression:
li a1, ASM_MUL_A0_A1_SIZE li a1, ASM_MUL_A0_A1_SIZE
call _write_out call _write_out
j .Lbuild_binary_expression_end j .Lcompile_binary_expression_end
.Lbuild_binary_expression_end: .Lcompile_binary_expression_end:
# Epilogue. # Epilogue.
lw ra, 28(sp) lw ra, 28(sp)
lw s0, 24(sp) lw s0, 24(sp)
@ -408,7 +409,7 @@ _build_expression:
mv a0, s1 mv a0, s1
addi a1, sp, 24 addi a1, sp, 24
call _tokenize_next call lex_next
sw a0, 20(sp) sw a0, 20(sp)
lw a0, 24(sp) lw a0, 24(sp)
@ -462,7 +463,7 @@ _build_expression:
lw a0, 20(sp) # Skip @. lw a0, 20(sp) # Skip @.
addi a1, sp, 24 addi a1, sp, 24
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
lw a0, 32(sp) lw a0, 32(sp)
@ -479,7 +480,7 @@ _build_expression:
.Lbuild_expression_call: .Lbuild_expression_call:
lw a0, 20(sp) lw a0, 20(sp)
addi a1, sp, 8 addi a1, sp, 8
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
lw a0, 32(sp) lw a0, 32(sp)
@ -613,9 +614,9 @@ _compile_identifier:
# Save the pointer to the identifier and its length on the stack. # Save the pointer to the identifier and its length on the stack.
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
lw t0, 0(sp) lw t0, 0(sp)
@ -636,7 +637,7 @@ _compile_identifier:
j .Lcompile_identifier_end j .Lcompile_identifier_end
.Lcompile_identifier_assign: .Lcompile_identifier_assign:
call _build_binary_expression call compile_binary_expression
lw a0, 20(sp) lw a0, 20(sp)
lw a1, 16(sp) lw a1, 16(sp)
call _compile_designator_expression call _compile_designator_expression
@ -758,7 +759,7 @@ _compile_procedure_section:
.Lcompile_procedure_section_loop: .Lcompile_procedure_section_loop:
mv a0, s1 mv a0, s1
addi a1, sp, 4 addi a1, sp, 4
call _tokenize_next call lex_next
li t0, TOKEN_PROC li t0, TOKEN_PROC
lw t1, 4(sp) lw t1, 4(sp)
bne t0, t1, .Lcompile_procedure_section_end bne t0, t1, .Lcompile_procedure_section_end
@ -789,7 +790,7 @@ _compile_module_declaration:
# Skip "program". # Skip "program".
mv a0, s1 mv a0, s1
addi a1, sp, 4 addi a1, sp, 4
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
# Epilogue. # Epilogue.
@ -808,7 +809,7 @@ _compile_constant_section:
mv a0, s1 mv a0, s1
addi a1, sp, 4 addi a1, sp, 4
call _tokenize_next call lex_next
li t0, TOKEN_CONST li t0, TOKEN_CONST
lw t1, 4(sp) lw t1, 4(sp)
bne t0, t1, .Lcompile_constant_section_end bne t0, t1, .Lcompile_constant_section_end
@ -821,7 +822,7 @@ _compile_constant_section:
.Lcompile_constant_section_item: .Lcompile_constant_section_item:
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
lw t0, 12(sp) lw t0, 12(sp)
li t1, TOKEN_IDENTIFIER li t1, TOKEN_IDENTIFIER
@ -849,9 +850,9 @@ _compile_constant:
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next # Skip the assignment sign. call lex_next # Skip the assignment sign.
mv s1, a0 mv s1, a0
# Write the identifier. # Write the identifier.
lw a0, 20(sp) lw a0, 20(sp)
@ -869,7 +870,7 @@ _compile_constant:
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
lw a0, 20(sp) # Save the literal pointer before advancing it. lw a0, 20(sp) # Save the literal pointer before advancing it.
@ -895,7 +896,7 @@ _compile_variable_section:
mv a0, s1 mv a0, s1
addi a1, sp, 4 addi a1, sp, 4
call _tokenize_next call lex_next
li t0, TOKEN_VAR li t0, TOKEN_VAR
lw t1, 4(sp) lw t1, 4(sp)
bne t0, t1, .Lcompile_variable_section_end bne t0, t1, .Lcompile_variable_section_end
@ -908,7 +909,7 @@ _compile_variable_section:
.Lcompile_variable_section_item: .Lcompile_variable_section_item:
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
lw t0, 12(sp) lw t0, 12(sp)
li t1, TOKEN_IDENTIFIER li t1, TOKEN_IDENTIFIER
@ -937,17 +938,17 @@ _compile_variable:
# Save the identifier on the stack since it should be emitted multiple times. # Save the identifier on the stack since it should be emitted multiple times.
mv a0, s1 mv a0, s1
addi a1, sp, 28 addi a1, sp, 28
call _tokenize_next call lex_next
addi a1, sp, 4 addi a1, sp, 4
call _tokenize_next # Skip the colon in front of the type. call lex_next # Skip the colon in front of the type.
addi a1, sp, 4 addi a1, sp, 4
call _tokenize_next # Skip the opening bracket. call lex_next # Skip the opening bracket.
addi a1, sp, 16 addi a1, sp, 16
call _tokenize_next # Save the array size on the stack since it has to be emitted multiple times. call lex_next # Save the array size on the stack since it has to be emitted multiple times.
addi a1, sp, 4 addi a1, sp, 4
call _tokenize_next # Skip the closing bracket. call lex_next # Skip the closing bracket.
addi a1, sp, 4 addi a1, sp, 4
call _tokenize_next # Skip the type. call lex_next # Skip the type.
mv s1, a0 mv s1, a0
# .type identifier, @object # .type identifier, @object
@ -989,8 +990,9 @@ _compile_variable:
addi sp, sp, 48 addi sp, sp, 48
ret ret
.type _compile_type_expression, @function # Sets a0 to the type pointer.
_compile_type_expression: .type compile_type_expression, @function
compile_type_expression:
# Prologue. # Prologue.
addi sp, sp, -32 addi sp, sp, -32
sw ra, 28(sp) sw ra, 28(sp)
@ -1000,19 +1002,49 @@ _compile_type_expression:
.Lcompile_type_expression_type: .Lcompile_type_expression_type:
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
lw t0, 12(sp) lw t0, 12(sp)
# Skip the pointer designator and handle the rest of the type. li t1, TOKEN_HAT # Pointer type.
li t1, TOKEN_HAT beq t0, t1, .Lcompile_type_expression_pointer
beq t0, t1, .Lcompile_type_expression_type
# Named type.
mv a0, zero
lw a1, 16(sp)
lw a2, 20(sp)
call symbol_table_find
/* DEBUG
sw a0, 4(sp)
lw a0, 4(a0)
addi a0, a0, '0'
sw a0, 8(sp)
/* DEBUG */
lw a0, 20(sp) lw a0, 20(sp)
lw a1, 16(sp) lw a1, 16(sp)
call _write_error call _write_error
addi a0, sp, 8
li a1, 1
call _write_error
lw a0, 4(sp) */
/* DEBUG */
j .Lcompile_type_expression_end
.Lcompile_type_expression_pointer:
call compile_type_expression
mv a1, s3
call symbol_table_make_pointer
add s3, s3, a0
sub a0, s3, a0
j .Lcompile_type_expression_end
.Lcompile_type_expression_end:
# Epilogue. # Epilogue.
lw ra, 28(sp) lw ra, 28(sp)
lw s0, 24(sp) lw s0, 24(sp)
@ -1029,12 +1061,12 @@ _compile_parameters:
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
mv s1, a0 # Skip the opening paren. mv s1, a0 # Skip the opening paren.
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
lw t0, 12(sp) lw t0, 12(sp)
li t1, TOKEN_RIGHT_PAREN li t1, TOKEN_RIGHT_PAREN
@ -1045,21 +1077,21 @@ _compile_parameters:
.Lcompile_parameters_parameter: .Lcompile_parameters_parameter:
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next call lex_next
mv s1, a0 # Skip the ":" in front of the type. mv s1, a0 # Skip the ":" in front of the type.
call _compile_type_expression call compile_type_expression
# Read the comma between the parameters or a closing paren. # Read the comma between the parameters or a closing paren.
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next call lex_next
lw t0, 0(sp) lw t0, 0(sp)
li t1, TOKEN_COMMA li t1, TOKEN_COMMA
bne t0, t1, .Lcompile_parameters_end bne t0, t1, .Lcompile_parameters_end
# If it is a comma, read the name of the next parameter. # If it is a comma, read the name of the next parameter.
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
j .Lcompile_parameters_parameter j .Lcompile_parameters_parameter
@ -1083,9 +1115,9 @@ _compile_procedure:
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next # Skip proc. call lex_next # Skip proc.
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
lw a0, 16(sp) lw a0, 16(sp)
@ -1098,7 +1130,7 @@ _compile_procedure:
.Lcompile_procedure_begin: .Lcompile_procedure_begin:
mv a0, s1 mv a0, s1
addi a1, sp, 4 addi a1, sp, 4
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
lw t0, 4(sp) lw t0, 4(sp)
li t1, TOKEN_BEGIN li t1, TOKEN_BEGIN
@ -1140,11 +1172,11 @@ _compile_goto:
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next # Skip the goto keyword. call lex_next # Skip the goto keyword.
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next # We should be on dot the label is beginning with. call lex_next # We should be on dot the label is beginning with.
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next# Save the label name. call lex_next# Save the label name.
mv s1, a0 mv s1, a0
li t0, 0x2e206a # j . li t0, 0x2e206a # j .
@ -1177,9 +1209,9 @@ _compile_label:
mv a0, s1 mv a0, s1
addi a1, sp, 8 addi a1, sp, 8
call _tokenize_next # Dot starting the label. call lex_next # Dot starting the label.
addi a1, sp, 8 addi a1, sp, 8
call _tokenize_next call lex_next
mv s1, a0 mv s1, a0
li a0, '.' li a0, '.'
@ -1209,10 +1241,10 @@ _compile_return:
mv a0, s1 mv a0, s1
addi a1, sp, 12 addi a1, sp, 12
call _tokenize_next call lex_next
mv s1, a0 # Skip return. mv s1, a0 # Skip return.
call _build_binary_expression call compile_binary_expression
# Epilogue. # Epilogue.
lw ra, 28(sp) lw ra, 28(sp)
@ -1230,14 +1262,14 @@ _compile_if:
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next call lex_next
mv s1, a0 # Skip the if. mv s1, a0 # Skip the if.
call _build_binary_expression call compile_binary_expression
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next call lex_next
mv s1, a0 # Skip the then. mv s1, a0 # Skip the then.
# Label prefix. # Label prefix.
@ -1351,7 +1383,7 @@ _compile_statements:
# Generate the body of the procedure. # Generate the body of the procedure.
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next call lex_next
lw t0, 0(sp) lw t0, 0(sp)
li t1, TOKEN_END li t1, TOKEN_END
@ -1362,7 +1394,7 @@ _compile_statements:
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next call lex_next
lw t0, 0(sp) lw t0, 0(sp)
li t1, TOKEN_SEMICOLON li t1, TOKEN_SEMICOLON
@ -1389,7 +1421,7 @@ _compile_statement:
mv a0, s1 mv a0, s1
addi a1, sp, 0 addi a1, sp, 0
call _tokenize_next call lex_next
lw t0, 0(sp) lw t0, 0(sp)
li t1, TOKEN_IDENTIFIER li t1, TOKEN_IDENTIFIER
@ -1470,7 +1502,7 @@ _compile_entry_point:
mv a0, s1 mv a0, s1
addi a1, sp, 4 addi a1, sp, 4
call _tokenize_next call lex_next
mv s1, a0 # Skip begin. mv s1, a0 # Skip begin.
# Generate the body of the procedure. # Generate the body of the procedure.
@ -1518,6 +1550,9 @@ _start:
call _read_file call _read_file
li s2, 1 li s2, 1
call _mmap
mv s3, a0
call symbol_table_build call symbol_table_build
call _compile call _compile

View File

@ -2,7 +2,7 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can # v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/. # obtain one at https://mozilla.org/MPL/2.0/.
.global symbol_table_build, symbol_table_find .global symbol_table_build, symbol_table_find, symbol_table_make_pointer
.include "boot/definitions.inc" .include "boot/definitions.inc"
@ -18,6 +18,8 @@ symbol_builtin_name_word: .ascii "Word"
symbol_builtin_name_byte: .ascii "Byte" symbol_builtin_name_byte: .ascii "Byte"
.type symbol_builtin_name_char, @object .type symbol_builtin_name_char, @object
symbol_builtin_name_char: .ascii "Char" symbol_builtin_name_char: .ascii "Char"
.type symbol_builtin_name_bool, @object
symbol_builtin_name_bool: .ascii "Bool"
# Every type info starts with a word describing what type it is. # Every type info starts with a word describing what type it is.
@ -34,6 +36,9 @@ symbol_builtin_type_byte: .word TYPE_PRIMITIVE
.type symbol_builtin_type_char, @object .type symbol_builtin_type_char, @object
symbol_builtin_type_char: .word TYPE_PRIMITIVE symbol_builtin_type_char: .word TYPE_PRIMITIVE
.word 1 .word 1
.type symbol_builtin_type_bool, @object
symbol_builtin_type_bool: .word TYPE_PRIMITIVE
.word 1
.section .bss .section .bss
@ -81,20 +86,19 @@ symbol_table_find:
beqz s2, .Lsymbol_table_find_not_found beqz s2, .Lsymbol_table_find_not_found
# Compare string lengths. # Compare string lengths.
mv a0, s3
mv a1, s4
lw a2, 0(s1) lw a2, 0(s1)
bne s3, a2, .Lsymbol_table_find_continue lw a3, 4(s1)
call _string_equal
# If lengths match, compare the content. beqz a0, .Lsymbol_table_find_continue
mv a0, s4
lw a1, 4(s1)
call _memcmp
bnez a0, .Lsymbol_table_find_continue
lw a0, 8(s1) # Pointer to the symbol. lw a0, 8(s1) # Pointer to the symbol.
j .Lsymbol_table_end j .Lsymbol_table_end
.Lsymbol_table_find_continue: .Lsymbol_table_find_continue:
addi s1, s1, 12
addi s2, s2, -1 addi s2, s2, -1
j .Lsymbol_table_find_loop j .Lsymbol_table_find_loop
@ -113,6 +117,22 @@ symbol_table_find:
addi sp, sp, 32 addi sp, sp, 32
ret ret
# Writes a pointer type description into the given output memory.
#
# The description occupies two words:
# - Word 0: TYPE_POINTER, the kind of the type.
# - Word 1: Pointer to the base type.
#
# Parameters:
# a0 - Pointer to the base type.
# a1 - Output memory.
#
# Sets a0 to the size of the newly created type in bytes (8).
.type symbol_table_make_pointer, @function
symbol_table_make_pointer:
	# Store the base type first, since a0 is overwritten with the size below.
	sw a0, 4(a1)

	li t0, TYPE_POINTER
	sw t0, 0(a1)

	li a0, 8 # Two words were written.
	ret
# Build the initial symbols. # Build the initial symbols.
# #
# Sets a0 to the pointer to the global symbol table. # Sets a0 to the pointer to the global symbol table.
@ -165,4 +185,15 @@ symbol_table_build:
sw t1, 0(a0) sw t1, 0(a0)
addi t0, t0, 12 addi t0, t0, 12
li t1, 4 # Length of the word "Bool".
sw t1, 0(t0)
la t1, symbol_builtin_name_bool
sw t1, 4(t0)
la t1, symbol_builtin_type_bool
sw t1, 8(t0)
lw t1, 0(a0)
addi t1, t1, 1
sw t1, 0(a0)
addi t0, t0, 12
ret ret

View File

@ -2,7 +2,7 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can # v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/. # obtain one at https://mozilla.org/MPL/2.0/.
.global _tokenize_next, classification, transitions, keywords, byte_keywords .global lex_next, classification, transitions, keywords, byte_keywords
.include "boot/definitions.inc" .include "boot/definitions.inc"
@ -246,7 +246,7 @@ byte_keywords: .ascii "&.,:;()[]^=+-*@"
# It specifies the target state. "ff" means that this is an end state and no # It specifies the target state. "ff" means that this is an end state and no
# transition is possible. # transition is possible.
# - The next byte is the action that should be performed when transitioning. # - The next byte is the action that should be performed when transitioning.
# For the meaning of actions see labels in the _tokenize_next function, which # For the meaning of actions see labels in the lex_next function, which
# handles each action. # handles each action.
# #
.type transitions, @object .type transitions, @object
@ -318,8 +318,8 @@ transitions:
# a0 - Character. # a0 - Character.
# #
# Sets a0 to the class number. # Sets a0 to the class number.
.type _classify, @function .type classify, @function
_classify: classify:
la t0, classification la t0, classification
add t0, t0, a0 # Character class pointer. add t0, t0, a0 # Character class pointer.
lbu a0, (t0) # Character class. lbu a0, (t0) # Character class.
@ -332,8 +332,8 @@ _classify:
# a1 - Character class. # a1 - Character class.
# #
# Sets a0 to the next state. # Sets a0 to the next state.
.type _lookup_state, @function .type lookup_state, @function
_lookup_state: lookup_state:
li t0, CLASS_COUNT li t0, CLASS_COUNT
mul a0, a0, t0 # Transition row. mul a0, a0, t0 # Transition row.
add a0, a0, a1 # Transition column. add a0, a0, a1 # Transition column.
@ -347,7 +347,7 @@ _lookup_state:
ret ret
# Chains _classify and _lookup_state. # Chains classify and lookup_state.
# #
# Parameters: # Parameters:
# a0 - Current state. # a0 - Current state.
@ -364,11 +364,11 @@ _next_state:
sw a0, 4(sp) sw a0, 4(sp)
mv a0, a1 mv a0, a1
call _classify call classify
mv a1, a0 mv a1, a0
lw a0, 4(sp) lw a0, 4(sp)
call _lookup_state call lookup_state
# Epilogue. # Epilogue.
lw ra, 12(sp) lw ra, 12(sp)
@ -383,8 +383,8 @@ _next_state:
# a1 - Token pointer. # a1 - Token pointer.
# #
# Sets a0 to the appropriate token type. # Sets a0 to the appropriate token type.
.type _classify_identifier, @function .type classify_identifier, @function
_classify_identifier: classify_identifier:
# Prologue. # Prologue.
addi sp, sp, -16 addi sp, sp, -16
sw ra, 12(sp) sw ra, 12(sp)
@ -413,8 +413,8 @@ _classify_identifier:
# a0 - Token character. # a0 - Token character.
# #
# Sets a0 to the appropriate token type. # Sets a0 to the appropriate token type.
.type _classify_single, @function .type classify_single, @function
_classify_single: classify_single:
# Prologue. # Prologue.
addi sp, sp, -16 addi sp, sp, -16
sw ra, 12(sp) sw ra, 12(sp)
@ -443,8 +443,8 @@ _classify_single:
# a1 - Token pointer. # a1 - Token pointer.
# #
# Sets a0 to the appropriate token type. # Sets a0 to the appropriate token type.
.type _classify_composite, @function .type classify_composite, @function
_classify_composite: classify_composite:
lbu t0, 0(a1) lbu t0, 0(a1)
li t1, ':' li t1, ':'
beq t0, t1, .Lclassify_composite_assign beq t0, t1, .Lclassify_composite_assign
@ -465,8 +465,8 @@ _classify_composite:
# a1 - A pointer for output value, the token kind. 4 Bytes. # a1 - A pointer for output value, the token kind. 4 Bytes.
# #
# Sets a0 to the position of the next token. # Sets a0 to the position of the next token.
.type _tokenize_next, @function .type lex_next, @function
_tokenize_next: lex_next:
# Prologue. # Prologue.
addi sp, sp, -32 addi sp, sp, -32
sw ra, 28(sp) sw ra, 28(sp)
@ -484,7 +484,7 @@ _tokenize_next:
sw a1, 0(sp) sw a1, 0(sp)
sw zero, (a1) # Initialize. sw zero, (a1) # Initialize.
.Ltokenize_next_loop: .Llex_next_loop:
mv a0, s2 mv a0, s2
lbu a1, (s1) lbu a1, (s1)
call _next_state call _next_state
@ -498,50 +498,50 @@ _tokenize_next:
# Perform the provided action. # Perform the provided action.
li t0, 0x01 # Accumulate action. li t0, 0x01 # Accumulate action.
beq t1, t0, .Ltokenize_next_accumulate beq t1, t0, .Llex_next_accumulate
li t0, 0x02 # Print action. li t0, 0x02 # Print action.
beq t1, t0, .Ltokenize_next_print beq t1, t0, .Llex_next_print
li t0, 0x03 # Skip action. li t0, 0x03 # Skip action.
beq t1, t0, .Ltokenize_next_skip beq t1, t0, .Llex_next_skip
li t0, 0x04 # Delimited string action. li t0, 0x04 # Delimited string action.
beq t1, t0, .Ltokenize_next_comment beq t1, t0, .Llex_next_comment
li t0, 0x05 # Finalize identifier. li t0, 0x05 # Finalize identifier.
beq t1, t0, .Ltokenize_next_identifier beq t1, t0, .Llex_next_identifier
li t0, 0x06 # Single character symbol action. li t0, 0x06 # Single character symbol action.
beq t1, t0, .Ltokenize_next_single beq t1, t0, .Llex_next_single
li t0, 0x07 # An action for symbols containing multiple characters. li t0, 0x07 # An action for symbols containing multiple characters.
beq t1, t0, .Ltokenize_next_composite beq t1, t0, .Llex_next_composite
li t0, 0x08 # Integer action. li t0, 0x08 # Integer action.
beq t1, t0, .Ltokenize_next_integer beq t1, t0, .Llex_next_integer
j .Ltokenize_next_reject j .Llex_next_reject
.Ltokenize_next_reject: .Llex_next_reject:
addi s1, s1, 1 addi s1, s1, 1
j .Ltokenize_next_end j .Llex_next_end
.Ltokenize_next_accumulate: .Llex_next_accumulate:
addi s1, s1, 1 addi s1, s1, 1
j .Ltokenize_next_loop j .Llex_next_loop
.Ltokenize_next_skip: .Llex_next_skip:
addi s1, s1, 1 addi s1, s1, 1
lw t0, 12(sp) lw t0, 12(sp)
addi t0, t0, 1 addi t0, t0, 1
sw t0, 12(sp) sw t0, 12(sp)
j .Ltokenize_next_loop j .Llex_next_loop
.Ltokenize_next_print: .Llex_next_print:
/* DEBUG /* DEBUG
addi a0, a0, 21 addi a0, a0, 21
sw a0, 0(sp) sw a0, 0(sp)
@ -549,49 +549,49 @@ _tokenize_next:
li a1, 1 li a1, 1
call _write_error */ call _write_error */
j .Ltokenize_next_end j .Llex_next_end
.Ltokenize_next_comment: .Llex_next_comment:
addi s1, s1, 1 addi s1, s1, 1
j .Ltokenize_next_end j .Llex_next_end
.Ltokenize_next_identifier: .Llex_next_identifier:
# An identifier can be a textual keyword. # An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter. # Check the kind of the token and write it into the output parameter.
lw a1, 12(sp) lw a1, 12(sp)
sub a0, s1, a1 sub a0, s1, a1
sw a0, 8(sp) sw a0, 8(sp)
call _classify_identifier call classify_identifier
sw a0, 4(sp) sw a0, 4(sp)
lw a0, 0(sp) lw a0, 0(sp)
addi a1, sp, 4 addi a1, sp, 4
li a2, 12 li a2, 12
call _memcpy call _memcpy
j .Ltokenize_next_end j .Llex_next_end
.Ltokenize_next_single: .Llex_next_single:
lw a0, 12(sp) lw a0, 12(sp)
addi s1, a0, 1 addi s1, a0, 1
lbu a0, (a0) lbu a0, (a0)
call _classify_single call classify_single
lw a1, 0(sp) lw a1, 0(sp)
sw a0, (a1) sw a0, (a1)
j .Ltokenize_next_end j .Llex_next_end
.Ltokenize_next_composite: .Llex_next_composite:
addi s1, s1, 1 addi s1, s1, 1
lw a1, 12(sp) lw a1, 12(sp)
sub a0, s1, a1 sub a0, s1, a1
call _classify_composite call classify_composite
lw a1, 0(sp) lw a1, 0(sp)
sw a0, (a1) sw a0, (a1)
j .Ltokenize_next_end j .Llex_next_end
.Ltokenize_next_integer: .Llex_next_integer:
lw t0, 0(sp) lw t0, 0(sp)
li t1, TOKEN_INTEGER li t1, TOKEN_INTEGER
sw t1, 0(t0) sw t1, 0(t0)
@ -600,9 +600,9 @@ _tokenize_next:
sub t1, s1, t1 sub t1, s1, t1
sw t1, 4(t0) sw t1, 4(t0)
j .Ltokenize_next_end j .Llex_next_end
.Ltokenize_next_end: .Llex_next_end:
mv a0, s1 # Return the advanced text pointer. mv a0, s1 # Return the advanced text pointer.
# Restore saved registers. # Restore saved registers.