Replace _read_token with the lexer

This commit is contained in:
Eugen Wissner 2025-05-06 23:58:46 +02:00
parent 3bd86e6e1c
commit 40701008f0
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
3 changed files with 91 additions and 276 deletions

View File

@ -409,11 +409,15 @@ _build_expression:
mv a0, s1
addi a1, sp, 24
call _tokenize_next
sw a0, 20(sp)
call _skip_spaces
call _read_token
sw s1, 32(sp)
sw a0, 28(sp)
/* DEBUG
lw a0, 32(sp)
lw a1, 28(sp)
call _write_error
lw a0, 28(sp)
li a1, 8
call _write_error */
lw a0, 24(sp)
@ -423,14 +427,15 @@ _build_expression:
li t0, TOKEN_AT
beq a0, t0, .Lbuild_expression_address
lbu a0, (s1)
call _is_digit
bnez a0, .Lbuild_expression_literal
li t0, TOKEN_INTEGER
beq a0, t0, .Lbuild_expression_literal
lbu a0, (s1)
lw a0, 32(sp)
lbu a0, (a0)
li t0, '_'
beq a0, t0, .Lbuild_expression_call
lw s1, 32(sp)
lw a0, 28(sp)
lw a1, 36(sp)
call _compile_identifier_expression
@ -438,7 +443,7 @@ _build_expression:
j .Lbuild_expression_advance
.Lbuild_expression_negate:
addi s1, s1, 1 # Skip the -.
lw s1, 20(sp) # Skip the -.
mv a0, zero
call _build_expression
@ -446,7 +451,7 @@ _build_expression:
li a1, ASM_NEG_A0_SIZE
call _write_out
j .Lbuild_expression_advance
j .Lbuild_expression_end
.Lbuild_expression_address:
lw t1, 36(sp)
@ -463,10 +468,10 @@ _build_expression:
li a1, 13
call _write_out
addi s1, s1, 1 # Skip @.
call _read_token
sw s1, 32(sp)
sw a0, 28(sp)
lw a0, 20(sp) # Skip @.
addi a1, sp, 24
call _tokenize_next
mv s1, a0
lw a0, 32(sp)
lw a1, 28(sp)
@ -477,13 +482,16 @@ _build_expression:
li a0, '\n'
call _put_char
j .Lbuild_expression_advance
j .Lbuild_expression_end
.Lbuild_expression_call:
lw a0, 20(sp)
addi a1, sp, 8
call _tokenize_next
mv s1, a0
lw a0, 32(sp)
lw a1, 28(sp)
add s1, s1, a1
addi s1, s1, 1
call _compile_call
j .Lbuild_expression_end
@ -509,8 +517,7 @@ _build_expression:
j .Lbuild_expression_advance
.Lbuild_expression_advance:
lw a0, 28(sp)
add s1, s1, a0
lw s1, 20(sp)
.Lbuild_expression_end:
# Epilogue.
@ -747,102 +754,6 @@ _compile_call:
addi sp, sp, 32
ret
# Measures the token that starts at the address held in s1.
#
# Input:
#   s1 - Address of the first character of the token. Only read, never written.
# Output:
#   a0 - Length of the token in bytes:
#        1 for one of . , ; ( ) [ ] ^ & = + - * @
#        1 for a lone ':' and 2 for ':='
#        otherwise the number of leading characters for which _is_alnum
#        returns non-zero (0 if the very first character is rejected).
# Stack frame (16 bytes): 12(sp) = ra, 8(sp) = s0, 4(sp) = running length.
# Uses t0, t1 and t6 as scratch registers.
.type _read_token, @function
_read_token:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
lbu t0, (s1) # t0 = Current character.
# The running token length lives in 4(sp) and starts at zero.
sw zero, 4(sp)
# Punctuation is recognised by comparing the first character against each
# candidate in turn. Everything except ':' is a one character token.
li t1, '.'
beq t0, t1, .Ltoken_character_single
li t1, ','
beq t0, t1, .Ltoken_character_single
# ':' needs a look at the following character to tell ':' from ':='.
li t1, ':'
beq t0, t1, .Ltoken_character_colon
li t1, ';'
beq t0, t1, .Ltoken_character_single
li t1, '('
beq t0, t1, .Ltoken_character_single
li t1, ')'
beq t0, t1, .Ltoken_character_single
li t1, '['
beq t0, t1, .Ltoken_character_single
li t1, ']'
beq t0, t1, .Ltoken_character_single
li t1, '^'
beq t0, t1, .Ltoken_character_single
li t1, '&'
beq t0, t1, .Ltoken_character_single
li t1, '='
beq t0, t1, .Ltoken_character_single
li t1, '+'
beq t0, t1, .Ltoken_character_single
li t1, '-'
beq t0, t1, .Ltoken_character_single
li t1, '*'
beq t0, t1, .Ltoken_character_single
li t1, '@'
beq t0, t1, .Ltoken_character_single
# Expect an identifier or a number.
# Each iteration tests the character at s1 + length and stops at the first
# one _is_alnum rejects.
.Ltoken_character_loop_do:
lw t6, 4(sp)
add t1, s1, t6
lbu a0, (t1) # a0 = Current character.
call _is_alnum
beqz a0, .Ltoken_character_end
# The length is reloaded from the stack instead of being kept in t6 across
# the call; t registers are caller-saved in the standard RISC-V convention.
lw t6, 4(sp)
addi t6, t6, 1
sw t6, 4(sp)
j .Ltoken_character_loop_do
# Adds one to the stored length and finishes. Also reached from the colon
# case to count the '=' of ':='.
.Ltoken_character_single:
lw t6, 4(sp)
addi t6, t6, 1
sw t6, 4(sp)
j .Ltoken_character_end
.Ltoken_character_colon:
lbu t0, 1(s1) # t0 = The character after the colon.
# Count the colon itself.
lw t6, 4(sp)
addi t6, t6, 1
sw t6, 4(sp)
# If '=' follows, the shared increment above makes the length 2.
li t1, '='
beq t0, t1, .Ltoken_character_single
# This jump targets the label directly below it.
j .Ltoken_character_end
.Ltoken_character_end:
lw a0, 4(sp) # a0 = Token length, the return value.
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Skips the spaces till the next non space character.
.type _skip_spaces, @function
_skip_spaces:
@ -911,15 +822,14 @@ _skip_comment:
addi sp, sp, 16
ret
# Parameters:
# a0 - Line length.
# Walks through the procedure definitions.
.type _compile_procedure_section, @function
_compile_procedure_section:
# Prologue.
addi sp, sp, -24
sw ra, 20(sp)
sw s0, 16(sp)
addi s0, sp, 24
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
.Lcompile_procedure_section_loop:
call _skip_spaces
@ -938,9 +848,9 @@ _compile_procedure_section:
.Lcompile_procedure_section_end:
# Epilogue.
lw ra, 20(sp)
lw s0, 16(sp)
addi sp, sp, 24
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
.type _compile_module_declaration, @function
@ -970,10 +880,10 @@ _compile_module_declaration:
.type _compile_constant_section, @function
_compile_constant_section:
# Prologue.
addi sp, sp, -24
sw ra, 20(sp)
sw s0, 16(sp)
addi s0, sp, 24
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
mv a0, s1
addi a1, sp, 4
@ -988,19 +898,24 @@ _compile_constant_section:
call _write_out
.Lcompile_constant_section_item:
call _skip_spaces
lbu a0, (s1)
call _is_upper
beqz a0, .Lcompile_constant_section_end
mv a0, s1
addi a1, sp, 12
call _tokenize_next
lw t0, 12(sp)
li t1, TOKEN_IDENTIFIER
bne t0, t1, .Lcompile_constant_section_end
lw s1, 20(sp)
call _compile_constant
j .Lcompile_constant_section_item
.Lcompile_constant_section_end:
# Epilogue.
lw ra, 20(sp)
lw s0, 16(sp)
addi sp, sp, 24
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
.type _compile_constant, @function
@ -1042,7 +957,6 @@ _compile_constant:
li a0, '\n'
call _put_char
call _skip_spaces
# Epilogue.
lw ra, 28(sp)
@ -1053,10 +967,10 @@ _compile_constant:
.type _compile_variable_section, @function
_compile_variable_section:
# Prologue.
addi sp, sp, -24
sw ra, 20(sp)
sw s0, 16(sp)
addi s0, sp, 24
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
mv a0, s1
addi a1, sp, 4
@ -1071,19 +985,23 @@ _compile_variable_section:
call _write_out
.Lcompile_variable_section_item:
call _skip_spaces
lbu a0, (s1)
call _is_lower
beqz a0, .Lcompile_variable_section_end
mv a0, s1
addi a1, sp, 12
call _tokenize_next
lw t0, 12(sp)
li t1, TOKEN_IDENTIFIER
bne t0, t1, .Lcompile_variable_section_end
lw s1, 20(sp) # Advance to the beginning of the variable name.
call _compile_variable
j .Lcompile_variable_section_item
.Lcompile_variable_section_end:
# Epilogue.
lw ra, 20(sp)
lw s0, 16(sp)
addi sp, sp, 24
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Compile a global variable.
@ -1111,18 +1029,6 @@ _compile_variable:
call _tokenize_next # Skip the type.
mv s1, a0
/* DEBUG
lw a0, 24(sp)
add a0, a0, '0'
sw a0, 24(sp)
addi a0, sp, 24
li a1, 1
call _write_error
lw a0, 28(sp)
li a1, 8
call _write_error
*/
# .type identifier, @object
la a0, asm_type
li a1, ASM_TYPE_SIZE
@ -1478,10 +1384,6 @@ _compile_statement:
j .Lcompile_statement_end
.Lcompile_statement_end:
sw a0, 12(sp)
call _skip_comment
lw a0, 12(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
@ -1492,19 +1394,19 @@ _compile_statement:
# Writes the section_text string, SECTION_TEXT_SIZE bytes long, with _write_out.
# Takes no arguments. a0 and a1 are overwritten with the _write_out arguments.
.type _compile_text_section, @function
_compile_text_section:
# Prologue. A single 16-byte frame keeps sp 16-byte aligned at the call
# below, as the RISC-V calling convention requires. The former 8-byte
# frame that was stacked in front of it is gone.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
la a0, section_text # a0 = Address of the text to write.
li a1, SECTION_TEXT_SIZE # a1 = Its length in bytes.
call _write_out
# Epilogue. Restores ra and s0 from the same slots the prologue filled.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
.type _compile_entry_point, @function

View File

@ -183,7 +183,6 @@ begin
goto .Lcompile_identifier_expression_end
end
(* Global identifier. *);
loca8 := 0x6120616c;
_write_out(@loca8, 4);
loca8 := 0x00202c00 or loca80;
@ -208,11 +207,6 @@ begin
.Lcompile_identifier_expression_end
end
(*
Evaluates an expression and saves the result in a0.
a0 - X in aX, the register number to save the result.
*)
proc _build_expression()
var
loca0, loca20, loca28, loca8: Word
@ -305,13 +299,6 @@ begin
.Lbuild_expression_end
end
(*
Compiles an lvalue.
Parameters:
a0 - Pointer to the identifier.
a1 - Identifier length.
*)
proc _compile_designator_expression(loca84: ^Byte, loca80: Word)
var
loca0: Word
@ -352,12 +339,6 @@ begin
.Lcompile_designator_expression_end
end
(*
Compiles a statement beginning with an identifier.
Left values should be variables named "loca n", where n is the offset
of the variable on the stack, like loca8 or loca4.
*)
proc _compile_identifier()
var
loca0, loca16, loca8: Word
@ -393,13 +374,6 @@ begin
.Lcompile_identifier_end
end
(*
Compiles a procedure call. Expects s1 to point to the first argument.
a0 - Pointer to the procedure name.
a1 - Length of the procedure name.
Returns the procedure result in a0.
*)
proc _compile_call(loca84: ^Byte, loca80: Word)
var
loca0, loca4, loca12: Word
@ -422,10 +396,6 @@ begin
loca0 := 0x202c30;
_write_out(@loca0, 3);
(*
Only 6 arguments are supported with a0-a5.
Save all arguments on the stack so they aren't overridden afterwards.
*)
loca0 := -4 * loca12;
loca0 := loca0 + 60;
_printi(loca0);
@ -450,10 +420,7 @@ begin
loca12 := 0;
.Lcompile_call_restore;
(*
Just go through all a0-a5 registers and read them from stack.
If this stack value contains garbage, the procedure just shouldn't use it.
*)
loca0 := 0x6120776c;
_write_out(@loca0, 4);
loca4 := 0x36202c30;
@ -514,10 +481,6 @@ begin
_advance(1)
end
(*
Reads a token and returns its length in a0.
_read_token doesn't change s1, it finds the length of the token s1 is pointing to.
*)
proc _read_token()
var
loca0, loca4: Word
@ -612,7 +575,6 @@ begin
return loca4
end
(* Skips the spaces till the next non space character. *)
proc _skip_spaces()
var
loca0: Byte
@ -643,10 +605,6 @@ begin
.Lspace_loop_end
end
(*
Parameters:
a0 - Line length.
*)
proc _skip_comment(loca84: Word)
var
loca0: ^Byte
@ -684,10 +642,6 @@ begin
.Lskip_comment_end
end
(*
Parameters:
a0 - Line length.
*)
proc _compile_assembly(loca84: Word)
var loca0: ^Byte
begin
@ -846,7 +800,6 @@ begin
loca0 := 0x0a74;
_write_out(@loca0, 2);
(* .size identifier, size *);
loca0 := 0x7a69732e;
_write_out(@loca0, 4);
loca0 := 0x2065;
@ -882,7 +835,6 @@ begin
loca20 := _current();
_advance(loca16);
(* .type identifier, @function *);
loca0 := 0x7079742e;
_write_out(@loca0, 4);
loca0 := 0x2065;
@ -910,10 +862,6 @@ begin
loca12 := 0x6e;
loca8 := 0x69676562;
(*
Skip all declarations until we find the "begin" keyword, denoting the
beginning of the procedure body.
*)
.Lcompile_procedure_begin;
_skip_spaces();
loca0 := _read_token();
@ -1056,15 +1004,6 @@ begin
_write_out(@loca0, 4)
end
(*
Compares two strings: one has an explicit length, the other one is null-terminated.
a0 - The address of the token string.
a1 - The length of the string in a0.
a2 - The address of the null-terminated string.
If the strings match sets a0 to 0, otherwise sets it to 1.
*)
proc _token_compare(loca84: ^Byte, loca80: Word, loca76: ^Byte)
var
loca0: Bool
@ -1074,10 +1013,6 @@ begin
.Ltoken_compare_loop;
loca4 := _front(loca76);
(*
Will only be 0 if the current character in the null-terminated string is \0 and the remaining length of the
other string is 0.
*)
loca8 := loca4 or loca80;
if loca8 = 0 then
goto .Ltoken_compare_equal
@ -1135,7 +1070,6 @@ begin
_put_char(0x0a)
end
(* a0 - Line length. *)
proc _compile_label(loca84: Word)
var
loca0: Word
@ -1187,7 +1121,6 @@ begin
_write_out(@loca12, 4);
_put_char(0x20);
(* Write the label *);
_write_out(@loca16, 4);
_printi(s2);
@ -1213,20 +1146,10 @@ begin
loca12 := 0x0a3a0a3a;
_write_out(@loca12, 2);
(* Increment the label counter. *);
s2 := s2 + 1;
_advance(4)
end
(*
Parameters:
a0 - Line length.
a1 - Whether the section header was already emitted. If not it should be
emitted before any code is written.
Returns 1 in a0 if the parsed line contained a text section element such as a
procedure or the program entry point. Otherwise sets a0 to 0.
*)
proc _compile_line(loca84: Word, loca80: Bool)
var
loca0: Char
@ -1407,7 +1330,6 @@ begin
return loca8
end
(* Prints ".section .text" and exits. *)
proc _compile_text_section()
var loca0: Word
begin
@ -1466,7 +1388,6 @@ begin
_skip_spaces()
end
(* Finds the end of the line and returns its length in a0. *)
proc _read_line()
var
loca0: ^Byte
@ -1520,23 +1441,16 @@ begin
.Lcompile_end
end
(*
Returns the pointer to the current position in the source text in a0.
The position is the value of s1; it is only read here, not changed.
*)
proc _current()
begin
return s1
end
(*
Moves the current position in the source text forward.
Parameters:
a0 - The number of bytes to advance; it is added to s1.
*)
proc _advance(loca84: Word)
begin
s1 := s1 + loca84
end
(*
a0 - Pointer to an array to get the first element.
Returns the first character in the remaining source text.
*)
proc _front(loca84: ^Word)
begin
return _get(loca84) & 0xff
@ -1549,7 +1463,6 @@ begin
s2 := 1
end
(* Entry point. *)
begin
_main();
_compile()

View File

@ -268,7 +268,7 @@ transitions:
.word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Integer
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Decimal
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
@ -286,9 +286,9 @@ transitions:
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less
.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x08 Dot
.word 0x08ff, 0x0108, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x0108, 0x0108, 0x00ff, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x08 Hexadecimal after 0x.
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
@ -302,13 +302,13 @@ transitions:
.word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
.word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String
.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x0c Zero
.word 0x08ff, 0x00ff, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x00ff, 0x010d, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x0c Leading zero
.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff
.word 0x00ff, 0x02ff, 0x02ff, 0x02ff # 0x0d Hexadecimal
.word 0x00ff, 0x0108, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff
.word 0x00ff, 0x00ff, 0x00ff, 0x0108, 0x0108, 0x00ff, 0x00ff, 0x00ff
.word 0x00ff, 0x00ff, 0x00ff, 0x00ff # 0x0d Starting hexadecimal
.section .text
@ -592,14 +592,14 @@ _tokenize_next:
j .Ltokenize_next_end
.Ltokenize_next_integer:
lw a1, 12(sp)
sub a0, s1, a1
sw a0, 8(sp)
sw a0, 4(sp)
lw a0, 0(sp)
addi a1, sp, 4
li a2, 12
call _memcpy
lw t0, 0(sp)
li t1, TOKEN_INTEGER
sw t1, 0(t0)
lw t1, 12(sp)
sw t1, 8(t0)
sub t1, s1, t1
sw t1, 4(t0)
j .Ltokenize_next_end
.Ltokenize_next_end: