summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEugen Wissner <belka@caraus.de>2025-05-03 23:35:41 +0200
committerEugen Wissner <belka@caraus.de>2025-05-03 23:35:41 +0200
commit0a0bc4e1f2e263cdda83bd78a2a77ff68ae2334e (patch)
tree23e6b113d741761a2e897e4138a96c7f7701d348
parentdcfd6b1515679cfbc75de12a17352d9d1eddceaf (diff)
downloadelna-0a0bc4e1f2e263cdda83bd78a2a77ff68ae2334e.tar.gz
Tokenize single character symbols
-rw-r--r--Gemfile3
-rw-r--r--Rakefile3
-rw-r--r--boot/common-boot.s4
-rw-r--r--boot/definitions.inc24
-rw-r--r--boot/stage1.s369
-rw-r--r--boot/tokenizer.s207
6 files changed, 283 insertions, 327 deletions
diff --git a/Gemfile b/Gemfile
index 235744c..97bdde4 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,3 +1,6 @@
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
source 'https://rubygems.org'
diff --git a/Rakefile b/Rakefile
index 2bc2683..71b3293 100644
--- a/Rakefile
+++ b/Rakefile
@@ -1,3 +1,6 @@
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
require 'open3'
diff --git a/boot/common-boot.s b/boot/common-boot.s
index 26dad8d..a6fb04e 100644
--- a/boot/common-boot.s
+++ b/boot/common-boot.s
@@ -1,3 +1,7 @@
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
.global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp, _memchr, _memmem, _memcpy
diff --git a/boot/definitions.inc b/boot/definitions.inc
index 0e2f54e..4d8ab9c 100644
--- a/boot/definitions.inc
+++ b/boot/definitions.inc
@@ -1,3 +1,7 @@
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
# The constant should match the index in the keywords array in tokenizer.s.
.equ TOKEN_PROGRAM, 1
@@ -26,3 +30,23 @@
.equ TOKEN_DEFER, 24
.equ TOKEN_CASE, 25
.equ TOKEN_OF, 26
+
+# Each constant is 27 plus the character's index in the byte_keywords string (tokenizer.s).
+
+.equ TOKEN_AND, 27
+.equ TOKEN_DOT, 28
+.equ TOKEN_COMMA, 29
+.equ TOKEN_COLON, 30
+.equ TOKEN_SEMICOLON, 31
+.equ TOKEN_LEFT_PAREN, 32
+.equ TOKEN_RIGHT_PAREN, 33
+.equ TOKEN_LEFT_BRACKET, 34
+.equ TOKEN_RIGHT_BRACKET, 35
+.equ TOKEN_HAT, 36
+.equ TOKEN_EQUALS, 37
+.equ TOKEN_PLUS, 38
+.equ TOKEN_MINUS, 39
+.equ TOKEN_ASTERISK, 40
+.equ TOKEN_AT, 41
+
+.equ TOKEN_ASSIGN, 42
diff --git a/boot/stage1.s b/boot/stage1.s
index 9ab072d..6761bb2 100644
--- a/boot/stage1.s
+++ b/boot/stage1.s
@@ -1,6 +1,10 @@
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
.global _start # Program entry point.
-# Global variables or registers.
+# Registers used as global variables:
# s1 - Contains the current position in the source text.
# s2 - Label counter.
@@ -42,6 +46,10 @@ asm_neg_a0: .ascii "neg a0, a0\n"
.equ ASM_NEG_A0_SIZE, . - asm_neg_a0
asm_type: .ascii ".type "
.equ ASM_TYPE_SIZE, . - asm_type
+asm_type_function: .ascii ", @function\n"
+.equ ASM_TYPE_FUNCTION_SIZE, . - asm_type_function
+asm_type_object: .ascii ", @object\n"
+.equ ASM_TYPE_OBJECT_SIZE, . - asm_type_object
asm_restore_parameters:
.ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n"
.equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters
@@ -77,14 +85,6 @@ _compile_import:
call _tokenize_next
mv s1, a0
- /* DEBUG
- lw t0, 0(sp)
- addi t0, t0, '0'
- sw t0, 4(sp)
- addi a0, sp, 4
- li a1, 1
- call _write_error*/
-
j .Lcompile_import_loop
.Lcompile_import_end:
@@ -104,63 +104,35 @@ _build_binary_expression:
li a0, 0
call _build_expression
-
call _skip_spaces
- call _read_token
- sw a0, 20(sp)
- li t0, '&'
- sw t0, 16(sp)
mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_and
+ addi a1, sp, 16
+ call _tokenize_next
+ lw t0, 16(sp)
- li t0, 0x726f # or
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_or
+ li t1, TOKEN_AND
+ beq t0, t1, .L_build_binary_expression_and
- li t0, '='
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_equal
+ li t1, TOKEN_OR
+ beq t0, t1, .L_build_binary_expression_or
- li t0, '+'
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_plus
+ li t1, TOKEN_PLUS
+ beq t0, t1, .L_build_binary_expression_plus
- li t0, '-'
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_minus
+ li t1, TOKEN_EQUALS
+ beq t0, t1, .L_build_binary_expression_equal
- li t0, '*'
- sw t0, 16(sp)
- mv a0, s1
- lw a1, 20(sp)
- addi a2, sp, 16
- call _token_compare
- beqz a0, .L_build_binary_expression_product
+ li t1, TOKEN_ASTERISK
+ beq t0, t1, .L_build_binary_expression_product
+
+ li t1, TOKEN_MINUS
+ beq t0, t1, .L_build_binary_expression_minus
j .Lbuild_binary_expression_end
.L_build_binary_expression_equal:
- addi s1, s1, 1 # Skip =.
+ mv s1, a0 # Skip =.
li a0, 1
call _build_expression
la a0, asm_sub_a0_a1
@@ -174,7 +146,12 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_and:
- addi s1, s1, 1 # Skip &.
+ /* DEBUG
+ addi a0, s1, 0
+ li a1, 4
+ call _write_error */
+
+ mv s1, a0 # Skip &.
li a0, 1
call _build_expression
la a0, asm_and_a0_a1
@@ -184,7 +161,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_or:
- addi s1, s1, 2 # Skip or.
+ mv s1, a0 # Skip or.
li a0, 1
call _build_expression
la a0, asm_or_a0_a1
@@ -194,7 +171,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_plus:
- addi s1, s1, 1 # Skip +.
+ mv s1, a0 # Skip +.
li a0, 1
call _build_expression
la a0, asm_add_a0_a1
@@ -204,7 +181,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_minus:
- addi s1, s1, 1 # Skip -.
+ mv s1, a0 # Skip -.
li a0, 1
call _build_expression
la a0, asm_sub_a0_a1
@@ -214,7 +191,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_product:
- addi s1, s1, 1 # Skip *.
+ mv s1, a0 # Skip *.
li a0, 1
call _build_expression
la a0, asm_mul_a0_a1
@@ -937,29 +914,31 @@ _skip_comment:
# Parameters:
# a0 - Line length.
-.type _compile_assembly, @function
-_compile_assembly:
+.type _compile_procedure_section, @function
+_compile_procedure_section:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
- sw a0, 4(sp) # a0 - Line length.
+.Lcompile_procedure_section_loop:
+ call _skip_spaces
+ call _skip_comment
+ call _skip_spaces
- # Write the source to the standard output.
mv a0, s1
- lw a1, 4(sp)
- call _write_out
-
- lw t0, 4(sp)
- add s1, s1, t0
+ addi a1, sp, 0
+ call _tokenize_next
+ li t0, TOKEN_PROC
+ lw t1, 0(sp)
+ bne t0, t1, .Lcompile_procedure_section_end
- li a0, '\n'
- call _put_char
+ call _compile_procedure
- addi s1, s1, 1 # Skip the new line.
+ j .Lcompile_procedure_section_loop
+.Lcompile_procedure_section_end:
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
@@ -1038,15 +1017,19 @@ _compile_constant:
sw s0, 8(sp)
addi s0, sp, 16
- call _read_token
+ mv a0, s1
+ addi a1, sp, 0
+ call _tokenize_next
- mv a1, a0 # The identifier length from _read_token should be in a1.
- mv a0, s1 # Save the identifier pointer before advancing it.
- add s1, s1, a1
+ sub a1, a0, s1 # The identifier end from _tokenize_next should be in a0.
+ mv a0, s1
+ add s1, s1, a1 # Save the identifier pointer before advancing it.
call _write_out
- call _skip_spaces
- addi s1, s1, 2 # Skip the assignment sign.
+ mv a0, s1
+ addi a1, sp, 0
+ call _tokenize_next
+ mv s1, a0 # Skip the assignment sign.
# : .long
li t0, 0x20676e6f # ong_
@@ -1154,42 +1137,10 @@ _compile_variable:
lw a1, 24(sp)
call _write_out
- li t0, 0x0a74 # t\n
- sw t0, 12(sp)
- li t0, 0x63656a62 # bjec
- sw t0, 8(sp)
- li t0, 0x6f40202c # , @o
- sw t0, 4(sp)
- addi a0, sp, 4
- li a1, 10
- call _write_out
-
- # .size identifier, size
- li t0, 0x2065 # e_
- sw t0, 12(sp)
- li t0, 0x7a69732e # .siz
- sw t0, 8(sp)
- addi a0, sp, 8
- li a1, 6
- call _write_out
-
- lw a0, 28(sp)
- lw a1, 24(sp)
- call _write_out
-
- li t0, 0x202c # ,_
- sw t0, 12(sp)
- addi a0, sp, 12
- li a1, 2
- call _write_out
-
- lw a0, 20(sp)
- lw a1, 16(sp)
+ la a0, asm_type_object
+ li a1, ASM_TYPE_OBJECT_SIZE
call _write_out
- li a0, '\n'
- call _put_char
-
# identifier: .zero size
lw a0, 28(sp)
lw a1, 24(sp)
@@ -1239,14 +1190,8 @@ _compile_procedure:
lw a1, 16(sp)
call _write_out
- li t0, 0x0a6e6f69 # ion\n
- sw t0, 12(sp)
- li t0, 0x74636e75 # unct
- sw t0, 8(sp)
- li t0, 0x6640202c # , @f
- sw t0, 4(sp)
- addi a0, sp, 4
- li a1, 12
+ la a0, asm_type_function
+ li a1, ASM_TYPE_FUNCTION_SIZE
call _write_out
lw a0, 20(sp)
@@ -1356,7 +1301,7 @@ _compile_procedure:
beqz a0, .Lcompile_procedure_end
lw a0, 12(sp)
- call _compile_line
+ call _compile_statement
j .Lcompile_procedure_body
.Lcompile_procedure_end:
@@ -1577,7 +1522,7 @@ _compile_if:
call _read_line
li a1, 1
- call _compile_line
+ call _compile_statement
j .Lcompile_if_loop
@@ -1614,8 +1559,8 @@ _compile_if:
#
# Returns 1 in a0 if the parsed line contained a text section element such as a
# procedure or the program entry point. Otherwise sets a0 to 0.
-.type _compile_line, @function
-_compile_line:
+.type _compile_statement, @function
+_compile_statement:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
@@ -1626,45 +1571,17 @@ _compile_line:
sw a0, 20(sp)
sw a1, 16(sp)
- beqz a0, .Lcompile_line_empty # Skip an empty line.
-
- lbu t0, (s1)
- li t1, '('
- beq t0, t1, .Lcompile_line_comment
-
- li t0, 0x636f7270 # proc
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_procedure
-
- li t0, 0x69676562 # begi
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_begin
-
- li t0, 0x2e646e65 # end.
- sw t0, 12(sp)
- mv a0, s1
- addi a1, sp, 12
- li a2, 4
- call _memcmp
- beqz a0, .Lcompile_line_exit
+ call _skip_comment
mv a0, s1
lw a1, 20(sp)
call _is_local_identifier
- bnez a0, .Lcompile_line_identifier
+ bnez a0, .Lcompile_statement_identifier
mv a0, s1
li a1, 2
call _is_register_identifier
- bnez a0, .Lcompile_line_identifier
+ bnez a0, .Lcompile_statement_identifier
li t0, 0x6f746f67 # goto
sw t0, 12(sp)
@@ -1672,7 +1589,7 @@ _compile_line:
addi a1, sp, 12
li a2, 4
call _memcmp
- beqz a0, .Lcompile_line_goto
+ beqz a0, .Lcompile_statement_goto
li t0, 0x75746572 # retu
sw t0, 12(sp)
@@ -1680,7 +1597,7 @@ _compile_line:
addi a1, sp, 12
li a2, 4
call _memcmp
- beqz a0, .Lcompile_line_return
+ beqz a0, .Lcompile_statement_return
li t0, 0x6669 # if
sw t0, 12(sp)
@@ -1688,77 +1605,42 @@ _compile_line:
addi a1, sp, 12
li a2, 2
call _memcmp
- beqz a0, .Lcompile_line_if
+ beqz a0, .Lcompile_statement_if
lbu t0, (s1)
li t1, '.'
- beq t0, t1, .Lcompile_line_label
+ beq t0, t1, .Lcompile_statement_label
li t1, '_'
- beq t0, t1, .Lcompile_line_identifier
+ beq t0, t1, .Lcompile_statement_identifier
- j .Lcompile_line_unchanged # Else.
+ j .Lcompile_statement_empty # Else.
-.Lcompile_line_if:
+.Lcompile_statement_if:
call _compile_if
- j .Lcompile_line_section
+ j .Lcompile_statement_end
-.Lcompile_line_label:
+.Lcompile_statement_label:
lw a0, 20(sp)
call _compile_label
- j .Lcompile_line_section
+ j .Lcompile_statement_end
-.Lcompile_line_return:
+.Lcompile_statement_return:
call _compile_return
- j .Lcompile_line_section
+ j .Lcompile_statement_end
-.Lcompile_line_goto:
+.Lcompile_statement_goto:
call _compile_goto
- j .Lcompile_line_section
+ j .Lcompile_statement_end
-.Lcompile_line_identifier:
+.Lcompile_statement_identifier:
call _compile_identifier
- j .Lcompile_line_section
+ j .Lcompile_statement_end
-.Lcompile_line_exit:
- call _compile_exit
- j .Lcompile_line_section
-
-.Lcompile_line_begin:
- lw a1, 16(sp)
- bnez a1, .Lcompile_line_compile_entry
- call _compile_text_section
-.Lcompile_line_compile_entry:
- call _compile_entry_point
- li a0, 1
- j .Lcompile_line_end
-
-.Lcompile_line_procedure:
- lw a1, 16(sp)
- bnez a1, .Lcompile_line_compile_procedure
- call _compile_text_section
-.Lcompile_line_compile_procedure:
- call _compile_procedure
- li a0, 1
- j .Lcompile_line_end
-
-.Lcompile_line_comment:
- lw a0, 20(sp)
- call _skip_comment
- j .Lcompile_line_section
-
-.Lcompile_line_empty:
+.Lcompile_statement_empty:
addi s1, s1, 1
- j .Lcompile_line_section
+ j .Lcompile_statement_end
-.Lcompile_line_unchanged:
- lw a0, 20(sp)
- call _compile_assembly
- j .Lcompile_line_section
-
-.Lcompile_line_section:
- mv a0, zero
-
-.Lcompile_line_end:
+.Lcompile_statement_end:
sw a0, 12(sp)
call _skip_spaces
call _skip_comment
@@ -1804,20 +1686,25 @@ _compile_entry_point:
addi s1, s1, 6 # Skip begin\n.
- # Epilogue.
- lw ra, 4(sp)
- lw s0, 0(sp)
- addi sp, sp, 8
- ret
+ # Generate the body of the procedure.
+.Lcompile_entry_point_body:
+ call _skip_spaces
+ call _read_line
+ sw a0, 12(sp)
+ li t0, 0x2e646e65 # end
+ sw t0, 8(sp)
+ mv a0, s1
+ addi a1, sp, 8
+ li a2, 4
+ call _memcmp
-.type _compile_exit, @function
-_compile_exit:
- # Prologue.
- addi sp, sp, -8
- sw ra, 4(sp)
- sw s0, 0(sp)
- addi s0, sp, 8
+ beqz a0, .Lcompile_entry_point_end
+
+ lw a0, 12(sp)
+ call _compile_statement
+ j .Lcompile_entry_point_body
+.Lcompile_entry_point_end:
la a0, asm_exit
li a1, ASM_EXIT_SIZE
call _write_out
@@ -1857,30 +1744,13 @@ _compile:
sw s0, 8(sp)
addi s0, sp, 16
- sw zero, 4(sp) # Whether the text section header was already emitted.
-
call _compile_module_declaration
call _compile_import
call _compile_constant_section
call _compile_variable_section
-
-.Lcompile_do:
- lbu t0, (s1) # t0 = Current character.
- beqz t0, .Lcompile_end # Exit the loop on the NUL character.
-
- call _skip_spaces
- call _read_line
- lw a1, 4(sp)
- call _compile_line
-
- beqz a0, .Lcompile_do
- # Update whether the text section header was already emitted.
- lw t0, 4(sp)
- or t0, t0, a0
- sw t0, 4(sp)
-
- j .Lcompile_do
-.Lcompile_end:
+ call _compile_text_section
+ call _compile_procedure_section
+ call _compile_entry_point
# Epilogue.
lw ra, 12(sp)
@@ -1888,22 +1758,6 @@ _compile:
addi sp, sp, 16
ret
-.type _main, @function
-_main:
- # Prologue.
- addi sp, sp, -8
- sw ra, 4(sp)
- sw s0, 0(sp)
- addi s0, sp, 8
-
- li s2, 1
-
- # Epilogue.
- lw ra, 4(sp)
- lw s0, 0(sp)
- addi sp, sp, 8
- ret
-
# Entry point.
.type _start, @function
_start:
@@ -1912,8 +1766,7 @@ _start:
li a1, SOURCE_BUFFER_SIZE # Buffer size.
call _read_file
- mv a0, s1
- call _main
+ li s2, 1
call _compile
# Call exit.
diff --git a/boot/tokenizer.s b/boot/tokenizer.s
index 4315f66..67b2602 100644
--- a/boot/tokenizer.s
+++ b/boot/tokenizer.s
@@ -1,4 +1,10 @@
-.global _tokenize_next, classification, transitions, keywords
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
+.global _tokenize_next, classification, transitions, keywords, byte_keywords
+
+.include "boot/definitions.inc"
.section .rodata
@@ -8,7 +14,7 @@
#
# Classification:
#
-.equ CLASS_INVALID, 0x0
+.equ CLASS_INVALID, 0x00
.equ CLASS_DIGIT, 0x01
.equ CLASS_CHARACTER, 0x02
.equ CLASS_SPACE, 0x03
@@ -25,9 +31,11 @@
.equ CLASS_EOF, 0x0e
.equ CLASS_DOT, 0x0f
.equ CLASS_MINUS, 0x10
-.equ CLASS_DOUBLE_QUOTE, 0x11
+.equ CLASS_QUOTE, 0x11
+.equ CLASS_GREATER, 0x12
+.equ CLASS_LESS, 0x13
-.equ CLASS_COUNT, 18
+.equ CLASS_COUNT, 20
.type classification, @object
.size classification, 128
@@ -66,12 +74,12 @@ classification:
.byte CLASS_INVALID # 1F US
.byte CLASS_SPACE # 20 Space
.byte CLASS_SINGLE # 21 !
- .byte CLASS_DOUBLE_QUOTE # 22 "
+ .byte CLASS_QUOTE # 22 "
.byte 0x00 # 23 #
.byte 0x00 # 24 $
.byte CLASS_SINGLE # 25 %
.byte CLASS_SINGLE # 26 &
- .byte 0x00 # 27 '
+ .byte CLASS_QUOTE # 27 '
.byte CLASS_LEFT_PAREN # 28 (
.byte CLASS_RIGHT_PAREN # 29 )
.byte CLASS_ASTERISK # 2A *
@@ -92,9 +100,9 @@ classification:
.byte CLASS_DIGIT # 39 9
.byte CLASS_COLON # 3A :
.byte CLASS_SINGLE # 3B ;
- .byte 0x00 # 3C <
+ .byte CLASS_LESS # 3C <
.byte CLASS_EQUALS # 3D =
- .byte 0x00 # 3E >
+ .byte CLASS_GREATER # 3E >
.byte 0x00 # 3F ?
.byte CLASS_SINGLE # 40 @
.byte CLASS_CHARACTER # 41 A
@@ -220,7 +228,10 @@ keywords:
.ascii "case"
.word 2
.ascii "of"
-.size keywords, . - keywords
+
+.type byte_keywords, @object
+byte_keywords: .ascii "&.,:;()[]^=+-*@" # Order must match TOKEN_AND..TOKEN_AT in definitions.inc.
+.equ BYTE_KEYWORDS_SIZE, . - byte_keywords
.section .data
@@ -240,78 +251,66 @@ keywords:
# handles each action.
#
.type transitions, @object
-.size transitions, 17 * CLASS_COUNT # state count * CLASS_COUNT
+.size transitions, 14 * CLASS_COUNT # state count * CLASS_COUNT
transitions:
# Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL .
- # - "
- .word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107
- .word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start
- .word 0x010f, 0x0110
+ # - " or ' > <
+ .word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
+ .word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x0108
+ .word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon
- .word 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon
.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
- .word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff # 02 Identifier
- .word 0x05ff, 0x05ff
+ .word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff
+ .word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier
.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer
- .word 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x03 Integer
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign
- .word 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Eauals
- .word 0x02ff, 0x02ff
+ .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
+ .word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
+ .word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren
- .word 0x02ff, 0x02ff
+ .word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x06 Left paren
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren
- .word 0x02ff, 0x02ff
-
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk
- .word 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less
+
+ .word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
+ .word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x08 Dot
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
- .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment
- .word 0x0109, 0x0109
+ .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
+ .word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
- .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment
- .word 0x0109, 0x0109
+ .word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
+ .word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token
- .word 0x02ff, 0x02ff
+ .word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
+ .word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
+ .word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String
.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero
- .word 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x0c Zero
.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff # 0d Hexadecimal
- .word 0x00ff, 0x02ff
-
- .word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 0e Dot
- .word 0x02ff, 0x02ff
-
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0f Minus
- .word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
- .word 0x02ff, 0x02ff
-
- .word 0x00ff, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110 # 10 Starting string.
- .word 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110
- .word 0x0110, 0x04ff
+ .word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff
+ .word 0x00ff, 0x02ff, 0x02ff, 0x02ff # 0x0d Hexadecimal
.section .text
@@ -406,6 +405,57 @@ _classify_identifier:
addi sp, sp, 16
ret
+# Maps a single-character symbol to its token type using byte_keywords.
+#
+# Parameters:
+# a0 - Token character; presumably always one of the byte_keywords characters.
+#
+# Sets a0 to TOKEN_AND plus the character's index in byte_keywords.
+.type _classify_single, @function
+_classify_single:
+ # Prologue.
+ addi sp, sp, -16
+ sw ra, 12(sp)
+ sw s0, 8(sp)
+ addi s0, sp, 16
+
+ mv a1, a0 # a1 = character to look up.
+ li a2, BYTE_KEYWORDS_SIZE
+ la a0, byte_keywords
+ call _memchr # Assumed to return a pointer to the match, like C memchr - TODO confirm.
+
+ la a1, byte_keywords
+ sub a0, a0, a1 # a0 = offset of the match from the start of byte_keywords.
+ addi a0, a0, TOKEN_AND # TOKEN_AND is the token type of the first byte_keywords character.
+
+ # Epilogue.
+ lw ra, 12(sp)
+ lw s0, 8(sp)
+ addi sp, sp, 16
+ ret
+
+# Classifies a symbol containing multiple characters (probably 2).
+#
+# Parameters:
+# a0 - Token length.
+# a1 - Token pointer.
+#
+# Sets a0 to TOKEN_ASSIGN if the token starts with ':'; otherwise a0 is left unchanged.
+.type _classify_composite, @function
+_classify_composite:
+ lbu t0, 0(a1) # t0 = first character of the token.
+ li t1, ':'
+ beq t0, t1, .Lclassify_composite_assign
+
+ j .Lclassify_composite_end # No match: a0 still holds the token length passed in.
+
+.Lclassify_composite_assign:
+ li a0, TOKEN_ASSIGN
+ j .Lclassify_composite_end
+
+.Lclassify_composite_end:
+ ret
+
# Initializes the classification table.
#
# Parameters:
@@ -453,12 +503,18 @@ _tokenize_next:
li t0, 0x03 # Skip action.
beq t1, t0, .Ltokenize_next_skip
- li t0, 0x04 # Comment action.
+ li t0, 0x04 # Delimited string action.
beq t1, t0, .Ltokenize_next_comment
li t0, 0x05 # Finalize identifier.
beq t1, t0, .Ltokenize_next_identifier
+ li t0, 0x06 # Single character symbol action.
+ beq t1, t0, .Ltokenize_next_single
+
+ li t0, 0x07 # An action for symbols containing multiple characters.
+ beq t1, t0, .Ltokenize_next_composite
+
j .Ltokenize_next_reject
.Ltokenize_next_reject:
@@ -481,24 +537,17 @@ _tokenize_next:
.Ltokenize_next_print:
/* DEBUG
- lw a0, 4(sp)
- mv a1, s1
- sub a1, a1, a0
- call _write_error
- DEBUG */
+ addi a0, a0, 21
+ sw a0, 0(sp)
+ addi a0, sp, 0
+ li a1, 1
+ call _write_error */
j .Ltokenize_next_end
.Ltokenize_next_comment:
addi s1, s1, 1
- /* DEBUG
- lw a0, 4(sp)
- mv a1, s1
- sub a1, a1, a0
- call _write_error
- DEBUG */
-
j .Ltokenize_next_end
.Ltokenize_next_identifier:
@@ -512,6 +561,26 @@ _tokenize_next:
j .Ltokenize_next_end
+.Ltokenize_next_single:
+ lw a0, 4(sp)
+ addi s1, a0, 1
+ lbu a0, (a0)
+ call _classify_single
+ lw a1, 0(sp)
+ sw a0, (a1)
+
+ j .Ltokenize_next_end
+
+.Ltokenize_next_composite:
+ addi s1, s1, 1
+ lw a1, 4(sp)
+ sub a0, s1, a1
+ call _classify_composite
+ lw a1, 0(sp)
+ sw a0, (a1)
+
+ j .Ltokenize_next_end
+
.Ltokenize_next_end:
mv a0, s1 # Return the advanced text pointer.