Tokenize single character symbols

This commit is contained in:
Eugen Wissner 2025-05-03 23:35:41 +02:00
parent dcfd6b1515
commit 0a0bc4e1f2
Signed by: belka
GPG Key ID: A27FDC1E8EE902C0
6 changed files with 291 additions and 335 deletions

View File

@ -1,3 +1,6 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
source 'https://rubygems.org'

View File

@ -1,3 +1,6 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
require 'open3'

View File

@ -1,3 +1,7 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
.global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp, _memchr, _memmem, _memcpy

View File

@ -1,3 +1,7 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# The constant should match the index in the keywords array in tokenizer.s.
.equ TOKEN_PROGRAM, 1
@ -26,3 +30,23 @@
.equ TOKEN_DEFER, 24
.equ TOKEN_CASE, 25
.equ TOKEN_OF, 26
# Single character symbols. Every constant equals TOKEN_AND plus the index of
# the character in the byte_keywords string.
.equ TOKEN_AND, 27                            # &
.equ TOKEN_DOT, TOKEN_AND + 1                 # .
.equ TOKEN_COMMA, TOKEN_AND + 2               # ,
.equ TOKEN_COLON, TOKEN_AND + 3               # :
.equ TOKEN_SEMICOLON, TOKEN_AND + 4           # ;
.equ TOKEN_LEFT_PAREN, TOKEN_AND + 5          # (
.equ TOKEN_RIGHT_PAREN, TOKEN_AND + 6         # )
.equ TOKEN_LEFT_BRACKET, TOKEN_AND + 7        # [
.equ TOKEN_RIGHT_BRACKET, TOKEN_AND + 8       # ]
.equ TOKEN_HAT, TOKEN_AND + 9                 # ^
.equ TOKEN_EQUALS, TOKEN_AND + 10             # =
.equ TOKEN_PLUS, TOKEN_AND + 11               # +
.equ TOKEN_MINUS, TOKEN_AND + 12              # -
.equ TOKEN_ASTERISK, TOKEN_AND + 13           # *
.equ TOKEN_AT, TOKEN_AND + 14                 # @
# Symbol consisting of several characters; it has no entry in byte_keywords.
.equ TOKEN_ASSIGN, TOKEN_AT + 1               # :=

View File

@ -1,6 +1,10 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global _start # Program entry point.
# Global variables or registers.
# Registers used as global variables:
# s1 - Contains the current position in the source text.
# s2 - Label counter.
@ -42,6 +46,10 @@ asm_neg_a0: .ascii "neg a0, a0\n"
.equ ASM_NEG_A0_SIZE, . - asm_neg_a0
asm_type: .ascii ".type "
.equ ASM_TYPE_SIZE, . - asm_type
asm_type_function: .ascii ", @function\n"
.equ ASM_TYPE_FUNCTION_SIZE, . - asm_type_function
asm_type_object: .ascii ", @object\n"
.equ ASM_TYPE_OBJECT_SIZE, . - asm_type_object
asm_restore_parameters:
.ascii "lw a0, 60(sp)\nlw a1, 56(sp)\nlw a2, 52(sp)\nlw a3, 48(sp)\nlw a4, 44(sp)\nlw a5, 40(sp)\n"
.equ ASM_RESTORE_PARAMETERS_SIZE, . - asm_restore_parameters
@ -77,14 +85,6 @@ _compile_import:
call _tokenize_next
mv s1, a0
/* DEBUG
lw t0, 0(sp)
addi t0, t0, '0'
sw t0, 4(sp)
addi a0, sp, 4
li a1, 1
call _write_error*/
j .Lcompile_import_loop
.Lcompile_import_end:
@ -104,63 +104,35 @@ _build_binary_expression:
li a0, 0
call _build_expression
call _skip_spaces
call _read_token
sw a0, 20(sp)
li t0, '&'
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_and
addi a1, sp, 16
call _tokenize_next
lw t0, 16(sp)
li t0, 0x726f # or
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_or
li t1, TOKEN_AND
beq t0, t1, .L_build_binary_expression_and
li t0, '='
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_equal
li t1, TOKEN_OR
beq t0, t1, .L_build_binary_expression_or
li t0, '+'
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_plus
li t1, TOKEN_PLUS
beq t0, t1, .L_build_binary_expression_plus
li t0, '-'
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_minus
li t1, TOKEN_EQUALS
beq t0, t1, .L_build_binary_expression_equal
li t0, '*'
sw t0, 16(sp)
mv a0, s1
lw a1, 20(sp)
addi a2, sp, 16
call _token_compare
beqz a0, .L_build_binary_expression_product
li t1, TOKEN_ASTERISK
beq t0, t1, .L_build_binary_expression_product
li t1, TOKEN_MINUS
beq t0, t1, .L_build_binary_expression_minus
j .Lbuild_binary_expression_end
.L_build_binary_expression_equal:
addi s1, s1, 1 # Skip =.
mv s1, a0 # Skip =.
li a0, 1
call _build_expression
la a0, asm_sub_a0_a1
@ -174,7 +146,12 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_and:
addi s1, s1, 1 # Skip &.
/* DEBUG
addi a0, s1, 0
li a1, 4
call _write_error */
mv s1, a0 # Skip &.
li a0, 1
call _build_expression
la a0, asm_and_a0_a1
@ -184,7 +161,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_or:
addi s1, s1, 2 # Skip or.
mv s1, a0 # Skip or.
li a0, 1
call _build_expression
la a0, asm_or_a0_a1
@ -194,7 +171,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_plus:
addi s1, s1, 1 # Skip +.
mv s1, a0 # Skip +.
li a0, 1
call _build_expression
la a0, asm_add_a0_a1
@ -204,7 +181,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_minus:
addi s1, s1, 1 # Skip -.
mv s1, a0 # Skip -.
li a0, 1
call _build_expression
la a0, asm_sub_a0_a1
@ -214,7 +191,7 @@ _build_binary_expression:
j .Lbuild_binary_expression_end
.L_build_binary_expression_product:
addi s1, s1, 1 # Skip *.
mv s1, a0 # Skip *.
li a0, 1
call _build_expression
la a0, asm_mul_a0_a1
@ -937,29 +914,31 @@ _skip_comment:
# Parameters:
# a0 - Line length.
.type _compile_assembly, @function
_compile_assembly:
.type _compile_procedure_section, @function
_compile_procedure_section:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sw a0, 4(sp) # a0 - Line length.
.Lcompile_procedure_section_loop:
call _skip_spaces
call _skip_comment
call _skip_spaces
# Write the source to the standard output.
mv a0, s1
lw a1, 4(sp)
call _write_out
addi a1, sp, 0
call _tokenize_next
li t0, TOKEN_PROC
lw t1, 0(sp)
bne t0, t1, .Lcompile_procedure_section_end
lw t0, 4(sp)
add s1, s1, t0
call _compile_procedure
li a0, '\n'
call _put_char
addi s1, s1, 1 # Skip the new line.
j .Lcompile_procedure_section_loop
.Lcompile_procedure_section_end:
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
@ -1038,15 +1017,19 @@ _compile_constant:
sw s0, 8(sp)
addi s0, sp, 16
call _read_token
mv a0, s1
addi a1, sp, 0
call _tokenize_next
mv a1, a0 # The identifier length from _read_token should be in a1.
mv a0, s1 # Save the identifier pointer before advancing it.
add s1, s1, a1
sub a1, a0, s1 # The identifier end from _tokenize_next should be in a0.
mv a0, s1
add s1, s1, a1 # Save the identifier pointer before advancing it.
call _write_out
call _skip_spaces
addi s1, s1, 2 # Skip the assignment sign.
mv a0, s1
addi a1, sp, 0
call _tokenize_next
mv s1, a0 # Skip the assignment sign.
# : .long
li t0, 0x20676e6f # ong_
@ -1154,42 +1137,10 @@ _compile_variable:
lw a1, 24(sp)
call _write_out
li t0, 0x0a74 # t\n
sw t0, 12(sp)
li t0, 0x63656a62 # bjec
sw t0, 8(sp)
li t0, 0x6f40202c # , @o
sw t0, 4(sp)
addi a0, sp, 4
li a1, 10
la a0, asm_type_object
li a1, ASM_TYPE_OBJECT_SIZE
call _write_out
# .size identifier, size
li t0, 0x2065 # e_
sw t0, 12(sp)
li t0, 0x7a69732e # .siz
sw t0, 8(sp)
addi a0, sp, 8
li a1, 6
call _write_out
lw a0, 28(sp)
lw a1, 24(sp)
call _write_out
li t0, 0x202c # ,_
sw t0, 12(sp)
addi a0, sp, 12
li a1, 2
call _write_out
lw a0, 20(sp)
lw a1, 16(sp)
call _write_out
li a0, '\n'
call _put_char
# identifier: .zero size
lw a0, 28(sp)
lw a1, 24(sp)
@ -1239,14 +1190,8 @@ _compile_procedure:
lw a1, 16(sp)
call _write_out
li t0, 0x0a6e6f69 # ion\n
sw t0, 12(sp)
li t0, 0x74636e75 # unct
sw t0, 8(sp)
li t0, 0x6640202c # , @f
sw t0, 4(sp)
addi a0, sp, 4
li a1, 12
la a0, asm_type_function
li a1, ASM_TYPE_FUNCTION_SIZE
call _write_out
lw a0, 20(sp)
@ -1356,7 +1301,7 @@ _compile_procedure:
beqz a0, .Lcompile_procedure_end
lw a0, 12(sp)
call _compile_line
call _compile_statement
j .Lcompile_procedure_body
.Lcompile_procedure_end:
@ -1577,7 +1522,7 @@ _compile_if:
call _read_line
li a1, 1
call _compile_line
call _compile_statement
j .Lcompile_if_loop
@ -1614,8 +1559,8 @@ _compile_if:
#
# Returns 1 in a0 if the parsed line contained a text section element such as a
# procedure or the program entry point. Otherwise sets a0 to 0.
.type _compile_line, @function
_compile_line:
.type _compile_statement, @function
_compile_statement:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
@ -1626,45 +1571,17 @@ _compile_line:
sw a0, 20(sp)
sw a1, 16(sp)
beqz a0, .Lcompile_line_empty # Skip an empty line.
lbu t0, (s1)
li t1, '('
beq t0, t1, .Lcompile_line_comment
li t0, 0x636f7270 # proc
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_procedure
li t0, 0x69676562 # begi
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_begin
li t0, 0x2e646e65 # end.
sw t0, 12(sp)
mv a0, s1
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_exit
call _skip_comment
mv a0, s1
lw a1, 20(sp)
call _is_local_identifier
bnez a0, .Lcompile_line_identifier
bnez a0, .Lcompile_statement_identifier
mv a0, s1
li a1, 2
call _is_register_identifier
bnez a0, .Lcompile_line_identifier
bnez a0, .Lcompile_statement_identifier
li t0, 0x6f746f67 # goto
sw t0, 12(sp)
@ -1672,7 +1589,7 @@ _compile_line:
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_goto
beqz a0, .Lcompile_statement_goto
li t0, 0x75746572 # retu
sw t0, 12(sp)
@ -1680,7 +1597,7 @@ _compile_line:
addi a1, sp, 12
li a2, 4
call _memcmp
beqz a0, .Lcompile_line_return
beqz a0, .Lcompile_statement_return
li t0, 0x6669 # if
sw t0, 12(sp)
@ -1688,77 +1605,42 @@ _compile_line:
addi a1, sp, 12
li a2, 2
call _memcmp
beqz a0, .Lcompile_line_if
beqz a0, .Lcompile_statement_if
lbu t0, (s1)
li t1, '.'
beq t0, t1, .Lcompile_line_label
beq t0, t1, .Lcompile_statement_label
li t1, '_'
beq t0, t1, .Lcompile_line_identifier
beq t0, t1, .Lcompile_statement_identifier
j .Lcompile_line_unchanged # Else.
j .Lcompile_statement_empty # Else.
.Lcompile_line_if:
.Lcompile_statement_if:
call _compile_if
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_label:
.Lcompile_statement_label:
lw a0, 20(sp)
call _compile_label
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_return:
.Lcompile_statement_return:
call _compile_return
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_goto:
.Lcompile_statement_goto:
call _compile_goto
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_identifier:
.Lcompile_statement_identifier:
call _compile_identifier
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_exit:
call _compile_exit
j .Lcompile_line_section
.Lcompile_line_begin:
lw a1, 16(sp)
bnez a1, .Lcompile_line_compile_entry
call _compile_text_section
.Lcompile_line_compile_entry:
call _compile_entry_point
li a0, 1
j .Lcompile_line_end
.Lcompile_line_procedure:
lw a1, 16(sp)
bnez a1, .Lcompile_line_compile_procedure
call _compile_text_section
.Lcompile_line_compile_procedure:
call _compile_procedure
li a0, 1
j .Lcompile_line_end
.Lcompile_line_comment:
lw a0, 20(sp)
call _skip_comment
j .Lcompile_line_section
.Lcompile_line_empty:
.Lcompile_statement_empty:
addi s1, s1, 1
j .Lcompile_line_section
j .Lcompile_statement_end
.Lcompile_line_unchanged:
lw a0, 20(sp)
call _compile_assembly
j .Lcompile_line_section
.Lcompile_line_section:
mv a0, zero
.Lcompile_line_end:
.Lcompile_statement_end:
sw a0, 12(sp)
call _skip_spaces
call _skip_comment
@ -1804,20 +1686,25 @@ _compile_entry_point:
addi s1, s1, 6 # Skip begin\n.
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Generate the body of the procedure.
.Lcompile_entry_point_body:
call _skip_spaces
call _read_line
sw a0, 12(sp)
li t0, 0x2e646e65 # end
sw t0, 8(sp)
mv a0, s1
addi a1, sp, 8
li a2, 4
call _memcmp
.type _compile_exit, @function
_compile_exit:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
beqz a0, .Lcompile_entry_point_end
lw a0, 12(sp)
call _compile_statement
j .Lcompile_entry_point_body
.Lcompile_entry_point_end:
la a0, asm_exit
li a1, ASM_EXIT_SIZE
call _write_out
@ -1857,30 +1744,13 @@ _compile:
sw s0, 8(sp)
addi s0, sp, 16
sw zero, 4(sp) # Whether the text section header was already emitted.
call _compile_module_declaration
call _compile_import
call _compile_constant_section
call _compile_variable_section
.Lcompile_do:
lbu t0, (s1) # t0 = Current character.
beqz t0, .Lcompile_end # Exit the loop on the NUL character.
call _skip_spaces
call _read_line
lw a1, 4(sp)
call _compile_line
beqz a0, .Lcompile_do
# Update whether the text section header was already emitted.
lw t0, 4(sp)
or t0, t0, a0
sw t0, 4(sp)
j .Lcompile_do
.Lcompile_end:
call _compile_text_section
call _compile_procedure_section
call _compile_entry_point
# Epilogue.
lw ra, 12(sp)
@ -1888,22 +1758,6 @@ _compile:
addi sp, sp, 16
ret
.type _main, @function
_main:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
li s2, 1
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Entry point.
.type _start, @function
_start:
@ -1912,8 +1766,7 @@ _start:
li a1, SOURCE_BUFFER_SIZE # Buffer size.
call _read_file
mv a0, s1
call _main
li s2, 1
call _compile
# Call exit.

View File

@ -1,4 +1,10 @@
.global _tokenize_next, classification, transitions, keywords
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global _tokenize_next, classification, transitions, keywords, byte_keywords
.include "boot/definitions.inc"
.section .rodata
@ -8,7 +14,7 @@
#
# Classification:
#
.equ CLASS_INVALID, 0x0
.equ CLASS_INVALID, 0x00
.equ CLASS_DIGIT, 0x01
.equ CLASS_CHARACTER, 0x02
.equ CLASS_SPACE, 0x03
@ -25,9 +31,11 @@
.equ CLASS_EOF, 0x0e
.equ CLASS_DOT, 0x0f
.equ CLASS_MINUS, 0x10
.equ CLASS_DOUBLE_QUOTE, 0x11
.equ CLASS_QUOTE, 0x11
.equ CLASS_GREATER, 0x12
.equ CLASS_LESS, 0x13
.equ CLASS_COUNT, 18
.equ CLASS_COUNT, 20
.type classification, @object
.size classification, 128
@ -66,12 +74,12 @@ classification:
.byte CLASS_INVALID # 1F US
.byte CLASS_SPACE # 20 Space
.byte CLASS_SINGLE # 21 !
.byte CLASS_DOUBLE_QUOTE # 22 "
.byte CLASS_QUOTE # 22 "
.byte 0x00 # 23 #
.byte 0x00 # 24 $
.byte CLASS_SINGLE # 25 %
.byte CLASS_SINGLE # 26 &
.byte 0x00 # 27 '
.byte CLASS_QUOTE # 27 '
.byte CLASS_LEFT_PAREN # 28 (
.byte CLASS_RIGHT_PAREN # 29 )
.byte CLASS_ASTERISK # 2A *
@ -92,9 +100,9 @@ classification:
.byte CLASS_DIGIT # 39 9
.byte CLASS_COLON # 3A :
.byte CLASS_SINGLE # 3B ;
.byte 0x00 # 3C <
.byte CLASS_LESS # 3C <
.byte CLASS_EQUALS # 3D =
.byte 0x00 # 3E >
.byte CLASS_GREATER # 3E >
.byte 0x00 # 3F ?
.byte CLASS_SINGLE # 40 @
.byte CLASS_CHARACTER # 41 A
@ -220,7 +228,10 @@ keywords:
.ascii "case"
.word 2
.ascii "of"
.size keywords, . - keywords
# Table of single character symbols. The position of a character in this
# string, added to TOKEN_AND, gives the token type of that character, so the
# order must stay in sync with the TOKEN_* constants.
.type byte_keywords, @object
byte_keywords: .ascii "&.,:;()[]^=+-*@"
.equ BYTE_KEYWORDS_SIZE, . - byte_keywords
.size byte_keywords, . - byte_keywords
.section .data
@ -240,78 +251,66 @@ keywords:
# handles each action.
#
.type transitions, @object
.size transitions, 17 * CLASS_COUNT # state count * CLASS_COUNT
.size transitions, 14 * CLASS_COUNT # state count * CLASS_COUNT
transitions:
# Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL .
# - "
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x0105, 0x0106, 0x0107
.word 0x0108, 0x0102, 0x010b, 0x0102, 0x010c, 0x0102, 0x00ff, 0x010e # 00 Start
.word 0x010f, 0x0110
# - " or ' > <
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x0108
.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x0104, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 01 Colon
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon
.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff # 02 Identifier
.word 0x05ff, 0x05ff
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff
.word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier
.word 0x02ff, 0x0103, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff # 03 Integer
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x0103, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x03 Integer
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 04 Assign
.word 0x02ff, 0x02ff
.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x06 Left paren
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 05 Eauals
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x0109, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 06 Left paren
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 07 Right paren
.word 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 08 Asterisk
.word 0x02ff, 0x02ff
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 09 Comment
.word 0x0109, 0x0109
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109 # 0a Closing comment
.word 0x0109, 0x0109
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0b Single character token
.word 0x02ff, 0x02ff
.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff # 0c Zero
.word 0x02ff, 0x02ff
.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff # 0d Hexadecimal
.word 0x00ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less
.word 0x02ff, 0x0102, 0x0102, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff # 0e Dot
.word 0x02ff, 0x02ff
.word 0x02ff, 0x0102, 0x02ff, 0x0102, 0x0102, 0x0102, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x08 Dot
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0f Minus
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment
.word 0x00ff, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110 # 10 Starting string.
.word 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110, 0x0110
.word 0x0110, 0x04ff
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment
.word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
.word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
.word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String
.word 0x02ff, 0x00ff, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x00ff, 0x00ff, 0x010d, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x0c Zero
.word 0x02ff, 0x010d, 0x00ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x010d, 0x010d, 0x00ff, 0x2ff, 0x02ff
.word 0x00ff, 0x02ff, 0x02ff, 0x02ff # 0x0d Hexadecimal
.section .text
@ -406,6 +405,57 @@ _classify_identifier:
addi sp, sp, 16
ret
# Takes a single character symbol and determines its token type.
#
# Parameters:
# a0 - Token character.
#
# Sets a0 to the appropriate token type, that is TOKEN_AND plus the index of
# the character in byte_keywords. If the character is not part of
# byte_keywords, a0 is set to 0.
.type _classify_single, @function
_classify_single:
	# Prologue.
	addi sp, sp, -16
	sw ra, 12(sp)
	sw s0, 8(sp)
	addi s0, sp, 16

	# Search the symbol table for the character.
	mv a1, a0
	li a2, BYTE_KEYWORDS_SIZE
	la a0, byte_keywords
	call _memchr

	# Convert the returned pointer into an index into byte_keywords.
	la a1, byte_keywords
	sub a0, a0, a1

	# The classification table marks some characters (for example ! and %) as
	# single character symbols even though they are missing in byte_keywords.
	# The unsigned comparison rejects every result outside of the table,
	# whatever _memchr returns when nothing was found.
	li t0, BYTE_KEYWORDS_SIZE
	bgeu a0, t0, .Lclassify_single_unknown

	addi a0, a0, TOKEN_AND # The first symbol in byte_keywords is TOKEN_AND.
	j .Lclassify_single_end

.Lclassify_single_unknown:
	li a0, 0

.Lclassify_single_end:
	# Epilogue.
	lw ra, 12(sp)
	lw s0, 8(sp)
	addi sp, sp, 16
	ret
# Classifies a symbol consisting of multiple characters (probably 2).
#
# Parameters:
# a0 - Token length.
# a1 - Token pointer.
#
# Sets a0 to TOKEN_ASSIGN if the token starts with a colon. For every other
# token a0 is left untouched.
#
# NOTE(review): for tokens not starting with a colon the caller receives the
# token length as the token type - confirm whether this is intended.
.type _classify_composite, @function
_classify_composite:
	lbu t0, 0(a1) # t0 = First character of the token.
	li t1, ':'
	bne t0, t1, .Lclassify_composite_end # Not an assignment, nothing to do.

	li a0, TOKEN_ASSIGN

.Lclassify_composite_end:
	ret
# Initializes the classification table.
#
# Parameters:
@ -453,12 +503,18 @@ _tokenize_next:
li t0, 0x03 # Skip action.
beq t1, t0, .Ltokenize_next_skip
li t0, 0x04 # Comment action.
li t0, 0x04 # Delimited string action.
beq t1, t0, .Ltokenize_next_comment
li t0, 0x05 # Finalize identifier.
beq t1, t0, .Ltokenize_next_identifier
li t0, 0x06 # Single character symbol action.
beq t1, t0, .Ltokenize_next_single
li t0, 0x07 # An action for symbols containing multiple characters.
beq t1, t0, .Ltokenize_next_composite
j .Ltokenize_next_reject
.Ltokenize_next_reject:
@ -481,24 +537,17 @@ _tokenize_next:
.Ltokenize_next_print:
/* DEBUG
lw a0, 4(sp)
mv a1, s1
sub a1, a1, a0
call _write_error
DEBUG */
addi a0, a0, 21
sw a0, 0(sp)
addi a0, sp, 0
li a1, 1
call _write_error */
j .Ltokenize_next_end
.Ltokenize_next_comment:
addi s1, s1, 1
/* DEBUG
lw a0, 4(sp)
mv a1, s1
sub a1, a1, a0
call _write_error
DEBUG */
j .Ltokenize_next_end
.Ltokenize_next_identifier:
@ -512,6 +561,26 @@ _tokenize_next:
j .Ltokenize_next_end
.Ltokenize_next_single:
lw a0, 4(sp)
addi s1, a0, 1
lbu a0, (a0)
call _classify_single
lw a1, 0(sp)
sw a0, (a1)
j .Ltokenize_next_end
.Ltokenize_next_composite:
addi s1, s1, 1
lw a1, 4(sp)
sub a0, s1, a1
call _classify_composite
lw a1, 0(sp)
sw a0, (a1)
j .Ltokenize_next_end
.Ltokenize_next_end:
mv a0, s1 # Return the advanced text pointer.