diff options
Diffstat (limited to 'boot/stage14.elna')
| -rw-r--r-- | boot/stage14.elna | 1300 |
1 files changed, 796 insertions, 504 deletions
diff --git a/boot/stage14.elna b/boot/stage14.elna index e3091dc..6e5e186 100644 --- a/boot/stage14.elna +++ b/boot/stage14.elna @@ -6,36 +6,42 @@ (* Stage 14 compiler. *) +(* - Binary minus. *) +(* - Space independent parsing. *) +(* - Label names in goto statements aren't required to begin with a dot. *) +(* - Dereferencing pointers pointing to word long data. *) + const symbol_builtin_name_int := "Int"; symbol_builtin_name_word := "Word"; symbol_builtin_name_pointer := "Pointer"; symbol_builtin_name_char := "Char"; - symbol_builtin_name_bool := "Bool"; + symbol_builtin_name_array := "Array"; (* Every type info starts with a word describing what type it is. PRIMITIVE_TYPE = 1 + ENUMERATION_TYPE = 2 Primitive types have only type size. *) symbol_builtin_type_int := S(1, 4); symbol_builtin_type_word := S(1, 4); symbol_builtin_type_pointer := S(1, 4); symbol_builtin_type_char := S(1, 1); - symbol_builtin_type_bool := S(1, 1); + symbol_builtin_type_array := S(1, 4); (* Info objects start with a word describing its type. - INFO_TYPE = 1 - INFO_PARAMETER = 2 - INFO_TEMPORARY = 3 + TYPE_INFO = 1 + PARAMETER_INFO = 2 + TEMPORARY_INFO = 3 Type info has the type it belongs to. *) symbol_type_info_int := S(1, @symbol_builtin_type_int); symbol_type_info_word := S(1, @symbol_builtin_type_word); symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer); symbol_type_info_char := S(1, @symbol_builtin_type_char); - symbol_type_info_bool := S(1, @symbol_builtin_type_bool); + symbol_type_info_array := S(1, @symbol_builtin_type_array); var source_code: Array; @@ -43,12 +49,15 @@ var symbol_table_global: Array; symbol_table_local: Array; classification: Array; + + (* To reserve memory just add the value of needed bytes to the memory_free_pointer_variable. *) memory: Array; compiler_strings_position: Pointer := @compiler_strings; compiler_strings_length: Word := 0; label_counter: Word := 0; - source_code_position: Pointer := @source_code; + + (* Points to a segment of free memory. *) memory_free_pointer: Word := @memory; (** @@ -374,29 +383,18 @@ begin return destination end; -(** - * Prints the current token. - * - * Parameters: - * length - Token length. - * - * Returns a0 unchanged. - *) -proc _write_token(length: Word); -begin - _write_s(source_code_position, length); - return length -end; - proc _compile_integer_literal(); var integer_token: Word; + integer_length: Word; token_kind: Word; begin _write_z("\tli t0, \0"); - integer_token := _lexer_read_token(@token_kind); - _write_token(integer_token); + integer_token := _lexer_global_get_start(); + integer_length := _lexer_global_get_end() + -integer_token; + + _write_s(integer_token, integer_length); _lexer_skip_token(); _write_c('\n') @@ -405,18 +403,16 @@ end; proc _compile_character_literal(); var character: Word; + token_kind: Word; + character_length: Word; begin - _write_z("\tli t0, '\0"); - source_code_position := source_code_position + 1; + character := _lexer_global_get_start(); + character_length := _lexer_global_get_end() + -character; - character := _load_byte(source_code_position); - if character = '\\' then - _write_c('\\'); - source_code_position := source_code_position + 1 - end; - _write_s(source_code_position, 1); - _write_s("'\n", 2); - source_code_position := source_code_position + 2 + _write_z("\tli t0, \0"); + _write_s(character, character_length); + _write_c('\n'); + _lexer_skip_token() end; proc _compile_variable_expression(); @@ -425,41 +421,49 @@ begin _write_z("\tlw t0, (t0)\n\0") end; +(** + * Compiled take address expression, starting with an "@" sign. + *) proc _compile_address_expression(); begin - (* Skip the "@" sign. *) - source_code_position := source_code_position + 1; + _lexer_skip_token(); _compile_designator() end; +(** + * Compile unary negation, "-" sign. + *) proc _compile_negate_expression(); begin - (* Skip the "-" sign. *) - source_code_position := source_code_position + 1; + _lexer_skip_token(); _compile_term(); - _write_z("\tneg t0, t0\n\0") end; +(* Compile unary negation, "~" sign. *) proc _compile_not_expression(); +var + token_kind: Word; begin - (* Skip the "~" sign. *) - source_code_position := source_code_position + 1; + _lexer_read_token(@token_kind); + _lexer_skip_token(); _compile_term(); - _write_z("\tnot t0, t0\n\0") -end; +end; proc _compile_string_literal(); var + token_kind: Word; + token_start: Word; length: Word; offset: Word; begin - length := _string_length(source_code_position); - offset := _add_string(source_code_position); + _lexer_read_token(@token_kind); + token_start := _lexer_global_get_start(); + length := _string_length(token_start); + offset := _add_string(token_start); - source_code_position := source_code_position + length; - source_code_position := source_code_position + 2; + _lexer_skip_token(); _write_z("\tla t0, strings\n\0"); _write_z("\tli t1, \0"); @@ -472,33 +476,40 @@ end; proc _compile_term(); var current_character: Word; + token_kind: Word; begin - current_character := _load_byte(source_code_position); + _lexer_read_token(@token_kind); - if current_character = '\'' then + if token_kind = _lexer_token_kind_character() then _compile_character_literal() - elsif current_character = '@' then + elsif token_kind = _lexer_token_kind_string() then + _compile_string_literal() + elsif token_kind = _lexer_token_kind_integer() then + _compile_integer_literal() + elsif token_kind = _lexer_token_kind_at() then _compile_address_expression() - elsif current_character = '-' then + elsif token_kind = _lexer_token_kind_minus() then _compile_negate_expression() - elsif current_character = '~' then + elsif token_kind = _lexer_token_kind_not() then _compile_not_expression() - elsif current_character = '"' then - _compile_string_literal() - elsif current_character = '_' then - _compile_call(); - _write_z("\nmv t0, a0\n\0") - elsif _is_digit(current_character) = 1 then - _compile_integer_literal() - elsif _is_lower(current_character) = 1 then - _compile_variable_expression() + elsif token_kind = _lexer_token_kind_identifier() then + current_character := _lexer_global_get_start(); + current_character := _load_byte(current_character); + + (* This is a call if the statement starts with an underscore. *) + if current_character = '_' then + _compile_call(); + _write_z("\tmv t0, a0\n\0") + else + _compile_variable_expression() + end end end; proc _compile_binary_rhs(); begin - (* Skip the whitespace after the binary operator. *) - source_code_position := source_code_position + 1; + (* Save the value of the left expression on the stack. *) + _write_z("\tsw t0, 64(sp)\n\0"); _compile_term(); (* Load the left expression from the stack; *) @@ -507,108 +518,96 @@ end; proc _compile_expression(); var - current_character: Word; + token_kind: Word; begin _compile_term(); - current_character := _load_byte(source_code_position); - - if current_character <> ' ' then - goto .compile_expression_end - end; - (* It is a binary expression. *) - (* Save the value of the left expression on the stack. *) - _write_z("sw t0, 64(sp)\n\0"); + _lexer_read_token(@token_kind); - (* Skip surrounding whitespace in front of the operator. *) - source_code_position := source_code_position + 1; - current_character := _load_byte(source_code_position); + if token_kind = _lexer_token_kind_plus() then + _lexer_skip_token(); + _compile_binary_rhs(); - if current_character = '+' then - source_code_position := source_code_position + 1; + (* Execute the operation. *) + _write_z("\tadd t0, t0, t1\n\0") + elsif token_kind = _lexer_token_kind_minus() then + _lexer_skip_token(); _compile_binary_rhs(); (* Execute the operation. *) - _write_z("add t0, t0, t1\n\0") - elsif current_character = '*' then - source_code_position := source_code_position + 1; + _write_z("\tsub t0, t1, t0\n\0"); + elsif token_kind = _lexer_token_kind_multiplication() then + _lexer_skip_token(); _compile_binary_rhs(); (* Execute the operation. *) _write_z("\tmul t0, t0, t1\n\0") - elsif current_character = '&' then - source_code_position := source_code_position + 1; + elsif token_kind = _lexer_token_kind_and() then + _lexer_skip_token(); _compile_binary_rhs(); (* Execute the operation. *) _write_z("\tand t0, t0, t1\n\0") - elsif current_character = 'o' then - source_code_position := source_code_position + 2; + elsif token_kind = _lexer_token_kind_or() then + _lexer_skip_token(); _compile_binary_rhs(); (* Execute the operation. *) - _write_z("or t0, t0, t1\n\0") - elsif current_character = 'x' then - source_code_position := source_code_position + 3; + _write_z("\tor t0, t0, t1\n\0") + elsif token_kind = _lexer_token_kind_xor() then + _lexer_skip_token(); _compile_binary_rhs(); (* Execute the operation. *) - _write_z("xor t0, t0, t1\n\0") - elsif current_character = '=' then - source_code_position := source_code_position + 1; + _write_z("\txor t0, t0, t1\n\0") + elsif token_kind = _lexer_token_kind_equals() then + _lexer_skip_token(); _compile_binary_rhs(); (* Execute the operation. *) - _write_z("xor t0, t0, t1\nseqz t0, t0\n\0") - elsif current_character = '%' then - source_code_position := source_code_position + 1; + _write_z("\txor t0, t0, t1\n\tseqz t0, t0\n\0") + elsif token_kind = _lexer_token_kind_remainder() then + _lexer_skip_token(); _compile_binary_rhs(); (* Execute the operation. *) - _write_z("rem t0, t1, t0\n\0") - elsif current_character = '/' then - source_code_position := source_code_position + 1; + _write_z("\trem t0, t1, t0\n\0") + elsif token_kind = _lexer_token_kind_division() then + _lexer_skip_token(); _compile_binary_rhs(); (* Execute the operation. *) - _write_z("div t0, t1, t0\n\0") - elsif current_character = '<' then - source_code_position := source_code_position + 1; - current_character := _load_byte(source_code_position); - - if current_character = '>' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\txor t0, t0, t1\nsnez t0, t0\n\0") - elsif current_character = '=' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\nxori t0, t0, 1\n\0") - else - _compile_binary_rhs(); + _write_z("\tdiv t0, t1, t0\n\0") + elsif token_kind = _lexer_token_kind_less_than() then + _lexer_skip_token(); + _compile_binary_rhs(); - (* Execute the operation. *) - _write_z("slt t0, t1, t0\n\0") - end - elsif current_character = '>' then - source_code_position := source_code_position + 1; - current_character := _load_byte(source_code_position); - if current_character = '=' then - source_code_position := source_code_position + 1; - _compile_binary_rhs(); - - (* Execute the operation. *) - _write_z("\tslt t0, t1, t0\nxori t0, t0, 1\n\0") - else - _compile_binary_rhs(); + (* Execute the operation. *) + _write_z("\tslt t0, t1, t0\n\0") + elsif token_kind = _lexer_token_kind_greater_than() then + _lexer_skip_token(); + _compile_binary_rhs(); - (* Execute the operation. *) - _write_z("\tslt t0, t0, t1\n\0") - end + (* Execute the operation. *) + _write_z("\tslt t0, t0, t1\n\0") + elsif token_kind = _lexer_token_kind_less_equal() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tslt t0, t0, t1\n\txori t0, t0, 1\n\0") + elsif token_kind = _lexer_token_kind_not_equal() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\txor t0, t0, t1\n\tsnez t0, t0\n\0") + elsif token_kind = _lexer_token_kind_greater_equal() then + _lexer_skip_token(); + _compile_binary_rhs(); + + (* Execute the operation. *) + _write_z("\tslt t0, t1, t0\n\txori t0, t0, 1\n\0") end; .compile_expression_end; @@ -622,18 +621,18 @@ var stack_offset: Word; token_kind: Word; begin - name_length := _lexer_read_token(@token_kind); - name := _lexer_global_start(); - name := _load_word(name); - name_length := _lexer_global_end(); - name_length := _load_word(name_length) + -name; + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name; argument_count := 0; (* Skip the identifier and left paren. *) _lexer_skip_token(); - source_code_position := source_code_position + 1; + _lexer_read_token(@token_kind); + _lexer_skip_token(); - if _load_byte(source_code_position) = ')' then + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_right_paren() then goto .compile_call_finalize end; .compile_call_loop; @@ -651,10 +650,12 @@ begin (* Add one to the argument counter. *) argument_count := argument_count + 1; - if _load_byte(source_code_position) <> ',' then + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_comma() then goto .compile_call_finalize end; - source_code_position := source_code_position + 2; + _lexer_skip_token(); goto .compile_call_loop; .compile_call_finalize; @@ -680,25 +681,32 @@ begin .compile_call_end; _write_z("\tcall \0"); _write_s(name, name_length); + _write_c('\n'); (* Skip the right paren. *) - source_code_position := source_code_position + 1 + _lexer_read_token(@token_kind); + _lexer_skip_token() end; proc _compile_goto(); var next_token: Word; + next_length: Word; token_kind: Word; begin - _lexer_read_token(@token_kind); _lexer_skip_token(); + _lexer_read_token(@token_kind); - source_code_position := source_code_position + 2; + if token_kind = _lexer_token_kind_dot() then + _lexer_skip_token(); + _lexer_read_token(@token_kind) + end; + next_token := _lexer_global_get_start(); + next_length := _lexer_global_get_end() + -next_token; - next_token := _lexer_read_token(@token_kind); _write_z("\tj .\0"); - _write_token(next_token); + _write_s(next_token, next_length); _lexer_skip_token() end; @@ -717,11 +725,14 @@ proc _compile_global_designator(); var name: Word; token_kind: Word; + token_length: Word; begin _write_z("\tla t0, \0"); - name := _lexer_read_token(@token_kind); - _write_token(name); + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + token_length := _lexer_global_get_end() + -name; + _write_s(name, token_length); _lexer_skip_token(); _write_c('\n') @@ -734,21 +745,26 @@ var token_kind: Word; name: Word; begin - name_token := _lexer_read_token(@token_kind); - name := _lexer_global_start(); - name := _load_word(name); - name_token := _lexer_global_end(); - name_token := _load_word(name_token) + -name; + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + name_token := _lexer_global_get_end() + -name; lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token); if lookup_result <> 0 then _compile_local_designator(lookup_result) else _compile_global_designator() + end; + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_hat() then + _lexer_skip_token(); + _write_z("\tlw t0, (t0)\n\0") end end; proc _compile_assignment(); +var + token_kind: Word; begin _compile_designator(); @@ -756,12 +772,13 @@ begin _write_z("\tsw t0, 60(sp)\n\0"); (* Skip the assignment sign (:=) with surrounding whitespaces. *) - source_code_position := source_code_position + 4; + _lexer_read_token(@token_kind); + _lexer_skip_token(); (* Compile the assignment. *) _compile_expression(); - _write_z("\tlw t1, 60(sp)\nsw t0, (t1)\n\0") + _write_z("\tlw t1, 60(sp)\n\tsw t0, (t1)\n\0") end; proc _compile_return_statement(); @@ -771,9 +788,8 @@ begin (* Skip "return" keyword and whitespace after it. *) _lexer_read_token(@token_kind); _lexer_skip_token(); - source_code_position := source_code_position + 1; - _compile_expression(); + _compile_expression(); _write_z("\tmv a0, t0\n\0") end; @@ -794,8 +810,6 @@ var condition_label: Word; token_kind: Word; begin - source_code_position := source_code_position + 1; - (* Compile condition. *) _compile_expression(); (* Skip " then" with newline. *) @@ -810,7 +824,7 @@ begin _write_label(condition_label); _write_c('\n'); - _compile_procedure_body(); + _compile_statement_list(); _write_z("\tj \0"); _write_label(after_end_label); @@ -839,7 +853,7 @@ begin _lexer_read_token(@token_kind); if token_kind = _lexer_token_kind_else() then _lexer_skip_token(); - _compile_procedure_body() + _compile_statement_list() elsif token_kind = _lexer_token_kind_elsif() then _lexer_skip_token(); _compile_condition(after_end_label); @@ -859,11 +873,10 @@ var name: Word; begin (* Skip the dot. *) - _lexer_read_token(@token_kind); _lexer_skip_token(); - label_token := _lexer_read_token(@token_kind); - name := _lexer_global_start(); - name := _load_word(name); + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + label_token := _lexer_global_get_end() + -name; _write_c('.'); _write_s(name, label_token); _write_z(":\n\0"); @@ -886,8 +899,7 @@ begin elsif token_kind = _lexer_token_kind_dot() then _compile_label_declaration() elsif token_kind = _lexer_token_kind_identifier() then - current_byte := _lexer_global_start(); - current_byte := _load_word(current_byte); + current_byte := _lexer_global_get_start(); current_byte := _load_byte(current_byte); (* This is a call if the statement starts with an underscore. *) @@ -900,7 +912,7 @@ begin _write_c('\n') end; -proc _compile_procedure_body(); +proc _compile_statement_list(); var token_kind: Word; begin @@ -910,7 +922,7 @@ begin if token_kind = _lexer_token_kind_semicolon() then _lexer_skip_token(); - _compile_procedure_body() + _compile_statement_list() end; _skip_empty_lines() end; @@ -925,33 +937,139 @@ end; proc _write_register(register_character: Word, register_number: Word); begin _write_c(register_character); - register_number := register_number + '0'; - _write_c(register_number) + _write_c(register_number + '0') +end; + +proc _type_get_kind(this: Word); + return _load_word(this) +end; + +proc _type_set_kind(this: Word, value: Word); +begin + _store_word(value, this) +end; + +proc _type_get_size(this: Word); + return _load_word(this + 4) +end; + +proc _type_set_size(this: Word, value: Word); +begin + _store_word(value, this + 4) end; -proc _skip_spaces(); +proc _enumeration_type_get_members(this: Word); + return _load_word(this + 8) +end; + +proc _enumeration_type_set_members(this: Word, value: Word); +begin + _store_word(value, this + 8) +end; + +proc _enumeration_type_get_length(this: Word); + return _load_word(this + 12) +end; + +proc _enumeration_type_set_length(this: Word, value: Word); +begin + _store_word(value, this + 12) +end; + +(** + * Reads and creates enumeration type representation. + * + * record + * type_kind: Word; + * size: Word; + * members: StringArray; + * length: Word + * end; + * + * Returns enumeration type description. + *) +proc _read_type_enumeration(); var - current_byte: Word; - lhs: Word; - rhs: Word; + token_kind: Word; + enumeration_name: Word; + name_length: Word; + memory_start: Word; + member_count: Word; + result: Word; begin - current_byte := _load_byte(source_code_position); - lhs := current_byte = '\t'; - rhs := current_byte = ' '; + _lexer_skip_token(); + memory_start := memory_free_pointer; + member_count := 0; - if lhs or rhs then - source_code_position := source_code_position + 1; - _skip_spaces() - end + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_right_paren() then + goto .read_type_enumeration_end + end; + .read_type_enumeration_loop; + member_count := member_count + 1; + + enumeration_name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -enumeration_name; + + _store_word(enumeration_name, memory_free_pointer); + memory_free_pointer := memory_free_pointer + 4; + + _store_word(name_length, memory_free_pointer); + memory_free_pointer := memory_free_pointer + 4; + + (* Skip the identifier. *) + _lexer_skip_token(); + + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_comma() then + _lexer_skip_token(); + _lexer_read_token(@token_kind); + goto .read_type_enumeration_loop + end; + + .read_type_enumeration_end; + _lexer_skip_token(); + + (* The resulting structure is 16 bytes long. *) + result := memory_free_pointer; + memory_free_pointer := memory_free_pointer + 16; + + (* ENUMERATION_TYPE is 2. *) + _type_set_kind(result, 2); + _type_set_size(result, 4); + _enumeration_type_set_members(result, memory_start); + _enumeration_type_set_length(result, member_count); + + return result end; proc _read_type_expression(); var - type_name: Word; token_kind: Word; + type_name: Word; + name_length: Word; + result: Word; begin - type_name := _lexer_read_token(@token_kind); - _lexer_skip_token() + result := 0; + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_identifier() then + (* Named type. *) + type_name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -type_name; + result := _symbol_table_lookup(@symbol_table_global, type_name, name_length); + result := _type_info_get_type(result); + + _lexer_skip_token() + elsif token_kind = _lexer_token_kind_left_paren() then + result := _read_type_enumeration() + end; + + return result +end; + +proc _type_info_get_type(this: Word); + return _load_word(this + 4) end; (** @@ -980,10 +1098,10 @@ begin return result end; -proc _parameter_info_get_offset(info: Word); +proc _parameter_info_get_offset(this: Word); begin - info := info + 4; - return _load_word(info) + this := this + 4; + return _load_word(this) end; (** @@ -1011,10 +1129,10 @@ begin return result end; -proc _temporary_info_get_offset(info: Word); +proc _temporary_info_get_offset(this: Word); begin - info := info + 4; - return _load_word(info) + this := this + 4; + return _load_word(this) end; (** @@ -1029,12 +1147,14 @@ var token_kind: Word; begin (* Read the parameter name. *) - name_position := source_code_position; - name_length := _lexer_read_token(@token_kind); + _lexer_read_token(@token_kind); + name_position := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name_position; _lexer_skip_token(); (* Skip colon and space in front of the type expression. *) - source_code_position := source_code_position + 2; + _lexer_read_token(@token_kind); + _lexer_skip_token(); _read_type_expression(); @@ -1054,23 +1174,28 @@ end; proc _read_procedure_parameters(); var parameter_counter: Word; + token_kind: Word; begin (* Skip open paren. *) - source_code_position := source_code_position + 1; + _lexer_read_token(@token_kind); + _lexer_skip_token(); parameter_counter := 0; .compile_procedure_prologue_skip; - if _load_byte(source_code_position) <> ')' then + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_right_paren() then _read_procedure_parameter(parameter_counter); parameter_counter := parameter_counter + 1; + _lexer_read_token(@token_kind); - if _load_byte(source_code_position) = ',' then - source_code_position := source_code_position + 2; + if token_kind = _lexer_token_kind_comma() then + _lexer_skip_token(); goto .compile_procedure_prologue_skip end end; (* Skip close paren. *) - source_code_position := source_code_position + 1 + _lexer_skip_token() end; (** @@ -1084,13 +1209,14 @@ var name_position: Word; token_kind: Word; begin - _skip_spaces(); - name_position := source_code_position; + _lexer_read_token(@token_kind); + name_position := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name_position; + _lexer_skip_token(); (* Read and skip variable name, colon and the space *) - name_length := _lexer_read_token(@token_kind); - _lexer_skip_token(name_length); - source_code_position := source_code_position + 2; + _lexer_read_token(@token_kind); + _lexer_skip_token(); _read_type_expression(); @@ -1098,49 +1224,58 @@ begin _symbol_table_enter(@symbol_table_local, name_position, name_length, info); (* Skip semicolon and newline after the variable declaration *) - source_code_position := source_code_position + 2 + _lexer_read_token(@token_kind); + _lexer_skip_token() end; proc _read_procedure_temporaries(); var temporary_counter: Word; + token_kind: Word; begin - if _memcmp(source_code_position, "var", 3) <> 0 then - goto .read_local_variables_end - end; - source_code_position := source_code_position + 4; - temporary_counter := 0; + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_var() then + _lexer_skip_token(); + temporary_counter := 0; - .read_local_variables_loop; - if _memcmp(source_code_position, "begin", 5) <> 0 then - _read_procedure_temporary(temporary_counter); + .read_local_variables_loop; + _lexer_read_token(@token_kind); - temporary_counter := temporary_counter + 1; - goto .read_local_variables_loop - end; - .read_local_variables_end + if token_kind = _lexer_token_kind_identifier() then + _read_procedure_temporary(temporary_counter); + + temporary_counter := temporary_counter + 1; + goto .read_local_variables_loop + end + end end; proc _compile_procedure(); var + name_pointer: Word; name_length: Word; token_kind: Word; begin (* Skip "proc ". *) - source_code_position := source_code_position + 5; + _lexer_read_token(@token_kind); + _lexer_skip_token(); + (* Clear local symbol table. *) _store_word(0, @symbol_table_local); - name_length := _lexer_read_token(@token_kind); + _lexer_read_token(@token_kind); + name_pointer := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name_pointer; (* Write .type _procedure_name, @function. *) _write_z(".type \0"); - _write_token(name_length); + _write_s(name_pointer, name_length); _write_z(", @function\n\0"); (* Write procedure label, _procedure_name: *) - _write_token(name_length); + _write_s(name_pointer, name_length); _write_z(":\n\0"); (* Skip procedure name. *) @@ -1149,16 +1284,16 @@ begin _read_procedure_parameters(); (* Skip semicolon and newline. *) - source_code_position := source_code_position + 2; + _lexer_read_token(@token_kind); + _lexer_skip_token(); _read_procedure_temporaries(); (* Skip semicolon, "begin" and newline. *) _lexer_read_token(@token_kind); if token_kind = _lexer_token_kind_begin() then _lexer_skip_token(); - _compile_procedure_body() - end; - if token_kind = _lexer_token_kind_return() then + _compile_statement_list() + elsif token_kind = _lexer_token_kind_return() then _compile_return_statement() end; @@ -1166,94 +1301,79 @@ begin _write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0"); (* Skip the "end" keyword, semicolon and newline. *) - source_code_position := source_code_position + 5 -end; - -(** - * Prints and skips a line. - *) -proc _skip_comment(); -var - token_kind: Word; -begin _lexer_read_token(@token_kind); _lexer_skip_token(); - source_code_position := source_code_position + 1 + _lexer_read_token(@token_kind); + _lexer_skip_token() end; (** - * Skip newlines and comments. + * Skips comments. *) proc _skip_empty_lines(); var - current_position: Word; - current_byte: Word; + token_kind: Word; begin .skip_empty_lines_rerun; - current_position := source_code_position; - - .skip_empty_lines_loop; - current_byte := _load_byte(current_position); - - if current_byte = '\n' then - source_code_position := current_position + 1; - _skip_empty_lines() - elsif current_byte = '\t' then - current_position := current_position + 1; - goto .skip_empty_lines_loop - elsif current_byte = '(' then - current_byte := _load_byte(current_position + 1); - - if current_byte = '*' then - source_code_position := current_position; - _skip_comment(); - goto .skip_empty_lines_rerun - end; + + _lexer_read_token(@token_kind); + + if token_kind = _lexer_token_kind_comment() then + _lexer_skip_token(); + goto .skip_empty_lines_rerun end end; +(** + * Compile global variable initializer. + *) proc _compile_global_initializer(); var current_byte: Word; length: Word; token_kind: Word; + token_start: Word; begin - current_byte := _load_byte(source_code_position); + _lexer_read_token(@token_kind); + token_start := _lexer_global_get_start(); + current_byte := _load_byte(token_start); - if current_byte = '"' then + if token_kind = _lexer_token_kind_string() then _write_z("\n\t.word strings + \0"); - length := _string_length(source_code_position); + length := _string_length(token_start); - _add_string(source_code_position); + _add_string(token_start); _write_i(); (* Skip the quoted string. *) - source_code_position := source_code_position + length; - source_code_position := source_code_position + 2; + _lexer_skip_token(); goto .compile_global_initializer_end elsif current_byte = 'S' then (* Skip "S(". *) - source_code_position := source_code_position + 2; + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _lexer_read_token(@token_kind); - if _load_byte(source_code_position) = ')' then + if token_kind = _lexer_token_kind_right_paren() then goto .compile_global_initializer_closing end; goto .compile_global_initializer_loop - elsif current_byte = '@' then + elsif token_kind = _lexer_token_kind_at() then (* Skip @. *) - source_code_position := source_code_position + 1; + _lexer_skip_token(); _write_z("\n\t.word \0"); - current_byte := _lexer_read_token(@token_kind); - _write_token(current_byte); + _lexer_read_token(@token_kind); + token_start := _lexer_global_get_start(); + _write_s(token_start, _lexer_global_get_end() + -token_start); _lexer_skip_token(); goto .compile_global_initializer_end - elsif _is_digit(current_byte) = 1 then + elsif token_kind = _lexer_token_kind_integer() then _write_z("\n\t.word \0"); - current_byte := _lexer_read_token(@token_kind); - _write_token(current_byte); - source_code_position := source_code_position + 1; + _write_s(token_start, _lexer_global_get_end() + -token_start); + _lexer_skip_token(); goto .compile_global_initializer_end end; @@ -1261,43 +1381,85 @@ begin .compile_global_initializer_loop; _compile_global_initializer(); - if _load_byte(source_code_position) <> ')' then + _lexer_read_token(@token_kind); + if token_kind <> _lexer_token_kind_right_paren() then (* Skip comma and whitespace after it. *) - source_code_position := source_code_position + 2; + _lexer_skip_token(); goto .compile_global_initializer_loop end; .compile_global_initializer_closing; (* Skip ")" *) - source_code_position := source_code_position + 1; + _lexer_skip_token(); .compile_global_initializer_end end; proc _compile_constant_declaration(); var + name: Word; name_length: Word; token_kind: Word; begin - name_length := _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name; _write_z(".type \0"); - _write_token(name_length); + _write_s(name, name_length); _write_z(", @object\n\0"); - _write_token(name_length); + _write_s(name, name_length); _write_c(':'); (* Skip the constant name with assignment sign and surrounding whitespaces. *) _lexer_skip_token(); - source_code_position := source_code_position + 4; + _lexer_read_token(@token_kind); + _lexer_skip_token(); _compile_global_initializer(); + (* Skip semicolon and newline. *) - source_code_position := source_code_position + 2; + _lexer_read_token(@token_kind); + _lexer_skip_token(); _write_c('\n') end; +proc _compile_type_declaration(); +var + token_kind: Word; +begin + _lexer_skip_token(); + _lexer_read_token(@token_kind); + _lexer_skip_token(); + _read_type_expression(); + _lexer_read_token(@token_kind); + _lexer_skip_token() +end; + +proc _compile_type_part(); +var + token_kind: Word; +begin + _skip_empty_lines(); + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_type() then + goto .compile_type_part_end + end; + _lexer_skip_token(); + + .compile_type_part_loop; + _skip_empty_lines(); + + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_identifier() then + _compile_type_declaration(); + goto .compile_type_part_loop + end; + + .compile_type_part_end +end; + proc _compile_const_part(); var token_kind: Word; @@ -1317,8 +1479,8 @@ begin (* If the character at the line beginning is not indentation, it is probably the next code section. *) - if _load_byte(source_code_position) = '\t' then - source_code_position := source_code_position + 1; + _lexer_read_token(@token_kind); + if token_kind = _lexer_token_kind_identifier() then _compile_constant_declaration(); goto .compile_const_part_loop end; @@ -1328,16 +1490,19 @@ end; proc _compile_variable_declaration(); var + name: Word; name_length: Word; token_kind: Word; begin - name_length := _lexer_read_token(@token_kind); + _lexer_read_token(@token_kind); + name := _lexer_global_get_start(); + name_length := _lexer_global_get_end() + -name; _write_z(".type \0"); - _write_token(name_length); + _write_s(name, name_length); _write_z(", @object\n\0"); - _write_token(name_length); + _write_s(name, name_length); _write_c(':'); (* Skip the variable name and colon with space before the type. *) @@ -1346,13 +1511,16 @@ begin _lexer_skip_token(); _read_type_expression(); - if _load_byte(source_code_position) <> ' ' then + _lexer_read_token(@token_kind); + + if token_kind <> _lexer_token_kind_assignment() then (* Else we assume this is a zeroed 81920 bytes big array. *) _write_z(" .zero 81920\0") else (* Skip the assignment sign with surrounding whitespaces. *) - source_code_position := source_code_position + 4; - _compile_global_initializer() + _lexer_skip_token(); + _compile_global_initializer(); + _lexer_read_token(@token_kind) end; (* Skip semicolon and newline. *) @@ -1390,7 +1558,10 @@ end; * Process the source code and print the generated code. *) proc _compile_module(); +var + token_kind: Word; begin + _compile_type_part(); _compile_const_part(); _skip_empty_lines(); _compile_var_part(); @@ -1404,15 +1575,12 @@ begin .compile_module_loop; _skip_empty_lines(); + _lexer_read_token(@token_kind); - if _load_byte(source_code_position) <> 0 then - (* 5 is "proc " length. Space is needed to distinguish from "procedure". *) - if _memcmp(source_code_position, "proc ", 5) = 0 then - _compile_procedure(); - goto .compile_module_loop - end - end; - .compile_module_end + if token_kind = _lexer_token_kind_proc() then + _compile_procedure(); + goto .compile_module_loop + end end; proc _compile(); @@ -1551,7 +1719,7 @@ begin _symbol_table_enter(@symbol_table_global, symbol_builtin_name_word, 4, @symbol_type_info_word); _symbol_table_enter(@symbol_table_global, symbol_builtin_name_pointer, 7, @symbol_type_info_pointer); _symbol_table_enter(@symbol_table_global, symbol_builtin_name_char, 4, @symbol_type_info_char); - _symbol_table_enter(@symbol_table_global, symbol_builtin_name_bool, 4, @symbol_type_info_bool) + _symbol_table_enter(@symbol_table_global, symbol_builtin_name_array, 5, @symbol_type_info_array) end; @@ -1600,7 +1768,7 @@ proc _lexer_class_asterisk(); return 9 end; -proc _lexer_class_underscore(); +proc _lexer_class_backslash(); return 10 end; @@ -1668,54 +1836,58 @@ proc _lexer_state_decimal(); return 4 end; -proc _lexer_state_greater(); +proc _lexer_state_leading_zero(); return 5 end; -proc _lexer_state_minus(); +proc _lexer_state_greater(); return 6 end; -proc _lexer_state_left_paren(); +proc _lexer_state_minus(); return 7 end; -proc _lexer_state_less(); +proc _lexer_state_left_paren(); return 8 end; -proc _lexer_state_dot(); +proc _lexer_state_less(); return 9 end; -proc _lexer_state_comment(); +proc _lexer_state_dot(); return 10 end; -proc _lexer_state_closing_comment(); +proc _lexer_state_comment(); return 11 end; -proc _lexer_state_character(); +proc _lexer_state_closing_comment(); return 12 end; -proc _lexer_state_string(); +proc _lexer_state_character(); return 13 end; -proc _lexer_state_leading_zero(); +proc _lexer_state_character_escape(); return 14 end; -proc _lexer_state_decimal_suffix(); +proc _lexer_state_string(); return 15 end; -proc _lexer_state_end(); +proc _lexer_state_string_escape(); return 16 end; +proc _lexer_state_end(); + return 17 +end; + proc _lexer_action_none(); return 1 end; @@ -1793,140 +1965,140 @@ proc _lexer_classifications(); var code: Word; begin - _assign_at(@classification, 1, 15); - _assign_at(@classification, 2, 1); - _assign_at(@classification, 3, 1); - _assign_at(@classification, 4, 1); - _assign_at(@classification, 5, 1); - _assign_at(@classification, 6, 1); - _assign_at(@classification, 7, 1); - _assign_at(@classification, 8, 1); - _assign_at(@classification, 9, 1); - _assign_at(@classification, 10, 4); - _assign_at(@classification, 11, 4); - _assign_at(@classification, 12, 1); - _assign_at(@classification, 13, 1); - _assign_at(@classification, 14, 4); - _assign_at(@classification, 15, 1); - _assign_at(@classification, 16, 1); - _assign_at(@classification, 17, 1); - _assign_at(@classification, 18, 1); - _assign_at(@classification, 19, 1); - _assign_at(@classification, 20, 1); - _assign_at(@classification, 21, 1); - _assign_at(@classification, 22, 1); - _assign_at(@classification, 23, 1); - _assign_at(@classification, 24, 1); - _assign_at(@classification, 25, 1); - _assign_at(@classification, 26, 1); - _assign_at(@classification, 27, 1); - _assign_at(@classification, 28, 1); - _assign_at(@classification, 29, 1); - _assign_at(@classification, 30, 1); - _assign_at(@classification, 31, 1); - _assign_at(@classification, 32, 1); - _assign_at(@classification, 33, 4); - _assign_at(@classification, 34, 11); - _assign_at(@classification, 35, 19); - _assign_at(@classification, 36, 22); - _assign_at(@classification, 37, 22); - _assign_at(@classification, 38, 11); - _assign_at(@classification, 39, 11); - _assign_at(@classification, 40, 18); - _assign_at(@classification, 41, 7); - _assign_at(@classification, 42, 8); - _assign_at(@classification, 43, 9); - _assign_at(@classification, 44, 11); - _assign_at(@classification, 45, 11); - _assign_at(@classification, 46, 17); - _assign_at(@classification, 47, 16); - _assign_at(@classification, 48, 11); - _assign_at(@classification, 49, 13); - _assign_at(@classification, 50, 2); - _assign_at(@classification, 51, 2); - _assign_at(@classification, 52, 2); - _assign_at(@classification, 53, 2); - _assign_at(@classification, 54, 2); - _assign_at(@classification, 55, 2); - _assign_at(@classification, 56, 2); - _assign_at(@classification, 57, 2); - _assign_at(@classification, 58, 2); - _assign_at(@classification, 59, 5); - _assign_at(@classification, 60, 11); - _assign_at(@classification, 61, 21); - _assign_at(@classification, 62, 6); - _assign_at(@classification, 63, 20); - _assign_at(@classification, 64, 22); - _assign_at(@classification, 65, 11); - _assign_at(@classification, 66, 3); - _assign_at(@classification, 67, 3); - _assign_at(@classification, 68, 3); - _assign_at(@classification, 69, 3); - _assign_at(@classification, 70, 3); - _assign_at(@classification, 71, 3); - _assign_at(@classification, 72, 3); - _assign_at(@classification, 73, 3); - _assign_at(@classification, 74, 3); - _assign_at(@classification, 75, 3); - _assign_at(@classification, 76, 3); - _assign_at(@classification, 77, 3); - _assign_at(@classification, 78, 3); - _assign_at(@classification, 79, 3); - _assign_at(@classification, 80, 3); - _assign_at(@classification, 81, 3); - _assign_at(@classification, 82, 3); - _assign_at(@classification, 83, 3); - _assign_at(@classification, 84, 3); - _assign_at(@classification, 85, 3); - _assign_at(@classification, 86, 3); - _assign_at(@classification, 87, 3); - _assign_at(@classification, 88, 3); - _assign_at(@classification, 89, 3); - _assign_at(@classification, 90, 3); - _assign_at(@classification, 91, 3); - _assign_at(@classification, 92, 11); - _assign_at(@classification, 93, 22); - _assign_at(@classification, 94, 11); - _assign_at(@classification, 95, 11); - _assign_at(@classification, 96, 10); - _assign_at(@classification, 97, 22); - _assign_at(@classification, 98, 12); - _assign_at(@classification, 99, 12); - _assign_at(@classification, 100, 12); - _assign_at(@classification, 101, 12); - _assign_at(@classification, 102, 12); - _assign_at(@classification, 103, 12); - _assign_at(@classification, 104, 3); - _assign_at(@classification, 105, 3); - _assign_at(@classification, 106, 3); - _assign_at(@classification, 107, 3); - _assign_at(@classification, 108, 3); - _assign_at(@classification, 109, 3); - _assign_at(@classification, 110, 3); - _assign_at(@classification, 111, 3); - _assign_at(@classification, 112, 3); - _assign_at(@classification, 113, 3); - _assign_at(@classification, 114, 3); - _assign_at(@classification, 115, 3); - _assign_at(@classification, 116, 3); - _assign_at(@classification, 117, 3); - _assign_at(@classification, 118, 3); - _assign_at(@classification, 119, 3); - _assign_at(@classification, 120, 3); - _assign_at(@classification, 121, 14); - _assign_at(@classification, 122, 3); - _assign_at(@classification, 123, 3); - _assign_at(@classification, 124, 22); - _assign_at(@classification, 125, 11); - _assign_at(@classification, 126, 22); - _assign_at(@classification, 127, 11); - _assign_at(@classification, 128, 1); + _assign_at(@classification, 1, _lexer_class_eof()); + _assign_at(@classification, 2, _lexer_class_invalid()); + _assign_at(@classification, 3, _lexer_class_invalid()); + _assign_at(@classification, 4, _lexer_class_invalid()); + _assign_at(@classification, 5, _lexer_class_invalid()); + _assign_at(@classification, 6, _lexer_class_invalid()); + _assign_at(@classification, 7, _lexer_class_invalid()); + _assign_at(@classification, 8, _lexer_class_invalid()); + _assign_at(@classification, 9, _lexer_class_invalid()); + _assign_at(@classification, 10, _lexer_class_space()); + _assign_at(@classification, 11, _lexer_class_space()); + _assign_at(@classification, 12, _lexer_class_invalid()); + _assign_at(@classification, 13, _lexer_class_invalid()); + _assign_at(@classification, 14, _lexer_class_space()); + _assign_at(@classification, 15, _lexer_class_invalid()); + _assign_at(@classification, 16, _lexer_class_invalid()); + _assign_at(@classification, 17, _lexer_class_invalid()); + _assign_at(@classification, 18, _lexer_class_invalid()); + _assign_at(@classification, 19, _lexer_class_invalid()); + _assign_at(@classification, 20, _lexer_class_invalid()); + _assign_at(@classification, 21, _lexer_class_invalid()); + _assign_at(@classification, 22, _lexer_class_invalid()); + _assign_at(@classification, 23, _lexer_class_invalid()); + _assign_at(@classification, 24, _lexer_class_invalid()); + _assign_at(@classification, 25, _lexer_class_invalid()); + _assign_at(@classification, 26, _lexer_class_invalid()); + _assign_at(@classification, 27, _lexer_class_invalid()); + _assign_at(@classification, 28, _lexer_class_invalid()); + _assign_at(@classification, 29, _lexer_class_invalid()); + _assign_at(@classification, 30, _lexer_class_invalid()); + _assign_at(@classification, 31, _lexer_class_invalid()); + _assign_at(@classification, 32, _lexer_class_invalid()); + _assign_at(@classification, 33, _lexer_class_space()); + _assign_at(@classification, 34, _lexer_class_single()); + _assign_at(@classification, 35, _lexer_class_double_quote()); + _assign_at(@classification, 36, _lexer_class_other()); + _assign_at(@classification, 37, _lexer_class_other()); + _assign_at(@classification, 38, _lexer_class_single()); + _assign_at(@classification, 39, _lexer_class_single()); + _assign_at(@classification, 40, _lexer_class_single_quote()); + _assign_at(@classification, 41, _lexer_class_left_paren()); + _assign_at(@classification, 42, _lexer_class_right_paren()); + _assign_at(@classification, 43, _lexer_class_asterisk()); + _assign_at(@classification, 44, _lexer_class_single()); + _assign_at(@classification, 45, _lexer_class_single()); + _assign_at(@classification, 46, _lexer_class_minus()); + _assign_at(@classification, 47, _lexer_class_dot()); + _assign_at(@classification, 48, _lexer_class_single()); + _assign_at(@classification, 49, _lexer_class_zero()); + _assign_at(@classification, 50, _lexer_class_digit()); + _assign_at(@classification, 51, _lexer_class_digit()); + _assign_at(@classification, 52, _lexer_class_digit()); + _assign_at(@classification, 53, _lexer_class_digit()); + _assign_at(@classification, 54, _lexer_class_digit()); + _assign_at(@classification, 55, _lexer_class_digit()); + _assign_at(@classification, 56, _lexer_class_digit()); + _assign_at(@classification, 57, _lexer_class_digit()); + _assign_at(@classification, 58, _lexer_class_digit()); + _assign_at(@classification, 59, _lexer_class_colon()); + _assign_at(@classification, 60, _lexer_class_single()); + _assign_at(@classification, 61, _lexer_class_less()); + _assign_at(@classification, 62, _lexer_class_equals()); + _assign_at(@classification, 63, _lexer_class_greater()); + _assign_at(@classification, 64, _lexer_class_other()); + _assign_at(@classification, 65, _lexer_class_single()); + _assign_at(@classification, 66, _lexer_class_alpha()); + _assign_at(@classification, 67, _lexer_class_alpha()); + _assign_at(@classification, 68, _lexer_class_alpha()); + _assign_at(@classification, 69, _lexer_class_alpha()); + _assign_at(@classification, 70, _lexer_class_alpha()); + _assign_at(@classification, 71, _lexer_class_alpha()); + _assign_at(@classification, 72, _lexer_class_alpha()); + _assign_at(@classification, 73, _lexer_class_alpha()); + _assign_at(@classification, 74, _lexer_class_alpha()); + _assign_at(@classification, 75, _lexer_class_alpha()); + _assign_at(@classification, 76, _lexer_class_alpha()); + _assign_at(@classification, 77, _lexer_class_alpha()); + _assign_at(@classification, 78, _lexer_class_alpha()); + _assign_at(@classification, 79, _lexer_class_alpha()); + _assign_at(@classification, 80, _lexer_class_alpha()); + _assign_at(@classification, 81, _lexer_class_alpha()); + _assign_at(@classification, 82, _lexer_class_alpha()); + _assign_at(@classification, 83, _lexer_class_alpha()); + _assign_at(@classification, 84, _lexer_class_alpha()); + _assign_at(@classification, 85, _lexer_class_alpha()); + _assign_at(@classification, 86, _lexer_class_alpha()); + _assign_at(@classification, 87, _lexer_class_alpha()); + _assign_at(@classification, 88, _lexer_class_alpha()); + _assign_at(@classification, 89, _lexer_class_alpha()); + _assign_at(@classification, 90, _lexer_class_alpha()); + _assign_at(@classification, 91, _lexer_class_alpha()); + _assign_at(@classification, 92, _lexer_class_single()); + _assign_at(@classification, 93, _lexer_class_backslash()); + _assign_at(@classification, 94, _lexer_class_single()); + _assign_at(@classification, 95, _lexer_class_single()); + _assign_at(@classification, 96, _lexer_class_alpha()); + _assign_at(@classification, 97, _lexer_class_other()); + _assign_at(@classification, 98, _lexer_class_hex()); + _assign_at(@classification, 99, _lexer_class_hex()); + _assign_at(@classification, 100, _lexer_class_hex()); + _assign_at(@classification, 101, _lexer_class_hex()); + _assign_at(@classification, 102, _lexer_class_hex()); + _assign_at(@classification, 103, _lexer_class_hex()); + _assign_at(@classification, 104, _lexer_class_alpha()); + _assign_at(@classification, 105, _lexer_class_alpha()); + _assign_at(@classification, 106, _lexer_class_alpha()); + _assign_at(@classification, 107, _lexer_class_alpha()); + _assign_at(@classification, 108, _lexer_class_alpha()); + _assign_at(@classification, 109, _lexer_class_alpha()); + _assign_at(@classification, 110, _lexer_class_alpha()); + _assign_at(@classification, 111, _lexer_class_alpha()); + _assign_at(@classification, 112, _lexer_class_alpha()); + _assign_at(@classification, 113, _lexer_class_alpha()); + _assign_at(@classification, 114, _lexer_class_alpha()); + _assign_at(@classification, 115, _lexer_class_alpha()); + _assign_at(@classification, 116, _lexer_class_alpha()); + _assign_at(@classification, 117, _lexer_class_alpha()); + _assign_at(@classification, 118, _lexer_class_alpha()); + _assign_at(@classification, 119, _lexer_class_alpha()); + _assign_at(@classification, 120, _lexer_class_alpha()); + _assign_at(@classification, 121, _lexer_class_x()); + _assign_at(@classification, 122, _lexer_class_alpha()); + _assign_at(@classification, 123, _lexer_class_alpha()); + _assign_at(@classification, 124, _lexer_class_other()); + _assign_at(@classification, 125, _lexer_class_single()); + _assign_at(@classification, 126, _lexer_class_other()); + _assign_at(@classification, 127, _lexer_class_single()); + _assign_at(@classification, 128, _lexer_class_invalid()); code := 129; (* Set the remaining 129 - 256 bytes to transitionClassOther. *) .create_classification_loop; - _assign_at(@classification, code, 22); + _assign_at(@classification, code, _lexer_class_other()); code := code + 1; if code < 257 then @@ -1990,7 +2162,7 @@ begin _lexer_set_transition(current_state, _lexer_class_left_paren(), default_action, next_state); _lexer_set_transition(current_state, _lexer_class_right_paren(), default_action, next_state); _lexer_set_transition(current_state, _lexer_class_asterisk(), default_action, next_state); - _lexer_set_transition(current_state, _lexer_class_underscore(), default_action, next_state); + _lexer_set_transition(current_state, _lexer_class_backslash(), default_action, next_state); _lexer_set_transition(current_state, _lexer_class_single(), default_action, next_state); _lexer_set_transition(current_state, _lexer_class_hex(), default_action, next_state); _lexer_set_transition(current_state, _lexer_class_zero(), default_action, next_state); @@ -2020,18 +2192,18 @@ begin _lexer_set_transition(_lexer_state_start(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal()); _lexer_set_transition(_lexer_state_start(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier()); _lexer_set_transition(_lexer_state_start(), _lexer_class_space(), _lexer_action_skip(), _lexer_state_start()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_colon(), _lexer_action_accumulate(), _lexer_state_greater()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_colon(), _lexer_action_accumulate(), _lexer_state_colon()); _lexer_set_transition(_lexer_state_start(), _lexer_class_equals(), _lexer_action_single(), _lexer_state_end()); _lexer_set_transition(_lexer_state_start(), _lexer_class_left_paren(), _lexer_action_accumulate(), _lexer_state_left_paren()); _lexer_set_transition(_lexer_state_start(), _lexer_class_right_paren(), _lexer_action_single(), _lexer_state_end()); _lexer_set_transition(_lexer_state_start(), _lexer_class_asterisk(), _lexer_action_single(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_underscore(), _lexer_action_accumulate(), _lexer_state_identifier()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_backslash(), _lexer_action_none(), _lexer_state_end()); _lexer_set_transition(_lexer_state_start(), _lexer_class_single(), _lexer_action_single(), _lexer_state_end()); _lexer_set_transition(_lexer_state_start(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier()); _lexer_set_transition(_lexer_state_start(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_leading_zero()); _lexer_set_transition(_lexer_state_start(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier()); _lexer_set_transition(_lexer_state_start(), _lexer_class_eof(), _lexer_action_eof(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_start(), _lexer_class_dot(), _lexer_action_accumulate(), _lexer_state_dot()); + _lexer_set_transition(_lexer_state_start(), _lexer_class_dot(), _lexer_action_single(), _lexer_state_end()); _lexer_set_transition(_lexer_state_start(), _lexer_class_minus(), _lexer_action_accumulate(), _lexer_state_minus()); _lexer_set_transition(_lexer_state_start(), _lexer_class_single_quote(), _lexer_action_accumulate(), _lexer_state_character()); _lexer_set_transition(_lexer_state_start(), _lexer_class_double_quote(), _lexer_action_accumulate(), _lexer_state_string()); @@ -2047,7 +2219,6 @@ begin _lexer_default_transition(_lexer_state_identifier(), _lexer_action_key_id(), _lexer_state_end()); _lexer_set_transition(_lexer_state_identifier(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_identifier()); _lexer_set_transition(_lexer_state_identifier(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_identifier()); - _lexer_set_transition(_lexer_state_identifier(), _lexer_class_underscore(), _lexer_action_accumulate(), _lexer_state_identifier()); _lexer_set_transition(_lexer_state_identifier(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_identifier()); _lexer_set_transition(_lexer_state_identifier(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_identifier()); _lexer_set_transition(_lexer_state_identifier(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_identifier()); @@ -2055,11 +2226,18 @@ begin (* Decimal state. *) _lexer_default_transition(_lexer_state_decimal(), _lexer_action_integer(), _lexer_state_end()); _lexer_set_transition(_lexer_state_decimal(), _lexer_class_digit(), _lexer_action_accumulate(), _lexer_state_decimal()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_alpha(), _lexer_action_accumulate(), _lexer_state_decimal_suffix()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_underscore(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_hex(), _lexer_action_accumulate(), _lexer_state_decimal_suffix()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); _lexer_set_transition(_lexer_state_decimal(), _lexer_class_zero(), _lexer_action_accumulate(), _lexer_state_decimal()); - _lexer_set_transition(_lexer_state_decimal(), _lexer_class_x(), _lexer_action_accumulate(), _lexer_state_decimal_suffix()); + _lexer_set_transition(_lexer_state_decimal(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end()); + + (* Leading zero. *) + _lexer_default_transition(_lexer_state_leading_zero(), _lexer_action_integer(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_x(), _lexer_action_none(), _lexer_state_dot()); (* Greater state. *) _lexer_default_transition(_lexer_state_greater(), _lexer_action_finalize(), _lexer_state_end()); @@ -2099,29 +2277,24 @@ begin _lexer_set_transition(_lexer_state_character(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); _lexer_set_transition(_lexer_state_character(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); _lexer_set_transition(_lexer_state_character(), _lexer_class_single_quote(), _lexer_action_delimited(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_character(), _lexer_class_backslash(), _lexer_action_accumulate(), _lexer_state_character_escape()); + + (* Escape sequence in a character. *) + _lexer_default_transition(_lexer_state_character_escape(), _lexer_action_accumulate(), _lexer_state_character()); + _lexer_set_transition(_lexer_state_character_escape(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_character_escape(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); (* String. *) _lexer_default_transition(_lexer_state_string(), _lexer_action_accumulate(), _lexer_state_string()); _lexer_set_transition(_lexer_state_string(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); _lexer_set_transition(_lexer_state_string(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()); _lexer_set_transition(_lexer_state_string(), _lexer_class_double_quote(), _lexer_action_delimited(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_string(), _lexer_class_backslash(), _lexer_action_accumulate(), _lexer_state_string_escape()); - (* Leading zero. *) - _lexer_default_transition(_lexer_state_leading_zero(), _lexer_action_integer(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_underscore(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_leading_zero(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end()); - - (* Digit with a character suffix. *) - _lexer_default_transition(_lexer_state_decimal_suffix(), _lexer_action_integer(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_digit(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_alpha(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_hex(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_zero(), _lexer_action_none(), _lexer_state_end()); - _lexer_set_transition(_lexer_state_decimal_suffix(), _lexer_class_x(), _lexer_action_none(), _lexer_state_end()) + (* Escape sequence in a string. *) + _lexer_default_transition(_lexer_state_string_escape(), _lexer_action_accumulate(), _lexer_state_string()); + _lexer_set_transition(_lexer_state_string_escape(), _lexer_class_invalid(), _lexer_action_none(), _lexer_state_end()); + _lexer_set_transition(_lexer_state_string_escape(), _lexer_class_eof(), _lexer_action_none(), _lexer_state_end()) end; (** @@ -2136,24 +2309,54 @@ end; (** * Lexer state is saved after the transition tables. * Each transition table entry is 8 bytes long. The table has 16 rows (transition states) - * and 22 columns (character classes), so 2816 = 8 * 16 * 22. + * and 22 columns (character classes), so 2992 = 8 * 17 * 22. *) proc _lexer_global_state(); - return _lexer_get_transition_table() + 2816 + return _lexer_get_transition_table() + 2992 end; (** * Gets pointer to the token start. *) -proc _lexer_global_start(); - return _lexer_global_state() + 4 +proc _lexer_global_get_start(); +var + target: Word; +begin + target := _lexer_global_state() + 4; + return _load_word(target) +end; + +(** + * Sets pointer to the token start. + *) +proc _lexer_global_set_start(new_start: Word); +var + target: Word; +begin + target := _lexer_global_state() + 4; + _store_word(new_start, target) end; (** * Gets pointer to the token end. *) -proc _lexer_global_end(); - return _lexer_global_start() + 4 +proc _lexer_global_get_end(); +var + target: Word; +begin + target := _lexer_global_state() + 8; + return _load_word(target) +end; + +(** + * Sets pointer to the token end. + *) +proc _lexer_global_set_end(new_start: Word); +var + target: Word; +begin + target := _lexer_global_state() + 8; + _store_word(new_start, target) end; proc _lexer_transition_get_action(transition: Word); @@ -2180,17 +2383,13 @@ end; proc _lexer_reset(); var state: Word; - current: Word; begin (* Transition start state is 1. *) state := _lexer_global_state(); _store_word(_lexer_state_start(), state); - current := _lexer_global_start(); - _store_word(source_code_position, current); - - current := _lexer_global_end(); - _store_word(source_code_position, current) + state := _lexer_global_get_start(); + _lexer_global_set_end(state) end; (** @@ -2199,7 +2398,10 @@ end; proc _lexer_initialize(); begin _lexer_classifications(); - _lexer_transitions() + _lexer_transitions(); + + _lexer_global_set_start(@source_code); + _lexer_global_set_end(@source_code) end; proc _lexer_next_transition(); @@ -2208,8 +2410,7 @@ var character_class: Word; current_state: Word; begin - current_character := _lexer_global_end(); - current_character := _load_word(current_character); + current_character := _lexer_global_get_end(); current_character := _load_byte(current_character); character_class := _get_at(@classification, current_character + 1); @@ -2452,7 +2653,7 @@ proc _lexer_token_kind_at(); return 57 end; -proc _lexer_token_kind_exclamation(); +proc _lexer_token_kind_comment(); return 58 end; @@ -2476,6 +2677,10 @@ proc _lexer_token_kind_goto(); return 63 end; +proc _lexer_token_kind_eof(); + return 64 +end; + proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word); var result: Word; @@ -2522,6 +2727,10 @@ begin result := _lexer_token_kind_else() elsif _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then result := _lexer_token_kind_elsif() + elsif _lexer_compare_keyword(position_start, token_length, "or", 2) = 1 then + result := _lexer_token_kind_or() + elsif _lexer_compare_keyword(position_start, token_length, "xor", 2) = 1 then + result := _lexer_token_kind_xor() end; return result end; @@ -2538,6 +2747,14 @@ begin result := _lexer_token_kind_colon() elsif character = '.' then result := _lexer_token_kind_dot() + elsif character = '(' then + result := _lexer_token_kind_left_paren() + elsif character = '-' then + result := _lexer_token_kind_minus() + elsif character = '<' then + result := _lexer_token_kind_less_than() + elsif character = '>' then + result := _lexer_token_kind_greater_than() end; return result end; @@ -2552,44 +2769,126 @@ begin if character = ';' then result := _lexer_token_kind_semicolon() + elsif character = ',' then + result := _lexer_token_kind_comma() + elsif character = ')' then + result := _lexer_token_kind_right_paren() + elsif character = '@' then + result := _lexer_token_kind_at() + elsif character = '~' then + result := _lexer_token_kind_not() + elsif character = '&' then + result := _lexer_token_kind_and() + elsif character = '+' then + result := _lexer_token_kind_plus() + elsif character = '*' then + result := _lexer_token_kind_multiplication() + elsif character = '=' then + result := _lexer_token_kind_equals() + elsif character = '%' then + result := _lexer_token_kind_remainder() + elsif character = '/' then + result := _lexer_token_kind_division() + elsif character = '.' then + result := _lexer_token_kind_dot() + elsif character = '^' then + result := _lexer_token_kind_hat() + end; + return result +end; + +proc _lexer_classify_composite(start_position: Word, one_before_last: Word); +var + first_character: Word; + last_character: Word; + result: Word; +begin + first_character := _load_byte(start_position); + last_character := _load_byte(one_before_last); + + if first_character = ':' then + result := _lexer_token_kind_assignment() + elsif first_character = '<' then + if last_character = '=' then + result := _lexer_token_kind_less_equal() + elsif last_character = '>' then + result := _lexer_token_kind_not_equal() + end + elsif first_character = '>' then + if last_character = '=' then + result := _lexer_token_kind_greater_equal() + end + end; + + return result +end; + +proc _lexer_classify_delimited(start_position: Word, end_position: Word); +var + token_length: Word; + delimiter: Word; + result: Word; +begin + token_length := end_position + -start_position; + delimiter := _load_byte(start_position); + + if delimiter = '(' then + result := _lexer_token_kind_comment() + elsif delimiter = '\'' then + result := _lexer_token_kind_character() + elsif delimiter = '"' then + result := _lexer_token_kind_string() end; return result end; +proc _lexer_classify_integer(start_position: Word, end_position: Word); +begin + return _lexer_token_kind_integer() +end; + proc _lexer_execute_action(action_to_perform: Word, kind: Word); var - pointer_start: Word; - pointer_end: Word; position_start: Word; position_end: Word; intermediate: Word; begin - pointer_start := _lexer_global_start(); - position_start := _load_word(pointer_start); - pointer_end := _lexer_global_end(); - position_end := _load_word(pointer_end); + position_start := _lexer_global_get_start(); + position_end := _lexer_global_get_end(); if action_to_perform = _lexer_action_none() then elsif action_to_perform = _lexer_action_accumulate() then - _store_word(position_end + 1, pointer_end) + _lexer_global_set_end(position_end + 1) elsif action_to_perform = _lexer_action_skip() then - _store_word(position_start + 1, pointer_start); - _store_word(position_end + 1, pointer_end) + _lexer_global_set_start(position_start + 1); + _lexer_global_set_end(position_end + 1) elsif action_to_perform = _lexer_action_single() then - _store_word(position_end + 1, pointer_end); + _lexer_global_set_end(position_end + 1); intermediate := _lexer_classify_single(position_start); _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_eof() then + intermediate := _lexer_token_kind_eof(); + _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_finalize() then intermediate := _lexer_classify_finalize(position_start); _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_composite() then + _lexer_global_set_end(position_end + 1); + + intermediate := _lexer_classify_composite(position_start, position_end); + _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_key_id() then intermediate := _lexer_classify_keyword(position_start, position_end); _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_integer() then + intermediate := _lexer_classify_integer(position_start, position_end); + _store_word(intermediate, kind) elsif action_to_perform = _lexer_action_delimited() then + _lexer_global_set_end(position_end + 1); + + intermediate := _lexer_classify_delimited(position_start, position_end + 1); + _store_word(intermediate, kind) end; end; @@ -2620,19 +2919,12 @@ begin end; (** - * Reads the next token. - * - * Returns token length in a0. + * Reads the next token and writes its type into the address in the kind parameter. *) proc _lexer_read_token(kind: Word); -var - new_position: Word; begin _lexer_reset(); - _lexer_advance_token(kind); - - new_position := _lexer_global_end(); - return _load_word(new_position) + -source_code_position + _lexer_advance_token(kind) end; (** @@ -2640,10 +2932,10 @@ end; *) proc _lexer_skip_token(); var - new_position: Word; + old_end: Word; begin - new_position := _lexer_global_end(); - source_code_position := _load_word(new_position) + old_end := _lexer_global_get_end(); + _lexer_global_set_start(old_end) end; (* @@ -2652,7 +2944,7 @@ end; proc _start(); var last_read: Word; - offset: Wort; + offset: Word; begin _lexer_initialize(); _symbol_table_build(); |
