Implement elsif for if-statements

author: Eugen Wissner <belka@caraus.de> 2025-09-23 22:22:38 +0200
committer: Eugen Wissner <belka@caraus.de> 2025-09-23 22:22:38 +0200
commit: 0cc41f2d838630f5117d57e1491ffd4a6d613832 (patch)
tree: 119f3f76ca5c6a0cdd817575e8df565519fd6a9c /boot/stage13.elna
parent: 6e9086aa26a37ef8d89dd54b773e614a80efe720 (diff)
download: elna-0cc41f2d838630f5117d57e1491ffd4a6d613832.tar.gz
1 files changed, 685 insertions, 164 deletions
diff --git a/boot/stage13.elna b/boot/stage13.elna
index 66f6593..925a1cd 100644
--- a/boot/stage13.elna
+++ b/boot/stage13.elna
@@ -5,6 +5,7 @@
 (* Stage 13 compiler. *)
 
 (* - Multiline comments. *)
+(* - elsif conditions. *)
 
 const
 	symbol_builtin_name_int := "Int";
@@ -114,7 +115,7 @@ end;
 (* Returns the amount of bytes written in a0. *)
 proc _read_file(buffer: Word, size: Word);
 begin
-	_syscall(0, buffer, size, 0, 0, 0, 63);
+	return _syscall(0, buffer, size, 0, 0, 0, 63)
 end;
 
 (* Writes to the standard output. *)
@@ -346,12 +347,6 @@ begin
 	return destination
 end;
 
-(* Advances the token stream by a0 bytes. *)
-proc _advance_token(count: Word);
-begin
-	source_code_position := source_code_position + count;
-end;
-
 (* Prints the current token. *)
 
 (* Parameters: *)
@@ -367,12 +362,13 @@ end;
 proc _compile_integer_literal();
 var
 	integer_token: Word;
+	token_kind: Word;
 begin
 	_write_z("\tli t0, \0");
 
-	integer_token := _lexer_read_token();
+	integer_token := _lexer_read_token(@token_kind);
 	_write_token(integer_token);
-	_advance_token(integer_token);
+	_lexer_skip_token();
 
 	_write_c('\n');
 end;
@@ -382,16 +378,16 @@ var
 	character: Word;
 begin
 	_write_z("\tli t0, '\0");
-	_advance_token(1);
+	source_code_position := source_code_position + 1;
 
 	character := _load_byte(source_code_position);
 	if character = '\\' then
 		_write_c('\\');
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 	end;
 	_write_s(source_code_position, 1);
 	_write_s("'\n", 2);
-	_advance_token(2);
+	source_code_position := source_code_position + 2;
 end;
 
 proc _compile_variable_expression();
@@ -403,14 +399,14 @@ end;
 proc _compile_address_expression();
 begin
 	(* Skip the "@" sign. *)
-	_advance_token(1);
+	source_code_position := source_code_position + 1;
 	_compile_designator();
 end;
 
 proc _compile_negate_expression();
 begin
 	(* Skip the "-" sign. *)
-	_advance_token(1);
+	source_code_position := source_code_position + 1;
 	_compile_term();
 
 	_write_z("\tneg t0, t0\n\0");
@@ -419,7 +415,7 @@ end;
 proc _compile_not_expression();
 begin
 	(* Skip the "~" sign. *)
-	_advance_token(1);
+	source_code_position := source_code_position + 1;
 	_compile_term();
 
 	_write_z("\tnot t0, t0\n\0");
@@ -433,7 +429,8 @@ begin
 	length := _string_length(source_code_position);
 	offset := _add_string(source_code_position);
 
-	_advance_token(length + 2);
+	source_code_position := source_code_position + length;
+	source_code_position := source_code_position + 2;
 	_write_z("\tla t0, strings\n\0");
 
 	_write_z("\tli t1, \0");
@@ -479,7 +476,7 @@ end;
 proc _compile_binary_rhs();
 begin
 	(* Skip the whitespace after the binary operator. *)
-	_advance_token(1);
+	source_code_position := source_code_position + 1;
 	_compile_term();
 
 	(* Load the left expression from the stack; *)
@@ -502,11 +499,11 @@ begin
 	_write_z("sw t0, 64(sp)\n\0");
 
 	(* Skip surrounding whitespace in front of the operator. *)
-	_advance_token(1);
+	source_code_position := source_code_position + 1;
 	current_character := _load_byte(source_code_position);
 
 	if current_character = '+' then
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 		_compile_binary_rhs();
 
 		(* Execute the operation. *)
@@ -515,7 +512,7 @@ begin
 		goto .compile_expression_end;
 	end;
 	if current_character = '*' then
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 		_compile_binary_rhs();
 
 		(* Execute the operation. *)
@@ -524,7 +521,7 @@ begin
 		goto .compile_expression_end;
 	end;
 	if current_character = '&' then
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 		_compile_binary_rhs();
 
 		(* Execute the operation. *)
@@ -533,7 +530,7 @@ begin
 		goto .compile_expression_end;
 	end;
 	if current_character = 'o' then
-		_advance_token(2);
+		source_code_position := source_code_position + 2;
 		_compile_binary_rhs();
 
 		(* Execute the operation. *)
@@ -542,7 +539,7 @@ begin
 		goto .compile_expression_end;
 	end;
 	if current_character = 'x' then
-		_advance_token(3);
+		source_code_position := source_code_position + 3;
 		_compile_binary_rhs();
 
 		(* Execute the operation. *)
@@ -551,7 +548,7 @@ begin
 		goto .compile_expression_end;
 	end;
 	if current_character = '=' then
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 		_compile_binary_rhs();
 
 		(* Execute the operation. *)
@@ -560,7 +557,7 @@ begin
 		goto .compile_expression_end;
 	end;
 	if current_character = '%' then
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 		_compile_binary_rhs();
 
 		(* Execute the operation. *)
@@ -569,7 +566,7 @@ begin
 		goto .compile_expression_end;
 	end;
 	if current_character = '/' then
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 		_compile_binary_rhs();
 
 		(* Execute the operation. *)
@@ -578,11 +575,11 @@ begin
 		goto .compile_expression_end;
 	end;
 	if current_character = '<' then
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 		current_character := _load_byte(source_code_position);
 
 		if current_character = '>' then
-			_advance_token(1);
+			source_code_position := source_code_position + 1;
 			_compile_binary_rhs();
 
 			(* Execute the operation. *)
@@ -591,7 +588,7 @@ begin
 			goto .compile_expression_end;
 		end;
 		if current_character = '=' then
-			_advance_token(1);
+			source_code_position := source_code_position + 1;
 			_compile_binary_rhs();
 
 			(* Execute the operation. *)
@@ -607,10 +604,10 @@ begin
 		goto .compile_expression_end;
 	end;
 	if current_character = '>' then
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 		current_character := _load_byte(source_code_position);
 		if current_character = '=' then
-			_advance_token(1);
+			source_code_position := source_code_position + 1;
 			_compile_binary_rhs();
 
 			(* Execute the operation. *)
@@ -621,7 +618,7 @@ begin
 		_compile_binary_rhs();
 
 		(* Execute the operation. *)
-		_write_z("\tslt t0, t1, t0\n\0");
+		_write_z("\tslt t0, t0, t1\n\0");
 
 		goto .compile_expression_end;
 	end;
@@ -635,16 +632,21 @@ var
 	name: Word;
 	argument_count: Word;
 	stack_offset: Word;
+	token_kind: Word;
 begin
-	name_length := _lexer_read_token();
-	name := source_code_position;
+	name_length := _lexer_read_token(@token_kind);
+	name := _lexer_global_start();
+	name := _load_word(name);
+	name_length := _lexer_global_end();
+	name_length := _load_word(name_length) + -name;
 	argument_count := 0;
 
 	(* Skip the identifier and left paren. *)
-	_advance_token(name_length + 1);
+	_lexer_skip_token();
+	source_code_position := source_code_position + 1;
 
 	if _load_byte(source_code_position) = ')' then
-		goto .compile_call_finalize
+		goto .compile_call_finalize;
 	end;
 	.compile_call_loop;
 	_compile_expression();
@@ -664,7 +666,7 @@ begin
 	if _load_byte(source_code_position) <> ',' then
 		goto .compile_call_finalize;
 	end;
-	_advance_token(2);
+	source_code_position := source_code_position + 2;
 	goto .compile_call_loop;
 
 	.compile_call_finalize;
@@ -692,43 +694,47 @@ begin
 	_write_s(name, name_length);
 
 	(* Skip the right paren. *)
-	_advance_token(1);
+	source_code_position := source_code_position + 1;
 end;
 
 proc _compile_goto();
 var
 	next_token: Word;
+	token_kind: Word;
 begin
-	_advance_token(6);
+	_lexer_read_token(@token_kind);
+	_lexer_skip_token();
+
+	source_code_position := source_code_position + 2;
 
-	next_token := _lexer_read_token();
+	next_token := _lexer_read_token(@token_kind);
 	_write_z("\tj .\0");
 
 	_write_token(next_token);
-	_advance_token(next_token);
+	_lexer_skip_token();
 end;
 
-proc _compile_local_designator(symbol: Word, name_length: Word);
+proc _compile_local_designator(symbol: Word);
 var
 	variable_offset: Word;
 begin
-
 	_write_z("\taddi t0, sp, \0");
 	variable_offset := _parameter_info_get_offset(symbol);
 	_write_i(variable_offset);
 	_write_c('\n');
-	_advance_token(name_length);
+	_lexer_skip_token();
 end;
 
 proc _compile_global_designator();
 var
 	name: Word;
+	token_kind: Word;
 begin
 	_write_z("\tla t0, \0");
 
-	name := _lexer_read_token();
+	name := _lexer_read_token(@token_kind);
 	_write_token(name);
-	_advance_token(name);
+	_lexer_skip_token();
 
 	_write_c('\n');
 end;
@@ -737,12 +743,18 @@ proc _compile_designator();
 var
 	name_token: Word;
 	lookup_result: Word;
+	token_kind: Word;
+	name: Word;
 begin
-	name_token := _lexer_read_token();
-	lookup_result := _symbol_table_lookup(@symbol_table_local, source_code_position, name_token);
+	name_token := _lexer_read_token(@token_kind);
+	name := _lexer_global_start();
+	name := _load_word(name);
+	name_token := _lexer_global_end();
+	name_token := _load_word(name_token) + -name;
+	lookup_result := _symbol_table_lookup(@symbol_table_local, name, name_token);
 
 	if lookup_result <> 0 then
-		_compile_local_designator(lookup_result, name_token);
+		_compile_local_designator(lookup_result);
 		goto .compile_designator_end;
 	end;
 	_compile_global_designator();
@@ -758,7 +770,7 @@ begin
 	_write_z("\tsw t0, 60(sp)\n\0");
 
 	(* Skip the assignment sign (:=) with surrounding whitespaces. *)
-	_advance_token(4);
+	source_code_position := source_code_position + 4;
 
 	(* Compile the assignment. *)
 	_compile_expression();
@@ -767,9 +779,13 @@ begin
 end;
 
 proc _compile_return_statement();
+var
+	token_kind: Word;
 begin
 	(* Skip "return" keyword and whitespace after it. *)
-	_advance_token(7);
+	_lexer_read_token(@token_kind);
+	_lexer_skip_token();
+	source_code_position := source_code_position + 1;
 	_compile_expression();
 
 	_write_z("\tmv a0, t0\n\0");
@@ -789,13 +805,18 @@ proc _compile_if();
 var
 	after_end_label: Word;
 	condition_label: Word;
+	token_kind: Word;
 begin
 	(* Skip "if ". *)
-	_advance_token(3);
+	_lexer_read_token(@token_kind);
+	_lexer_skip_token();
+	source_code_position := source_code_position + 1;
+
 	(* Compile condition. *)
 	_compile_expression();
 	(* Skip " then" with newline. *)
-	_advance_token(6);
+	_lexer_read_token(@token_kind);
+	_lexer_skip_token();
 
 	after_end_label := label_counter;
 	label_counter := label_counter + 1;
@@ -817,20 +838,53 @@ begin
 	_write_label(condition_label);
 	_write_z(":\n\0");
 
-	if _memcmp(source_code_position, "end", 3) = 0 then
+	.compile_if_loop;
+
+	_lexer_read_token(@token_kind);
+	if token_kind = _lexer_token_kind_end() then
 		goto .compile_if_end;
 	end;
-	if _memcmp(source_code_position, "else", 3) = 0 then
-		goto .compile_if_else
+	if token_kind = _lexer_token_kind_else() then
+		goto .compile_if_else;
+	end;
+	if token_kind = _lexer_token_kind_elsif() then
+		goto .compile_if_elsif;
 	end;
+	.compile_if_elsif;
+	_lexer_skip_token();
+	source_code_position := source_code_position + 1;
+
+	(* Compile condition. *)
+	_compile_expression();
+	(* Skip " then" with newline. *)
+	_lexer_read_token(@token_kind);
+	_lexer_skip_token();
+
+	(* condition_label is the label in front of the next elsif condition or end. *)
+	condition_label := label_counter;
+	label_counter := label_counter + 1;
+
+	_write_z("\tbeqz t0, \0");
+	_write_label(condition_label);
+	_write_c('\n');
+
+	_compile_procedure_body();
+
+	_write_z("\tj \0");
+	_write_label(after_end_label);
+	_write_c('\n');
+
+	_write_label(condition_label);
+	_write_z(":\n\0");
+
+	goto .compile_if_loop;
+
 	.compile_if_else;
-	(* Skip "else" and newline. *)
-	_advance_token(5);
+	_lexer_skip_token();
 	_compile_procedure_body();
 
 	.compile_if_end;
-	(* Skip "end". *)
-	_advance_token(3);
+	_lexer_skip_token();
 
 	_write_label(after_end_label);
 	_write_z(":\n\0");
@@ -839,74 +893,77 @@ end;
 proc _compile_label_declaration();
 var
 	label_token: Word;
+	token_kind: Word;
+	name: Word;
 begin
 	(* Skip the dot. *)
-	_advance_token(1);
-	label_token := _lexer_read_token();
+	_lexer_read_token(@token_kind);
+	_lexer_skip_token();
+	label_token := _lexer_read_token(@token_kind);
+	name := _lexer_global_start();
+	name := _load_word(name);
 	_write_c('.');
-	_write_s(source_code_position, label_token);
+	_write_s(name, label_token);
 	_write_z(":\n\0");
-	_advance_token(label_token);
+	_lexer_skip_token();
 end;
 
 proc _compile_statement();
 var
 	current_byte: Word;
+	token_kind: Word;
 begin
-	_skip_spaces();
-	current_byte := _load_byte(source_code_position);
+	_lexer_read_token(@token_kind);
 
-	(* This is a call if the statement starts with an underscore. *)
-	if current_byte = '_' then
-		_compile_call();
-		goto .compile_statement_semicolon;
-	end;
-	if _memcmp(source_code_position, "goto ", 5) = 0 then
+	if token_kind = _lexer_token_kind_goto() then
 		_compile_goto();
 		goto .compile_statement_semicolon;
 	end;
-	if _memcmp(source_code_position, "if ", 3) = 0 then
+	if token_kind = _lexer_token_kind_if() then
 		_compile_if();
 		goto .compile_statement_semicolon;
 	end;
-	if _memcmp(source_code_position, "return ", 7) = 0 then
+	if token_kind = _lexer_token_kind_return() then
 		_compile_return_statement();
-		_write_c('\n');
-
-		goto .compile_statement_end;
+		goto .compile_statement_semicolon;
 	end;
-	if current_byte = '.' then
+	if token_kind = _lexer_token_kind_dot() then
 		_compile_label_declaration();
+		goto .compile_statement_semicolon;
+	end;
+	if token_kind = _lexer_token_kind_identifier() then
+		current_byte := _lexer_global_start();
+		current_byte := _load_word(current_byte);
+		current_byte := _load_byte(current_byte);
 
+		(* This is a call if the statement starts with an underscore. *)
+		if current_byte = '_' then
+			_compile_call();
+		else
+			_compile_assignment();
+		end;
 		goto .compile_statement_semicolon;
 	end;
-	_compile_assignment();
-	goto .compile_statement_semicolon;
 
 	.compile_statement_semicolon;
-	_advance_token(2);
 	_write_c('\n');
-
-	.compile_statement_end;
 end;
 
 proc _compile_procedure_body();
 var
-	lhs: Word;
-	rhs: Word;
+	token_kind: Word;
 begin
 	.compile_procedure_body_loop;
-	_skip_empty_lines();
-	_skip_spaces();
 
-	lhs := _memcmp(source_code_position, "end", 3) = 0;
-	rhs := _memcmp(source_code_position, "else", 4) = 0;
-	lhs := lhs or rhs;
+	_skip_empty_lines();
+	_compile_statement();
+	_lexer_read_token(@token_kind);
 
-	if lhs = 0 then
-		_compile_statement();
+	if token_kind = _lexer_token_kind_semicolon() then
+		_lexer_skip_token();
 		goto .compile_procedure_body_loop;
 	end;
+	_skip_empty_lines();
 end;
 
 (* Writes a regster name to the standard output. *)
@@ -927,7 +984,7 @@ var
 begin
 	current_byte := _load_byte(source_code_position);
 	if current_byte = '\t' then
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 		_skip_spaces();
 	end;
 end;
@@ -935,9 +992,10 @@ end;
 proc _read_type_expression();
 var
 	type_name: Word;
+	token_kind: Word;
 begin
-	type_name := _lexer_read_token();
-	_advance_token(type_name);
+	type_name := _lexer_read_token(@token_kind);
+	_lexer_skip_token();
 end;
 
 (* Parameters: *)
@@ -1009,14 +1067,15 @@ var
 	name_length: Word;
 	info: Word;
 	name_position: Word;
+	token_kind: Word;
 begin
 	(* Read the parameter name. *)
 	name_position := source_code_position;
-	name_length := _lexer_read_token();
-	_advance_token(name_length);
+	name_length := _lexer_read_token(@token_kind);
+	_lexer_skip_token();
 
 	(* Skip colon and space in front of the type expression. *)
-	_advance_token(2);
+	source_code_position := source_code_position + 2;
 
 	_read_type_expression();
 
@@ -1038,7 +1097,7 @@ var
 	parameter_counter: Word;
 begin
 	(* Skip open paren. *)
-	_advance_token(1);
+	source_code_position := source_code_position + 1;
 	parameter_counter := 0;
 
 	.compile_procedure_prologue_skip;
@@ -1047,12 +1106,12 @@ begin
 		parameter_counter := parameter_counter + 1;
 
 		if _load_byte(source_code_position) = ',' then
-			_advance_token(2);
+			source_code_position := source_code_position + 2;
 			goto .compile_procedure_prologue_skip;
 		end;
 	end;
 	(* Skip close paren. *)
-	_advance_token(1);
+	source_code_position := source_code_position + 1;
 end;
 
 (* Parameters: *)
@@ -1062,13 +1121,15 @@ var
 	name_length: Word;
 	info: Word;
 	name_position: Word;
+	token_kind: Word;
 begin
 	_skip_spaces();
 	name_position := source_code_position;
 
 	(* Read and skip variable name, colon and the space *)
-	name_length := _lexer_read_token();
-	_advance_token(name_length + 2);
+	name_length := _lexer_read_token(@token_kind);
+	_lexer_skip_token(name_length);
+	source_code_position := source_code_position + 2;
 
 	_read_type_expression();
 
@@ -1076,7 +1137,7 @@ begin
 	_symbol_table_enter(@symbol_table_local, name_position, name_length, info);
 
 	(* Skip semicolon and newline after the variable declaration *)
-	_advance_token(2);
+	source_code_position := source_code_position + 2;
 end;
 
 proc _read_procedure_temporaries();
@@ -1086,7 +1147,7 @@ begin
 	if _memcmp(source_code_position, "var", 3) <> 0 then
 		goto .read_local_variables_end;
 	end;
-	_advance_token(4);
+	source_code_position := source_code_position + 4;
 	temporary_counter := 0;
 
 	.read_local_variables_loop;
@@ -1104,13 +1165,14 @@ end;
 proc _compile_procedure();
 var
 	name_length: Word;
+	token_kind: Word;
 begin
 	(* Skip "proc ". *)
-	_advance_token(5);
+	source_code_position := source_code_position + 5;
 	(* Clear local symbol table. *)
 	_store_word(0, @symbol_table_local);
 
-	name_length := _lexer_read_token();
+	name_length := _lexer_read_token(@token_kind);
 
 	(* Write .type _procedure_name, @function. *)
 	_write_z(".type \0");
@@ -1123,16 +1185,16 @@ begin
 	_write_z(":\n\0");
 
 	(* Skip procedure name. *)
-	_advance_token(name_length);
+	_lexer_skip_token();
 	_write_z("\taddi sp, sp, -128\n\tsw ra, 124(sp)\n\tsw s0, 120(sp)\n\taddi s0, sp, 128\n\0");
 	_read_procedure_parameters();
 
 	(* Skip semicolon and newline. *)
-	_advance_token(2);
+	source_code_position := source_code_position + 2;
 	_read_procedure_temporaries();
 
 	(* Skip semicolon, "begin" and newline. *)
-	_advance_token(6);
+	source_code_position := source_code_position + 6;
 
 	_compile_procedure_body();
 
@@ -1140,16 +1202,17 @@ begin
 	_write_z("\tlw ra, 124(sp)\n\tlw s0, 120(sp)\n\taddi sp, sp, 128\n\tret\n\0");
 
 	(* Skip the "end" keyword, semicolon and newline. *)
-	_advance_token(5);
+	source_code_position := source_code_position + 5;
 end;
 
 (* Prints and skips a line. *)
 proc _skip_comment();
 var
-	new_position: Word;
+	token_kind: Word;
 begin
-	new_position := _lexer_read_token();
-	_advance_token(new_position + 1);
+	_lexer_read_token(@token_kind);
+	_lexer_skip_token();
+	source_code_position := source_code_position + 1;
 end;
 
 (* Skip newlines and comments. *)
@@ -1176,7 +1239,7 @@ begin
 	current_byte := _load_byte(current_position + 1);
 
 	if current_byte = '*' then
-		goto .skip_empty_lines_comment
+		goto .skip_empty_lines_comment;
 	end;
 	goto .skip_empty_lines_end;
 
@@ -1191,7 +1254,7 @@ begin
 
 	.skip_empty_lines_tab;
 	current_position := current_position + 1;
-	goto .skip_empty_lines_loop
+	goto .skip_empty_lines_loop;
 
 	.skip_empty_lines_end;
 end;
@@ -1200,6 +1263,7 @@ proc _compile_global_initializer();
 var
 	current_byte: Word;
 	length: Word;
+	token_kind: Word;
 begin
 	current_byte := _load_byte(source_code_position);
 
@@ -1211,13 +1275,14 @@ begin
 		_write_i();
 
 		(* Skip the quoted string. *)
-		_advance_token(length + 2);
+		source_code_position := source_code_position + length;
+		source_code_position := source_code_position + 2;
 
 		goto .compile_global_initializer_end;
 	end;
 	if current_byte = 'S' then
 		(* Skip "S(". *)
-		_advance_token(2);
+		source_code_position := source_code_position + 2;
 
 		if _load_byte(source_code_position) = ')' then
 			goto .compile_global_initializer_closing;
@@ -1226,19 +1291,19 @@ begin
 	end;
 	if current_byte = '@' then
 		(* Skip @. *)
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 		_write_z("\n\t.word \0");
-		current_byte := _lexer_read_token();
+		current_byte := _lexer_read_token(@token_kind);
 		_write_token(current_byte);
-		_advance_token(current_byte);
+		_lexer_skip_token();
 
 		goto .compile_global_initializer_end;
 	end;
 	if _is_digit(current_byte) = 1 then
 		_write_z("\n\t.word \0");
-		current_byte := _lexer_read_token();
+		current_byte := _lexer_read_token(@token_kind);
 		_write_token(current_byte);
-		_advance_token(1);
+		source_code_position := source_code_position + 1;
 
 		goto .compile_global_initializer_end;
 	end;
@@ -1248,14 +1313,14 @@ begin
 
 	if _load_byte(source_code_position) <> ')' then
 		(* Skip comma and whitespace after it. *)
-		_advance_token(2);
+		source_code_position := source_code_position + 2;
 
 		goto .compile_global_initializer_loop;
 	end;
 
 	.compile_global_initializer_closing;
 	(* Skip ")" *)
-	_advance_token(1);
+	source_code_position := source_code_position + 1;
 
 	goto .compile_global_initializer_end;
 
@@ -1265,8 +1330,9 @@ end;
 proc _compile_constant_declaration();
 var
 	name_length: Word;
+	token_kind: Word;
 begin
-	name_length := _lexer_read_token();
+	name_length := _lexer_read_token(@token_kind);
 
 	_write_z(".type \0");
 	_write_token(name_length);
@@ -1276,22 +1342,26 @@ begin
 	_write_c(':');
 
 	(* Skip the constant name with assignment sign and surrounding whitespaces. *)
-	_advance_token(name_length + 4);
+	_lexer_skip_token();
+	source_code_position := source_code_position + 4;
 	_compile_global_initializer();
 	(* Skip semicolon and newline. *)
-	_advance_token(2);
+	source_code_position := source_code_position + 2;
 	_write_c('\n');
 end;
 
 proc _compile_const_part();
+var
+	token_kind: Word;
 begin
 	_skip_empty_lines();
+	_lexer_read_token(@token_kind);
 
-	if _memcmp(source_code_position, "const\0", 5) <> 0 then
+	if token_kind <> _lexer_token_kind_const() then
 		goto .compile_const_part_end;
 	end;
 	(* Skip "const" with the newline after it. *)
-	_advance_token(6);
+	_lexer_skip_token();
 	_write_z(".section .rodata # Compiled from const section.\n\n\0");
 
 	.compile_const_part_loop;
@@ -1300,8 +1370,7 @@ begin
 	(* If the character at the line beginning is not indentation, *)
 	(* it is probably the next code section. *)
 	if _load_byte(source_code_position) = '\t' then
-		_advance_token(1);
-
+		source_code_position := source_code_position + 1;
 		_compile_constant_declaration();
 		goto .compile_const_part_loop;
 	end;
@@ -1312,8 +1381,9 @@ end;
 proc _compile_variable_declaration();
 var
 	name_length: Word;
+	token_kind: Word;
 begin
-	name_length := _lexer_read_token();
+	name_length := _lexer_read_token(@token_kind);
 
 	_write_z(".type \0");
 	_write_token(name_length);
@@ -1323,7 +1393,9 @@ begin
 	_write_c(':');
 
 	(* Skip the variable name and colon with space before the type. *)
-	_advance_token(name_length + 2);
+	_lexer_skip_token();
+	_lexer_read_token(@token_kind);
+	_lexer_skip_token();
 	_read_type_expression();
 
 	if _load_byte(source_code_position) <> ' ' then
@@ -1331,32 +1403,34 @@ begin
 		_write_z(" .zero 81920\0");
 	else
 		(* Skip the assignment sign with surrounding whitespaces. *)
-		_advance_token(4);
+		source_code_position := source_code_position + 4;
 		_compile_global_initializer();
 	end;
 
 	(* Skip semicolon and newline. *)
-	_advance_token(2);
+	_lexer_read_token(@token_kind);
+	_lexer_skip_token();
 	_write_c('\n');
 end;
 
 proc _compile_var_part();
 var
-	current_character: Word;
+	token_kind: Word;
 begin
-	if _memcmp(source_code_position, "var\0", 3) <> 0 then
+	_lexer_read_token(@token_kind);
+
+	if token_kind <> _lexer_token_kind_var() then
 		goto .compile_var_part_end;
 	end;
 	(* Skip "var" and newline. *)
-	_advance_token(4);
+	_lexer_skip_token();
 	_write_z(".section .data\n\0");
 
 	.compile_var_part_loop;
 	_skip_empty_lines();
-	current_character := _load_byte(source_code_position);
+	_lexer_read_token(@token_kind);
 
-	if current_character = '\t' then
-		_advance_token(1);
+	if token_kind = _lexer_token_kind_identifier() then
 		_compile_variable_declaration();
 		goto .compile_var_part_loop;
 	end;
@@ -2222,14 +2296,445 @@ begin
 	return _lexer_get_transition(current_state, character_class)
 end;
 
-proc _lexer_execute_action(action_to_perform: Word);
+proc _lexer_token_kind_identifier();
+begin
+	return 1
+end;
+
+proc _lexer_token_kind_const();
+begin
+	return 2
+end;
+
+proc _lexer_token_kind_var();
+begin
+	return 3
+end;
+
+proc _lexer_token_kind_proc();
+begin
+	return 4
+end;
+
+proc _lexer_token_kind_type();
+begin
+	return 5
+end;
+
+proc _lexer_token_kind_begin();
+begin
+	return 6
+end;
+
+proc _lexer_token_kind_end();
+begin
+	return 7
+end;
+
+proc _lexer_token_kind_if();
+begin
+	return 8
+end;
+
+proc _lexer_token_kind_then();
+begin
+	return 9
+end;
+
+proc _lexer_token_kind_else();
+begin
+	return 10
+end;
+
+proc _lexer_token_kind_elsif();
+begin
+	return 11
+end;
+
+proc _lexer_token_kind_while();
+begin
+	return 12
+end;
+
+proc _lexer_token_kind_do();
+begin
+	return 13
+end;
+
+proc _lexer_token_kind_extern();
+begin
+	return 14
+end;
+
+proc _lexer_token_kind_record();
+begin
+	return 15
+end;
+
+proc _lexer_token_kind_union();
+begin
+	return 16
+end;
+
+proc _lexer_token_kind_true();
+begin
+	return 17
+end;
+
+proc _lexer_token_kind_false();
+begin
+	return 18
+end;
+
+proc _lexer_token_kind_nil();
+begin
+	return 19
+end;
+
+proc _lexer_token_kind_and();
+begin
+	return 20
+end;
+
+proc _lexer_token_kind_or();
+begin
+	return 21
+end;
+
+proc _lexer_token_kind_xor();
+begin
+	return 22
+end;
+
+proc _lexer_token_kind_pipe();
+begin
+	return 23
+end;
+
+proc _lexer_token_kind_not();
+begin
+	return 24
+end;
+
+proc _lexer_token_kind_return();
+begin
+	return 64
+end;
+
+proc _lexer_token_kind_module();
+begin
+	return 25
+end;
+
+proc _lexer_token_kind_program();
+begin
+	return 26
+end;
+
+proc _lexer_token_kind_import();
+begin
+	return 27
+end;
+
+proc _lexer_token_kind_cast();
+begin
+	return 28
+end;
+
+proc _lexer_token_kind_defer();
+begin
+	return 29
+end;
+
+proc _lexer_token_kind_case();
+begin
+	return 30
+end;
+
+proc _lexer_token_kind_of();
+begin
+	return 31
+end;
+
+proc _lexer_token_kind_trait();
+begin
+	return 32
+end;
+
+proc _lexer_token_kind_left_paren();
+begin
+	return 33
+end;
+
+proc _lexer_token_kind_right_paren();
+begin
+	return 34
+end;
+
+proc _lexer_token_kind_left_square();
+begin
+	return 35
+end;
+
+proc _lexer_token_kind_right_square();
+begin
+	return 36
+end;
+
+proc _lexer_token_kind_shift_left();
+begin
+	return 37
+end;
+
+proc _lexer_token_kind_shift_right();
+begin
+	return 38
+end;
+
+proc _lexer_token_kind_greater_equal();
+begin
+	return 39
+end;
+
+proc _lexer_token_kind_less_equal();
+begin
+	return 40
+end;
+
+proc _lexer_token_kind_greater_than();
+begin
+	return 41
+end;
+
+proc _lexer_token_kind_less_than();
+begin
+	return 42
+end;
+
+proc _lexer_token_kind_not_equal();
+begin
+	return 43
+end;
+
+proc _lexer_token_kind_equals();
+begin
+	return 44
+end;
+
+proc _lexer_token_kind_semicolon();
+begin
+	return 45
+end;
+
+proc _lexer_token_kind_dot();
+begin
+	return 46
+end;
+
+proc _lexer_token_kind_comma();
+begin
+	return 47
+end;
+
+proc _lexer_token_kind_plus();
+begin
+	return 48
+end;
+
+proc _lexer_token_kind_arrow();
+begin
+	return 49
+end;
+
+proc _lexer_token_kind_minus();
+begin
+	return 50
+end;
+
+proc _lexer_token_kind_multiplication();
+begin
+	return 51
+end;
+
+proc _lexer_token_kind_division();
+begin
+	return 52
+end;
+
+proc _lexer_token_kind_remainder();
+begin
+	return 53
+end;
+
+proc _lexer_token_kind_assignment();
+begin
+	return 54
+end;
+
+proc _lexer_token_kind_colon();
+begin
+	return 55
+end;
+
+proc _lexer_token_kind_hat();
+begin
+	return 56
+end;
+
+proc _lexer_token_kind_at();
+begin
+	return 57
+end;
+
+proc _lexer_token_kind_exclamation();
+begin
+	return 58
+end;
+
+proc _lexer_token_kind_string();
+begin
+	return 59
+end;
+
+proc _lexer_token_kind_character();
+begin
+	return 60
+end;
+
+proc _lexer_token_kind_integer();
+begin
+	return 61
+end;
+
+proc _lexer_token_kind_word();
+begin
+	return 62
+end;
+
+proc _lexer_token_kind_goto();
+begin
+	return 63
+end;
+
+proc _lexer_compare_keyword(lhs_pointer: Word, lhs_length: Word, rhs_pointer: Word, rhs_length: Word);
+var
+	is_match: Word;
+begin
+	(* Lengths must agree before the bytes are compared; otherwise the result stays 0. *)
+	is_match := 0;
+	if rhs_length = lhs_length then
+		is_match := _memcmp(lhs_pointer, rhs_pointer, lhs_length) = 0;
+	end;
+	return is_match
+end;
+
+proc _lexer_classify_keyword(position_start: Word, position_end: Word);
+var
+	result: Word;
+	token_length: Word;
+begin
+	result := _lexer_token_kind_identifier();
+	token_length := position_end + -position_start;
+	(* Compare the token text with each keyword in turn; without a match the identifier kind is returned. *)
+	if _lexer_compare_keyword(position_start, token_length, "const", 5) = 1 then
+		result := _lexer_token_kind_const();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "var", 3) = 1 then
+		result := _lexer_token_kind_var();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "proc", 4) = 1 then
+		result := _lexer_token_kind_proc();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "type", 4) = 1 then
+		result := _lexer_token_kind_type();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "begin", 5) = 1 then
+		result := _lexer_token_kind_begin();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "end", 3) = 1 then
+		result := _lexer_token_kind_end();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "return", 6) = 1 then
+		result := _lexer_token_kind_return();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "goto", 4) = 1 then
+		result := _lexer_token_kind_goto();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "if", 2) = 1 then
+		result := _lexer_token_kind_if();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "while", 5) = 1 then
+		result := _lexer_token_kind_while();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "then", 4) = 1 then
+		result := _lexer_token_kind_then();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "else", 4) = 1 then
+		result := _lexer_token_kind_else();
+		goto .lexer_classify_keyword_end;
+	end;
+	if _lexer_compare_keyword(position_start, token_length, "elsif", 5) = 1 then
+		result := _lexer_token_kind_elsif();
+		goto .lexer_classify_keyword_end;
+	end;
+	.lexer_classify_keyword_end;
+	return result
+end;
+
+proc _lexer_classify_finalize(start_position: Word);
+var
+	first_byte: Word;
+	token_kind: Word;
+begin
+	(* Only a colon or a dot is recognized here; any other byte yields 0. *)
+	token_kind := 0;
+	first_byte := _load_byte(start_position);
+	if first_byte = ':' then
+		token_kind := _lexer_token_kind_colon();
+		goto .lexer_classify_finalize_result;
+	end;
+	if first_byte = '.' then
+		token_kind := _lexer_token_kind_dot();
+		goto .lexer_classify_finalize_result;
+	end;
+	.lexer_classify_finalize_result;
+	return token_kind
+end;
+
+proc _lexer_classify_single(start_position: Word);
+var
+	first_byte: Word;
+	token_kind: Word;
+begin
+	(* The semicolon is the only byte classified here; any other byte yields 0. *)
+	token_kind := 0;
+	first_byte := _load_byte(start_position);
+	if first_byte = ';' then
+		token_kind := _lexer_token_kind_semicolon();
+	end;
+	return token_kind
+end;
+
+proc _lexer_execute_action(action_to_perform: Word, kind: Word);
 var
 	pointer_start: Word;
 	pointer_end: Word;
 	position_start: Word;
 	position_end: Word;
+	intermediate: Word;
 begin
-	pointer_start := _lexer_global_end();
+	pointer_start := _lexer_global_start();
 	position_start := _load_word(pointer_start);
 	pointer_end := _lexer_global_end();
 	position_end := _load_word(pointer_end);
@@ -2247,34 +2752,40 @@ begin
 		goto .action_to_perform_end;
 	end;
 	if action_to_perform = _lexer_action_single() then
+		_store_word(position_end + 1, pointer_end);
+
+		intermediate := _lexer_classify_single(position_start);
+		_store_word(intermediate, kind);
 		goto .action_to_perform_end;
 	end;
 	if action_to_perform = _lexer_action_eof() then
 		goto .action_to_perform_end;
 	end;
 	if action_to_perform = _lexer_action_finalize() then
+		intermediate := _lexer_classify_finalize(position_start);
+		_store_word(intermediate, kind);
 		goto .action_to_perform_end;
 	end;
 	if action_to_perform = _lexer_action_composite() then
 		goto .action_to_perform_end;
 	end;
 	if action_to_perform = _lexer_action_key_id() then
-		_store_word(position_end + 1, pointer_end);
+		intermediate := _lexer_classify_keyword(position_start, position_end);
+		_store_word(intermediate, kind);
 		goto .action_to_perform_end;
 	end;
 	if action_to_perform = _lexer_action_integer() then
-		_store_word(position_end + 1, pointer_end);
 		goto .action_to_perform_end;
 	end;
 	if action_to_perform = _lexer_action_delimited() then
-		_store_word(position_end + 1, pointer_end);
+		(* _store_word(position_end + 1, pointer_end); *)
 		goto .action_to_perform_end;
 	end;
 
 	.action_to_perform_end;
 end;
 
-proc _lexer_execute_transition();
+proc _lexer_execute_transition(kind: Word);
 var
 	next_transition: Word;
 	next_state: Word;
@@ -2288,50 +2799,60 @@ begin
 	global_state := _lexer_global_state();
 
 	_store_word(next_state, global_state);
-	_lexer_execute_action(action_to_perform);
+	_lexer_execute_action(action_to_perform, kind);
 
 	return next_state
 end;
 
-proc _lexer_advance_token();
-var
-	executed_transition: Word;
+(* Executes lexer transitions until one of them returns _lexer_state_end. *)
+
+(* kind is handed over unchanged to every _lexer_execute_transition call. *)
+proc _lexer_advance_token(kind: Word);
 begin
-	.lexer_advance_token_loop;
-	executed_transition := _lexer_execute_transition();
-
-	if executed_transition <> _lexer_state_end() then
-		goto .lexer_advance_token_loop;
+	(* Recursion replaces the former goto loop, one nested call per executed transition. *)
+	if _lexer_execute_transition(kind) <> _lexer_state_end() then
+		_lexer_advance_token(kind);
 	end;
 end;
 
 (* Reads the next token. *)
 
+(* Parameters: *)
+(* kind - Address passed down to _lexer_execute_action, which stores the token kind there. *)
+
 (* Returns token length in a0. *)
-proc _lexer_read_token();
+proc _lexer_read_token(kind: Word);
 var
 	new_position: Word;
-	token_end: Word;
 begin
 	_lexer_reset();
-	_lexer_advance_token();
+	_lexer_advance_token(kind);
 
 	new_position := _lexer_global_end();
-	token_end := _load_word(new_position);
-	token_end := token_end + -source_code_position;
+	(* Length is the stored end position minus the current source_code_position. *)
+	return _load_word(new_position) + -source_code_position
+end;
 
-	return token_end + -1
+(* Moves source_code_position to the end position the lexer stored for the last read token. *)
+proc _lexer_skip_token();
+var
+	end_pointer: Word;
+begin
+	(* _lexer_global_end yields the address holding the stored end position. *)
+	end_pointer := _lexer_global_end();
+	source_code_position := _load_word(end_pointer);
 end;
 
 (* Entry point. *)
 proc _start();
+var
+	last_read: Word;
+	offset: Word;
+	remaining: Word;
 begin
 	_lexer_initialize();
 	_symbol_table_build();
 
 	(* Read the source from the standard input. *)
-	(* Second argument is buffer size. Modifying update the source_code definition. *)
-	_read_file(@source_code, 81920);
+	offset := @source_code;
+	(* 81920 is the buffer size. When modifying it, update the source_code definition too. *)
+	remaining := 81920;
+
+	.start_read;
+	(* Request only the space that is still free so repeated reads stay inside source_code. *)
+	last_read := _read_file(offset, remaining);
+	(* Continue after the bytes just read. The loop stops once _read_file returns zero or less. *)
+	if last_read > 0 then
+		offset := offset + last_read;
+		remaining := remaining + -last_read;
+		goto .start_read;
+	end;
 	_compile();
 
 	_exit(0);
author	Eugen Wissner <belka@caraus.de>	2025-09-23 22:22:38 +0200
committer	Eugen Wissner <belka@caraus.de>	2025-09-23 22:22:38 +0200
commit	0cc41f2d838630f5117d57e1491ffd4a6d613832 (patch)
tree	119f3f76ca5c6a0cdd817575e8df565519fd6a9c /boot/stage13.elna
parent	6e9086aa26a37ef8d89dd54b773e614a80efe720 (diff)
download	elna-0cc41f2d838630f5117d57e1491ffd4a6d613832.tar.gz