1 files changed, 295 insertions, 52 deletions
diff --git a/boot/stage8.elna b/boot/stage8.elna
index 6973963..c88e19d 100644
--- a/boot/stage8.elna
+++ b/boot/stage8.elna
@@ -2,11 +2,37 @@
 # v. 2.0. If a copy of the MPL was not distributed with this file, You can
 # obtain one at https://mozilla.org/MPL/2.0/.
 
-# Stage 7 compiler.
+# Stage 8 compiler.
 #
-# - String literals.
 
-.section .bss
+const
+	symbol_builtin_name_int := "Int";
+	symbol_builtin_name_word := "Word";
+	symbol_builtin_name_pointer := "Pointer";
+	symbol_builtin_name_char := "Char";
+	symbol_builtin_name_bool := "Bool";
+
+	# Every type record starts with a word identifying the kind of type.
+	#
+	# PRIMITIVE_TYPE = 1
+	#
+	# A primitive type record holds only the type size as its second member.
+	symbol_builtin_type_int := S(1, 4);
+	symbol_builtin_type_word := S(1, 4);
+	symbol_builtin_type_pointer := S(1, 4);
+	symbol_builtin_type_char := S(1, 1);
+	symbol_builtin_type_bool := S(1, 1);
+
+	# Every info object starts with a word identifying the kind of info.
+	#
+	# INFO_TYPE = 1
+	#
+	# A type info object holds the address of the type record it belongs to.
+	symbol_type_info_int := S(1, @symbol_builtin_type_int);
+	symbol_type_info_word := S(1, @symbol_builtin_type_word);
+	symbol_type_info_pointer := S(1, @symbol_builtin_type_pointer);
+	symbol_type_info_char := S(1, @symbol_builtin_type_char);
+	symbol_type_info_bool := S(1, @symbol_builtin_type_bool);
 
 # When modifiying also change the read size in the entry point procedure.
 .type source_code, @object
@@ -15,18 +41,10 @@ source_code: .zero 81920
 .type compiler_strings, @object
 compiler_strings: .zero 8192
 
-.section .data
-
-.type compiler_strings_position, @object
-compiler_strings_position: .word compiler_strings
-
-.type compiler_strings_length, @object
-compiler_strings_length: .word 0
-
-.type source_code_position, @object
-source_code_position: .word source_code
-
-.section .text
+var
+	compiler_strings_position: Pointer := @compiler_strings;
+	compiler_strings_length: Word := 0;
+	source_code_position: Pointer := @source_code;
 
 # Calculates and returns the string token length between quotes, including the
 # escaping slash characters.
@@ -1098,57 +1116,291 @@ begin
 	_advance_token(5);
 end;
 
-proc _compile_type();
+proc _skip_newlines();
 begin
-	# Print and skip the ".type" (5 characters) directive and a space after it.
-	_write_token(6);
-	_advance_token();
+	# Move source_code_position past consecutive '\n' bytes; the beqz below is never taken since t2 equals '\n' there.
+	la t0, source_code_position
+	lw t1, (t0)
 
-	# Read and print the symbol name.
-	_read_token();
+.skip_newlines_loop:
+	lb t2, (t1)
+	li t3, '\n'
+	bne t2, t3, .skip_newlines_end
+	beqz t2, .skip_newlines_end
+
+	addi t1, t1, 1
+	sw t1, (t0)
+
+	goto .skip_newlines_loop;
+
+.skip_newlines_end:
+end;
 
-	# Print and skip the symbol name, comma, space and @.
-	addi a0, a0, 3
+# Skips any run of newline characters and '#' comments found at the current source position.
+proc _skip_empty_lines();
+begin
+.skip_empty_lines_loop:
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+
+	li t1, '#'
+	beq t0, t1, .skip_empty_lines_comment
+
+	li t1, '\n'
+	beq t0, t1, .skip_empty_lines_newline
+
+	goto .skip_empty_lines_end;
+
+.skip_empty_lines_comment:
+	_skip_comment();
+	goto .skip_empty_lines_loop;
+
+.skip_empty_lines_newline:
+	_advance_token(1);
+	goto .skip_empty_lines_loop;
+
+.skip_empty_lines_end:
+end;
+
+proc _compile_global_initializer();
+begin
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+
+	li t1, '"'
+	beq t0, t1, .compile_global_initializer_string
+
+	li t1, 'S'
+	beq t0, t1, .compile_global_initializer_record
+
+	li t1, '@'
+	beq t0, t1, .compile_global_initializer_pointer
+
+	la a0, source_code_position
+	lw a0, (a0)
+	lb a0, (a0)
+	_is_digit();
+	bnez a0, .compile_global_initializer_number
+
+	unimp
+
+.compile_global_initializer_pointer:
+	# Skip "@"; the symbol name after it is emitted as the .word operand.
+	_advance_token(1);
+	_write_z("\n\t.word \0");
+	_read_token();
 	_write_token();
 	_advance_token();
 
-	# Read the symbol type.
-	_read_token();
+	goto .compile_global_initializer_end;
 
-	# Print the symbol type and newline.
-	addi a0, a0, 1
+.compile_global_initializer_number:
+	_write_z("\n\t.word \0");
+	_read_token();
 	_write_token();
+	_advance_token();
+
+	goto .compile_global_initializer_end;
+
+.compile_global_initializer_record:
+	# Skip "S(", the opening of a record initializer.
+	_advance_token(2);
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	li t1, ')'
+	beq t0, t1, .compile_global_initializer_closing
+
+.compile_global_initializer_loop:
+	_compile_global_initializer();
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	li t1, ')'
+	beq t0, t1, .compile_global_initializer_closing
+
+	# Skip the comma and the space after it, then compile the next member.
+	_advance_token(2);
+
+	goto .compile_global_initializer_loop;
+
+.compile_global_initializer_closing:
+	# Skip the closing ")".
+	_advance_token(1);
+
+	goto .compile_global_initializer_end;
+
+.compile_global_initializer_string:
+	_write_z("\n\t.word strings + \0");
+	_string_length(source_code_position);
+	sw a0, 4(sp)
+
+	_add_string(source_code_position);
+	_write_i();
+
+	# Skip the string body (its length is kept in v4) and the two quotes.
+	_advance_token(v4 + 2);
+
+	goto .compile_global_initializer_end;
+
+.compile_global_initializer_end:
+end;
+
+proc _compile_constant_declaration();
+begin
+	_read_token();
+	sw a0, 0(sp)
+
+	_write_z(".type \0");
+	_write_token(v0);
+	_write_z(", @object\n\0");
+
+	_write_token(v0);
+	_write_c(':');
+
+	# Skip the constant name (v0 characters) and " := " (4 characters) to reach the initializer.
+	_advance_token(v0 + 4);
+	_compile_global_initializer();
+	# Skip the ";" ending the declaration and the newline after it.
+	_advance_token(2);
+	_write_c('\n');
+end;
+
+proc _compile_const_part();
+begin
+	_skip_empty_lines();
+
+	_memcmp(source_code_position, "const\0", 5);
+	bnez a0, .compile_const_part_end
+
+	# Skip "const" (5 characters) and the newline after it, then open the output section.
+	_advance_token(6);
+	_write_z(".section .rodata # Compiled from const section.\n\n\0");
+
+.compile_const_part_loop:
+	_skip_empty_lines();
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+
+	# Declarations are indented with a tab; any other character at the line
+	# beginning is taken as the start of the next section and ends this part.
+	li t1, '\t'
+	bne t0, t1, .compile_const_part_end
+
+	_advance_token(1);
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	li t1, '#'
+	beq t0, t1, .compile_const_part_loop
+
+	_compile_constant_declaration();
+	goto .compile_const_part_loop;
+
+.compile_const_part_end:
+end;
+
+proc _compile_variable_declaration();
+begin
+	_read_token();
+	sw a0, 0(sp)
+
+	_write_z(".type \0");
+	_write_token(v0);
+	_write_z(", @object\n\0");
+
+	_write_token(v0);
+	_write_c(':');
+
+	# Skip the variable name (v0 characters), the colon and the space before the type.
+	_advance_token(v0 + 2);
+
+	# Skip the type name; it is read but not otherwise used here.
+	_read_token();
 	_advance_token();
 
-	# Write the object definition itself.
-	_compile_line();
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	li t1, ' '
+	beq t0, t1, .compile_variable_declaration_initializer
 
-.compile_type_end:
+	# No initializer: the variable is assumed to be a zeroed array of 81920 bytes.
+	_write_z(" .zero 81920\0");
+	goto .compile_variable_declaration_finalize;
+
+.compile_variable_declaration_initializer:
+	# Skip " := " (4 characters) to reach the initializer.
+	_advance_token(4);
+	_compile_global_initializer();
+	goto .compile_variable_declaration_finalize;
+
+.compile_variable_declaration_finalize:
+	# Skip the ";" ending the declaration and the newline after it.
+	_advance_token(2);
+	_write_c('\n');
 end;
 
-proc _skip_newlines();
+proc _compile_var_part();
 begin
-	# Skip newlines.
+	_memcmp(source_code_position, "var\0", 3);
+	bnez a0, .compile_var_part_end
+
+	# Skip "var" (3 characters) and the newline after it, then emit the .data section header.
+	_advance_token(4);
+	_write_z(".section .data\n\0");
+
+.compile_var_part_loop:
 	la t0, source_code_position
-	lw t1, (t0)
+	lw t0, (t0)
+	lb t0, (t0)
 
-.skip_newlines_loop:
-	lb t2, (t1)
-	li t3, '\n'
-	bne t2, t3, .skip_newlines_end
-	beqz t2, .skip_newlines_end
+	li t1, 'p'
+	beq t0, t1, .compile_var_part_end
 
-	addi t1, t1, 1
-	sw t1, (t0)
+	li t1, '\t'
+	beq t0, t1, .compile_var_part_declaration
 
-	goto .skip_newlines_loop;
+	_compile_line();
+	goto .compile_var_part_loop;
 
-.skip_newlines_end:
+.compile_var_part_declaration:
+	_advance_token(1);
+	_compile_variable_declaration();
+	goto .compile_var_part_loop;
+
+.compile_var_part_end:
 end;
 
 # Process the source code and print the generated code.
 proc _compile_module();
 begin
+	_compile_const_part();
+	_write_z(".section .bss\n\0");
+
+.compile_module_bss:
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	li t1, 'v'
+	beq t0, t1, .compile_module_code
+
+	li t1, 'p'
+	beq t0, t1, .compile_module_code
+
+	_compile_line();
+	goto .compile_module_bss;
+
+.compile_module_code:
+	_compile_var_part();
+	_write_z(".section .text\n\0");
 .compile_module_loop:
 	_skip_newlines();
 
@@ -1163,10 +1415,6 @@ begin
 	_memcmp(source_code_position, ".section", 8);
 	beqz a0, .compile_module_section
 
-	# 5 is ".type" length.
-	_memcmp(source_code_position, ".type", 5);
-	beqz a0, .compile_module_type
-
 	# 5 is "proc " length. Space is needed to distinguish from "procedure".
 	_memcmp(source_code_position, "proc ", 5);
 	beqz a0, .compile_module_procedure
@@ -1183,11 +1431,6 @@ begin
 
 	goto .compile_module_loop;
 
-.compile_module_type:
-	_compile_type();
-
-	goto .compile_module_loop;
-
 .compile_module_global:
 	_compile_line();
 
@@ -1208,6 +1451,7 @@ end;
 
 proc _compile();
 begin
+	_write_z(".globl _start\n\n\0");
 	_compile_module();
 
 	_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0");
@@ -1247,7 +1491,6 @@ begin
 end;
 
 # Entry point.
-.globl _start
 proc _start();
 begin
 	# Read the source from the standard input.