1 files changed, 265 insertions, 37 deletions
diff --git a/boot/stage7.elna b/boot/stage7.elna
index 6973963..f83a8a5 100644
--- a/boot/stage7.elna
+++ b/boot/stage7.elna
@@ -4,7 +4,9 @@
 
 # Stage 7 compiler.
 #
-# - String literals.
+# - Static global variable and constant initialization.
+# - Object sections are determined automatically.
+# - _start is always exported.
 
 .section .bss
 
@@ -1098,57 +1100,291 @@ begin
 	_advance_token(5);
 end;
 
-proc _compile_type();
+proc _skip_newlines();
 begin
-	# Print and skip the ".type" (5 characters) directive and a space after it.
-	_write_token(6);
-	_advance_token();
+	# t0 = address of source_code_position, t1 = the current source pointer.
+	la t0, source_code_position
+	lw t1, (t0)
 
-	# Read and print the symbol name.
-	_read_token();
+.skip_newlines_loop:
+	lb t2, (t1)
+	li t3, '\n'
+	bne t2, t3, .skip_newlines_end
+	beqz t2, .skip_newlines_end
+	# t2 equals '\n' at this point, so the zero check above never branches.
+	addi t1, t1, 1
+	sw t1, (t0)
+
+	goto .skip_newlines_loop;
+
+.skip_newlines_end:
+end;
+
+# Skip newline characters and "#" comments (via _skip_comment) at the current position.
+proc _skip_empty_lines();
+begin
+.skip_empty_lines_loop:
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	# t0 now holds the character at the current source position.
+	li t1, '#'
+	beq t0, t1, .skip_empty_lines_comment
+
+	li t1, '\n'
+	beq t0, t1, .skip_empty_lines_newline
+	# Any other character ends the skipping.
+	goto .skip_empty_lines_end;
+
+.skip_empty_lines_comment:
+	_skip_comment();
+	goto .skip_empty_lines_loop;
+
+.skip_empty_lines_newline:
+	_advance_token(1);
+	goto .skip_empty_lines_loop;
 
-	# Print and skip the symbol name, comma, space and @.
-	addi a0, a0, 3
+.skip_empty_lines_end:
+end;
+
+proc _compile_global_initializer();
+begin
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	# Dispatch on the first character: string, record, pointer or number.
+	li t1, '"'
+	beq t0, t1, .compile_global_initializer_string
+
+	li t1, 'S'
+	beq t0, t1, .compile_global_initializer_record
+
+	li t1, '@'
+	beq t0, t1, .compile_global_initializer_pointer
+	# Otherwise the initializer has to start with a digit.
+	la a0, source_code_position
+	lw a0, (a0)
+	lb a0, (a0)
+	_is_digit();
+	bnez a0, .compile_global_initializer_number
+	# Unsupported initializer: stop on an illegal instruction.
+	unimp
+
+.compile_global_initializer_pointer:
+	# Skip "@", then emit the referenced token as a .word operand.
+	_advance_token(1);
+	_write_z("\n\t.word \0");
+	_read_token();
 	_write_token();
 	_advance_token();
 
-	# Read the symbol type.
-	_read_token();
+	goto .compile_global_initializer_end;
 
-	# Print the symbol type and newline.
-	addi a0, a0, 1
+.compile_global_initializer_number:
+	_write_z("\n\t.word \0");
+	_read_token();
 	_write_token();
+	_advance_token();
+	# The whole number is skipped (as in the pointer case), not only one digit.
+	goto .compile_global_initializer_end;
+
+.compile_global_initializer_record:
+	# Skip "S(". An immediately following ")" means an empty record.
+	_advance_token(2);
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	li t1, ')'
+	beq t0, t1, .compile_global_initializer_closing
+
+.compile_global_initializer_loop:
+	_compile_global_initializer();
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	li t1, ')'
+	beq t0, t1, .compile_global_initializer_closing
+
+	# Skip comma and whitespace after it, then compile the next field.
+	_advance_token(2);
+
+	goto .compile_global_initializer_loop;
+
+.compile_global_initializer_closing:
+	# Skip ")".
+	_advance_token(1);
+
+	goto .compile_global_initializer_end;
+
+.compile_global_initializer_string:
+	_write_z("\n\t.word strings + \0");
+	_string_length(source_code_position);
+	sw a0, 4(sp)
+	# 4(sp) keeps the string length; it is presumably what v4 reads back below.
+	_add_string(source_code_position);
+	_write_i();
+
+	# Skip the quoted string: its length plus the two quotes.
+	_advance_token(v4 + 2);
+
+	goto .compile_global_initializer_end;
+
+.compile_global_initializer_end:
+end;
+
+proc _compile_constant_declaration();
+begin
+	_read_token();
+	sw a0, 0(sp)
+	# Emit ".type <name>, @object" followed by the "<name>:" label.
+	_write_z(".type \0");
+	_write_token(v0);
+	_write_z(", @object\n\0");
+
+	_write_token(v0);
+	_write_c(':');
+
+	# Skip the name, the assignment sign and its surrounding whitespace (4 characters).
+	_advance_token(v0 + 4);
+	_compile_global_initializer();
+	# Skip semicolon and newline, then terminate the emitted definition.
+	_advance_token(2);
+	_write_c('\n');
+end;
+
+proc _compile_const_part();
+begin
+	_skip_empty_lines();
+	# Without a leading "const" keyword there is nothing to compile.
+	_memcmp(source_code_position, "const\0", 5);
+	bnez a0, .compile_const_part_end
+
+	# Skip "const" with the newline after it and open the .rodata section.
+	_advance_token(6);
+	_write_z(".section .rodata # Compiled from const section.\n\n\0");
+
+.compile_const_part_loop:
+	_skip_empty_lines();
+
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+
+	# If the character at the line beginning is not indentation,
+	# it is probably the next code section.
+	li t1, '\t'
+	bne t0, t1, .compile_const_part_end
+	# Skip the indentation.
+	_advance_token(1);
+	# An indented comment is left to _skip_empty_lines at the top of the loop.
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	li t1, '#'
+	beq t0, t1, .compile_const_part_loop
+
+	_compile_constant_declaration();
+	goto .compile_const_part_loop;
+
+.compile_const_part_end:
+end;
+
+proc _compile_variable_declaration();
+begin
+	_read_token();
+	sw a0, 0(sp)
+	# Emit ".type <name>, @object" followed by the "<name>:" label.
+	_write_z(".type \0");
+	_write_token(v0);
+	_write_z(", @object\n\0");
+
+	_write_token(v0);
+	_write_c(':');
+
+	# Skip the variable name and colon with space before the type.
+	_advance_token(v0 + 2);
+
+	# Skip the type name; a space right after it introduces an initializer.
+	_read_token();
 	_advance_token();
 
-	# Write the object definition itself.
-	_compile_line();
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	li t1, ' '
+	beq t0, t1, .compile_variable_declaration_initializer
+
+	# No initializer: the variable is assumed to be a zeroed array of 81920 bytes.
+	_write_z(" .zero 81920\0");
+	goto .compile_variable_declaration_finalize;
 
-.compile_type_end:
+.compile_variable_declaration_initializer:
+	# Skip the assignment sign with surrounding whitespaces (4 characters).
+	_advance_token(4);
+	_compile_global_initializer();
+	goto .compile_variable_declaration_finalize;
+
+.compile_variable_declaration_finalize:
+	# Skip semicolon and newline, then terminate the emitted definition.
+	_advance_token(2);
+	_write_c('\n');
 end;
 
-proc _skip_newlines();
+proc _compile_var_part();
 begin
-	# Skip newlines.
+	_memcmp(source_code_position, "var\0", 3);
+	bnez a0, .compile_var_part_end
+	# Reached only when the source continues with "var".
+	# Skip "var" with the newline after it and open the .data section.
+	_advance_token(4);
+	_write_z(".section .data\n\0");
+	# 'p' ends the part, a tab starts a declaration, other lines go to _compile_line.
+.compile_var_part_loop:
 	la t0, source_code_position
-	lw t1, (t0)
+	lw t0, (t0)
+	lb t0, (t0)
 
-.skip_newlines_loop:
-	lb t2, (t1)
-	li t3, '\n'
-	bne t2, t3, .skip_newlines_end
-	beqz t2, .skip_newlines_end
+	li t1, 'p'
+	beq t0, t1, .compile_var_part_end
 
-	addi t1, t1, 1
-	sw t1, (t0)
+	li t1, '\t'
+	beq t0, t1, .compile_var_part_declaration
 
-	goto .skip_newlines_loop;
+	_compile_line();
+	goto .compile_var_part_loop;
 
-.skip_newlines_end:
+.compile_var_part_declaration:
+	_advance_token(1);
+	_compile_variable_declaration();
+	goto .compile_var_part_loop;
+
+.compile_var_part_end:
 end;
 
 # Process the source code and print the generated code.
 proc _compile_module();
 begin
+	_compile_const_part();
+	_write_z(".section .bss\n\0");
+
+.compile_module_bss:
+	la t0, source_code_position
+	lw t0, (t0)
+	lb t0, (t0)
+	li t1, 'v'
+	beq t0, t1, .compile_module_code
+
+	li t1, 'p'
+	beq t0, t1, .compile_module_code
+
+	_compile_line();
+	goto .compile_module_bss;
+
+.compile_module_code:
+	_compile_var_part();
+	_write_z(".section .text\n\0");
 .compile_module_loop:
 	_skip_newlines();
 
@@ -1163,10 +1399,6 @@ begin
 	_memcmp(source_code_position, ".section", 8);
 	beqz a0, .compile_module_section
 
-	# 5 is ".type" length.
-	_memcmp(source_code_position, ".type", 5);
-	beqz a0, .compile_module_type
-
 	# 5 is "proc " length. Space is needed to distinguish from "procedure".
 	_memcmp(source_code_position, "proc ", 5);
 	beqz a0, .compile_module_procedure
@@ -1183,11 +1415,6 @@ begin
 
 	goto .compile_module_loop;
 
-.compile_module_type:
-	_compile_type();
-
-	goto .compile_module_loop;
-
 .compile_module_global:
 	_compile_line();
 
@@ -1208,6 +1435,7 @@ end;
 
 proc _compile();
 begin
+	_write_z(".globl _start\n\n\0");
 	_compile_module();
 
 	_write_z(".section .rodata\n.type strings, @object\nstrings: .ascii \0");