13 Commits

18 changed files with 12473 additions and 4296 deletions

View File

@@ -1,9 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
source 'https://rubygems.org'
gem 'term-ansicolor', '~> 1.2'
gem 'rake', '~> 13.2'

View File

@@ -1,22 +0,0 @@
GEM
remote: https://rubygems.org/
specs:
bigdecimal (3.1.9)
rake (13.2.1)
sync (0.5.0)
term-ansicolor (1.11.2)
tins (~> 1.0)
tins (1.38.0)
bigdecimal
sync
PLATFORMS
ruby
x86_64-linux
DEPENDENCIES
rake (~> 13.2)
term-ansicolor (~> 1.2)
BUNDLED WITH
2.6.7

View File

@@ -5,18 +5,98 @@
require 'open3'
require 'rake/clean'
require 'term/ansicolor'
CLEAN.include 'build/boot'
# Stage names (without the extension) ordered by their stage number.
#
# A plain string sort would place "stage10" before "stage2", and `STAGES.last`
# would no longer be the newest stage once two-digit stages exist.
STAGES = Dir.glob('boot/stage*.elna')
  .map { |stage| File.basename stage, '.elna' }
  .sort_by { |stage| stage.delete_prefix('stage').to_i }
CLEAN.include 'build/boot', 'build/valid'
directory 'build/boot'
directory 'build/valid'
# Assembles and links +input+ into the executable +output+.
#
# The compiler driver is read from the CC environment variable and falls back
# to gcc.
def compile(input, output)
  driver = ENV.fetch('CC', 'gcc')
  flags = %w[-nostdlib -fpie -g]

  sh driver, *flags, '-o', output, input
end
# Builds the command line used to execute +exe+.
#
# The QEMU environment variable may contain an emulator command together with
# its arguments; it is split on whitespace and put in front of the executable.
def run(exe)
  emulator = ENV.fetch('QEMU', '').split

  [*emulator, exe]
end
task default: :boot
desc 'Final stage'
task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna'] do |t|
exe, previous_output, source = t.prerequisites
task boot: "build/valid/#{STAGES.last}"
task boot: "build/valid/#{STAGES.last}.s"
task boot: "boot/#{STAGES.last}.elna" do |t|
groupped = t.prerequisites.group_by { |stage| File.extname stage }.transform_values(&:first)
exe = groupped['']
expected = groupped['.s']
source = groupped['.elna']
cat_arguments = ['cat', source]
compiler_arguments = [QEMU, '-L', SYSROOT, exe]
diff_arguments = ['diff', '-Nur', '--text', previous_output, '-']
Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments)
diff_arguments = ['diff', '-Nur', '--text', expected, '-']
Open3.pipeline(cat_arguments, run(exe), diff_arguments)
end
desc 'Convert previous stage language into the current stage language'
task :convert do
  # Lines consisting only of a "#" comment are rewritten as "(* ... *)"
  # comments, an empty "#" comment becomes an empty line, everything else is
  # copied unchanged.
  File.open('boot/stage9.elna', 'w') do |target|
    File.foreach('boot/stage8.elna') do |line|
      indentation, text = line.match(/^(\s*)#(.*)/)&.captures

      target << if text.nil?
                  line
                elsif text.empty?
                  "\n"
                else
                  "#{indentation}(* #{text.strip} *)\n"
                end
    end
  end
end
# Builds an executable build/<directory>/stageN from the assembly file with the
# same name and the ".s" suffix.
rule(%r{^build/[[:alpha:]]+/stage[[:digit:]]+$} => ->(target) { "#{target}.s" }) do |t|
  compile(*t.prerequisites, t.name)
end
# For every stage N two assembly files are generated from boot/stageN.elna:
#
# - build/boot/stageN.s is the output of the previous stage executable,
#   build/valid/stage<N-1>.
# - build/valid/stageN.s is the output of the stage's own executable,
#   build/boot/stageN.
STAGES.each do |stage|
  previous = stage.delete_prefix('stage').to_i.pred

  file "build/valid/#{stage}.s" => ["build/boot/#{stage}", "boot/#{stage}.elna"] do |t|
    exe, source = t.prerequisites

    # The block form closes the pipe and waits for the child processes when
    # the block returns; the non-block form leaves both to the caller.
    Open3.pipeline_r(['cat', source], run(exe)) do |last_stdout, _wait_threads|
      IO.copy_stream last_stdout, t.name
    end
  end

  file "build/boot/#{stage}.s" => ["build/valid/stage#{previous}", "boot/#{stage}.elna"] do |t|
    exe, source = t.prerequisites

    Open3.pipeline_r(['cat', source], run(exe)) do |last_stdout, _wait_threads|
      IO.copy_stream last_stdout, t.name
    end
  end
end
#
# Stage 1.
#
# Feeds boot/stage1.s to the stage 1 executable and saves the output.
#
# The prerequisites are split into the ".s" sources and the rest; the first of
# the remaining prerequisites is the executable.
file 'build/valid/stage1.s' => ['build/boot/stage1', 'boot/stage1.s', 'build/valid'] do |t|
  source, exe, = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.s' }

  # The block form closes the pipe and waits for the child processes when the
  # block returns; the non-block form leaves both to the caller.
  Open3.pipeline_r(['cat', *source], run(exe.first)) do |last_stdout, _wait_threads|
    IO.copy_stream last_stdout, t.name
  end
end
# Builds the stage 1 executable from its assembly source. The build directory
# is a prerequisite as well, so only the ".s" prerequisites are passed to the
# compiler.
file 'build/boot/stage1' => ['build/boot', 'boot/stage1.s'] do |t|
  assembly_sources = t.prerequisites.grep(/\.s\z/)

  compile(*assembly_sources, t.name)
end

View File

@@ -1,502 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
.global _write_out, _read_file, _write_error, _put_char, _printi
.global _get, _memcmp, _memchr, _memmem, _memcpy
.global _divide_by_zero_error, _exit
.global _strings_index
.section .rodata
.equ SYS_READ, 63
.equ SYS_WRITE, 64
.equ SYS_EXIT, 93
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2
new_line: .ascii "\n"
.section .text
# Writes a string to the standard error. Ends the output with a newline.
#
# Parameters:
# a0 - String pointer.
# a1 - String length.
.type _write_error, @function
_write_error:
# Move the arguments away, a0 and a1 are needed for the system call.
mv t0, a0
mv t1, a1
# write(STDERR, string pointer, string length).
li a0, STDERR
mv a1, t0
mv a2, t1
li a7, SYS_WRITE
ecall
# write(STDERR, new_line, 1).
li a0, STDERR
la a1, new_line
li a2, 1
li a7, SYS_WRITE
ecall
ret
# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if memory regions are equal.
.type _memcmp, @function
_memcmp:
mv t0, a0
li a0, 0
.Lmemcmp_loop:
beqz a2, .Lmemcmp_end
lbu t1, (t0)
lbu t2, (a1)
sub a0, t1, t2
bnez a0, .Lmemcmp_end
addi t0, t0, 1
addi a1, a1, 1
addi a2, a2, -1
j .Lmemcmp_loop
.Lmemcmp_end:
ret
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
# Both bounds are checked with unsigned comparisons.
.type _is_upper, @function
_is_upper:
li t0, 'A' - 1
sltu t1, t0, a0 # t1 = a0 >= 'A'
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z'
ret
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
# Both bounds are checked with unsigned comparisons.
.type _is_lower, @function
_is_lower:
li t0, 'a' - 1
sltu t2, t0, a0 # t2 = a0 >= 'a'
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z'
ret
# Detects if the passed character is a 7-bit alpha character or an underscore.
# The character is passed in a0.
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
.type _is_alpha, @function
_is_alpha:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sw a0, 4(sp)
call _is_upper
sw a0, 0(sp)
lw a0, 4(sp)
call _is_lower
lw t0, 4(sp)
xori t1, t0, '_'
seqz t1, t1
lw t0, 0(sp)
or a0, a0, t0
or a0, a0, t1
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Detects whether the passed character is a digit
# (a value between 0 and 9).
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
.type _is_digit, @function
_is_digit:
li t0, '0' - 1
sltu t1, t0, a0 # t1 = a0 >= '0'
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
and a0, t1, t2
ret
.type _is_alnum, @function
_is_alnum:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sw a0, 4(sp)
call _is_alpha
sw a0, 0(sp)
lw a0, 4(sp)
call _is_digit
lw a1, 0(sp)
or a0, a0, a1
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
.type _write_out, @function
_write_out:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
mv a2, a1
mv a1, a0
li a0, STDOUT
li a7, SYS_WRITE
ecall
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Reads standard input into a buffer.
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Sets s1 to the buffer passed in a0.
#
# Returns the amount of bytes written in a0.
.type _read_file, @function
_read_file:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
mv s1, a0
li a0, STDIN
mv a2, a1
mv a1, s1
li a7, SYS_READ
ecall
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Terminates the program. a0 contains the return code.
#
# Parameters:
# a0 - Status code.
.type _exit, @function
_exit:
li a7, SYS_EXIT
ecall
# ret
.type _divide_by_zero_error, @function
_divide_by_zero_error:
addi a7, zero, 172 # getpid
ecall
addi a1, zero, 8 # SIGFPE
addi a7, zero, 129 # kill
ecall
ret
# Writes a signed whole number in decimal notation to the standard output.
#
# Parameters:
# a0 - Whole number.
#
# Registers used:
# t1 - Constant 10.
# a1 - Local buffer. It is filled from its last byte towards lower addresses.
# t2 - Current character.
# t3 - Whether the number is negative.
#
# NOTE(review): the smallest negative value has no positive counterpart, so it
# is presumably not printed correctly - confirm.
.type _printi, @function
_printi:
	# Prologue. The character buffer lies directly below s0 and, for a 32-bit
	# number, takes up to 11 bytes (10 digits and the sign). In a 16 byte
	# frame it reached into the return address saved at 4(sp) as soon as the
	# number had nine or more characters, therefore 32 bytes are reserved.
	addi sp, sp, -32
	sw s0, 0(sp)
	sw ra, 4(sp)
	addi s0, sp, 32

	addi t1, zero, 10
	addi a1, s0, -1 # Last byte of the buffer.

	# Remember the sign and continue with the absolute value.
	addi t3, zero, 0
	bge a0, zero, .digit10
	addi t3, zero, 1
	sub a0, zero, a0

.digit10:
	# Store the last decimal digit and drop it from the number.
	rem t2, a0, t1
	addi t2, t2, '0'
	sb t2, 0(a1)
	div a0, a0, t1
	addi a1, a1, -1
	bne zero, a0, .digit10

	beq zero, t3, .write_call
	addi t2, zero, '-'
	sb t2, 0(a1)
	addi a1, a1, -1

.write_call:
	addi a0, zero, 1 # Standard output.
	addi a1, a1, 1 # a1 was one byte below the first character.
	sub a2, s0, a1 # Length, the buffer ends right below s0.
	addi a7, zero, 64 # write
	ecall

	# Epilogue.
	lw s0, 0(sp)
	lw ra, 4(sp)
	addi sp, sp, 32
	ret
# Writes a character from a0 into the standard output.
.type _put_char, @function
_put_char:
# Prologue
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sb a0, 4(sp)
li a0, STDOUT
addi a1, sp, 4
li a2, 1
li a7, SYS_WRITE
ecall
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
add sp, sp, 16
ret
# a0 - Pointer to an array to get the first element.
#
# Dereferences a pointer and returns what is on the address in a0.
.type _get, @function
_get:
lw a0, (a0)
ret
# Searches for the first occurrence of a character in the given memory block.
#
# Parameters:
# a0 - Memory block.
# a1 - Needle.
# a2 - Memory size.
#
# Sets a0 to the pointer to the found character or to null if the character
# doesn't occur in the memory block.
.type _memchr, @function
_memchr:
.Lmemchr_loop:
beqz a2, .Lmemchr_nil # Exit if the length is 0.
lbu t0, (a0) # Load the character from the memory block.
beq t0, a1, .Lmemchr_end # Exit if the character was found.
# Otherwise, continue with the next character.
addi a0, a0, 1
addi a2, a2, -1
j .Lmemchr_loop
.Lmemchr_nil:
li a0, 0
.Lmemchr_end:
ret
# Locates a substring.
#
# Parameters:
# a0 - Haystack.
# a1 - Haystack size.
# a2 - Needle.
# a3 - Needle size.
#
# Sets a0 to the pointer to the beginning of the substring in memory or to 0
# if the substring doesn't occur in the block.
.type _memmem, @function
_memmem:
	# Prologue.
	addi sp, sp, -24
	sw ra, 20(sp)
	sw s0, 16(sp)
	addi s0, sp, 24

	# Save preserved registers. They are used to keep arguments.
	sw s1, 12(sp)
	sw s2, 8(sp)
	sw s3, 4(sp)
	sw s4, 0(sp)

	mv s1, a0 # Current position in the haystack.
	mv s2, a1 # Remaining haystack size.
	mv s3, a2 # Needle pointer.
	mv s4, a3 # Needle size.

.Lmemmem_loop:
	# Exit if the needle is longer than the remaining memory. The two sizes
	# have to be compared: s4 is the needle size, s3 is the needle pointer.
	blt s2, s4, .Lmemmem_nil

	# Compare the needle with the memory at the current position.
	mv a0, s1
	mv a1, s3
	mv a2, s4
	call _memcmp

	mv t0, a0 # memcmp result.
	mv a0, s1 # Memory pointer for the case the substring was found.
	beqz t0, .Lmemmem_end

	# Try again one byte further.
	addi s1, s1, 1
	addi s2, s2, -1

	j .Lmemmem_loop

.Lmemmem_nil:
	li a0, 0

.Lmemmem_end:
	# Restore the preserved registers.
	lw s1, 12(sp)
	lw s2, 8(sp)
	lw s3, 4(sp)
	lw s4, 0(sp)

	# Epilogue.
	lw ra, 20(sp)
	lw s0, 16(sp)
	addi sp, sp, 24
	ret
# Copies memory.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0.
.type _memcpy, @function
_memcpy:
mv t0, a0
.Lmemcpy_loop:
beqz a2, .Lmemcpy_end
lbu t1, (a1)
sb t1, (a0)
addi a0, a0, 1
addi a1, a1, 1
addi a2, a2, -1
j .Lmemcpy_loop
.Lmemcpy_end:
mv a0, t0
ret
# Searches for a string in a string array.
#
# Parameters:
# a0 - Number of elements in the string array.
# a1 - String array.
# a2 - Needle length.
# a3 - Needle.
#
# Sets a0 to the 1-based index of the needle in the haystack or to 0 if the
# element could not be found.
.type _strings_index, @function
_strings_index:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
sw s1, 20(sp)
mv s1, a0
sw s2, 16(sp)
mv s2, a1
sw s3, 12(sp)
mv s3, a2
sw s4, 8(sp)
mv s4, a3
sw s5, 4(sp)
li s5, 0 # Index counter.
.Lstrings_index_loop:
addi s5, s5, 1
beqz s1, .Lstrings_index_missing
lw a2, (s2) # Read the length of the current element in the haystack.
bne a2, s3, .Lstrings_index_next # Lengths don't match, skip the iteration.
addi a0, s2, 4
mv a1, s4
call _memcmp
beqz a0, .Lstrings_index_end
.Lstrings_index_next:
# Advance the pointer, reduce the length.
lw a2, (s2)
addi s2, s2, 4
add s2, s2, a2
addi s1, s1, -1
j .Lstrings_index_loop
.Lstrings_index_missing:
li s5, 0
.Lstrings_index_end:
mv a0, s5
lw s1, 20(sp)
lw s2, 16(sp)
lw s3, 12(sp)
lw s4, 8(sp)
lw s5, 4(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
add sp, sp, 32
ret

View File

@@ -1,63 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# Tokens.
#
# The constant should match the index in the keywords array in tokenizer.s.
.equ TOKEN_PROGRAM, 1
.equ TOKEN_IMPORT, 2
.equ TOKEN_CONST, 3
.equ TOKEN_VAR, 4
.equ TOKEN_IF, 5
.equ TOKEN_THEN, 6
.equ TOKEN_ELSIF, 7
.equ TOKEN_ELSE, 8
.equ TOKEN_WHILE, 9
.equ TOKEN_DO, 10
.equ TOKEN_PROC, 11
.equ TOKEN_BEGIN, 12
.equ TOKEN_END, 13
.equ TOKEN_TYPE, 14
.equ TOKEN_RECORD, 15
.equ TOKEN_UNION, 16
.equ TOKEN_TRUE, 17
.equ TOKEN_FALSE, 18
.equ TOKEN_NIL, 19
.equ TOKEN_XOR, 20
.equ TOKEN_OR, 21
.equ TOKEN_RETURN, 22
.equ TOKEN_CAST, 23
.equ TOKEN_GOTO, 24
.equ TOKEN_CASE, 25
.equ TOKEN_OF, 26
.equ TOKEN_IDENTIFIER, 27
# The constant should match the character index in the byte_keywords string.
.equ TOKEN_AND, TOKEN_IDENTIFIER + 1
.equ TOKEN_DOT, TOKEN_IDENTIFIER + 2
.equ TOKEN_COMMA, TOKEN_IDENTIFIER + 3
.equ TOKEN_COLON, TOKEN_IDENTIFIER + 4
.equ TOKEN_SEMICOLON, TOKEN_IDENTIFIER + 5
.equ TOKEN_LEFT_PAREN, TOKEN_IDENTIFIER + 6
.equ TOKEN_RIGHT_PAREN, TOKEN_IDENTIFIER + 7
.equ TOKEN_LEFT_BRACKET, TOKEN_IDENTIFIER + 8
.equ TOKEN_RIGHT_BRACKET, TOKEN_IDENTIFIER + 9
.equ TOKEN_HAT, TOKEN_IDENTIFIER + 10
.equ TOKEN_EQUALS, TOKEN_IDENTIFIER + 11
.equ TOKEN_PLUS, TOKEN_IDENTIFIER + 12
.equ TOKEN_MINUS, TOKEN_IDENTIFIER + 13
.equ TOKEN_ASTERISK, TOKEN_IDENTIFIER + 14
.equ TOKEN_AT, TOKEN_IDENTIFIER + 15
.equ TOKEN_ASSIGN, 43
.equ TOKEN_INTEGER, 44
#
# Symbols.
#
.equ TYPE_PRIMITIVE, 1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

971
boot/stage3.elna Normal file
View File

@@ -0,0 +1,971 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# Stage 3 compiler.
#
# - Procedures with multiple arguments.
# - Character literals with and without escaping.
.section .rodata
.type keyword_section, @object
keyword_section: .ascii ".section"
.type keyword_type, @object
keyword_type: .ascii ".type"
.type keyword_ret, @object
keyword_ret: .ascii "ret"
.type keyword_global, @object
keyword_global: .ascii ".globl"
.type keyword_proc, @object
keyword_proc: .ascii "proc "
.type keyword_end, @object
keyword_end: .ascii "end"
.type keyword_begin, @object
keyword_begin: .ascii "begin"
.type keyword_var, @object
keyword_var: .ascii "var"
.type asm_prologue, @object
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
.type asm_epilogue, @object
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
.type asm_type_directive, @object
asm_type_directive: .string ".type "
.type asm_type_function, @object
asm_type_function: .string ", @function\n"
.type asm_colon, @object
asm_colon: .string ":\n"
.type asm_call, @object
asm_call: .string "\tcall "
.type asm_j, @object
asm_j: .string "\tj "
.type asm_li, @object
asm_li: .string "\tli "
.type asm_lw, @object
asm_lw: .string "\tlw "
.type asm_sw, @object
asm_sw: .string "\tsw "
.type asm_mv, @object
asm_mv: .string "mv "
.type asm_t0, @object
asm_t0: .string "t0"
.type asm_a0, @object
asm_a0: .string "a0"
.type asm_comma, @object
asm_comma: .string ", "
.type asm_sp, @object
asm_sp: .string "(sp)"
.section .bss
# When modifying also change the read size in the entry point procedure.
.type source_code, @object
source_code: .zero 81920
.section .data
.type source_code_position, @object
source_code_position: .word source_code
.section .text
# Reads standard input into a buffer.
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Returns the amount of bytes written in a0.
proc _read_file();
begin
mv a2, a1
mv a1, a0
# STDIN.
li a0, 0
li a7, 63 # SYS_READ.
ecall
end;
# Writes to the standard output.
#
# Parameters:
# a0 - Buffer.
# a1 - Buffer length.
proc _write_s();
begin
mv a2, a1
mv a1, a0
# STDOUT.
li a0, 1
li a7, 64 # SYS_WRITE.
ecall
end;
# Writes a number to a string buffer.
#
# t0 - Local buffer.
# t1 - Constant 10.
# t2 - Current character.
# t3 - Whether the number is negative.
#
# Parameters:
# a0 - Whole number.
# a1 - Buffer pointer.
#
# Sets a0 to the length of the written number.
proc _print_i();
begin
li t1, 10
addi t0, s0, -9
li t3, 0
bgez a0, .print_i_digit10
li t3, 1
neg a0, a0
.print_i_digit10:
rem t2, a0, t1
addi t2, t2, '0'
sb t2, 0(t0)
div a0, a0, t1
addi t0, t0, -1
bne zero, a0, .print_i_digit10
beq zero, t3, .print_i_write_call
addi t2, zero, '-'
sb t2, 0(t0)
addi t0, t0, -1
.print_i_write_call:
mv a0, a1
addi a1, t0, 1
sub a2, s0, t0
addi a2, a2, -9
sw a2, 0(sp)
_memcpy();
lw a0, 0(sp)
end;
# Writes a number to the standard output.
#
# Parameters:
# a0 - Whole number.
proc _write_i();
begin
addi a1, sp, 0
_print_i();
mv a1, a0
addi a0, sp, 0
_write_s();
end;
# Writes a character from a0 into the standard output.
proc _write_c();
begin
sb a0, 0(sp)
addi a0, sp, 0
li a1, 1
_write_s();
end;
# Write null terminated string.
#
# Parameters:
# a0 - String.
proc _write_z();
begin
sw a0, 0(sp)
.write_z_loop:
# Check for 0 character.
lb a0, (a0)
beqz a0, .write_z_end
# Print a character.
lw a0, 0(sp)
lb a0, (a0)
_write_c();
# Advance the input string by one byte.
lw a0, 0(sp)
addi a0, a0, 1
sw a0, 0(sp)
goto .write_z_loop;
.write_z_end:
end;
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
# Both bounds are checked with unsigned comparisons.
proc _is_upper();
begin
li t0, 'A' - 1
sltu t1, t0, a0 # t1 = a0 >= 'A'
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
and a0, t1, t2 # a0 = a0 >= 'A' & a0 <= 'Z'
end;
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
# Both bounds are checked with unsigned comparisons.
proc _is_lower();
begin
li t0, 'a' - 1
sltu t2, t0, a0 # t2 = a0 >= 'a'
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
and a0, t2, t3 # a0 = a0 >= 'a' & a0 <= 'z'
end;
# Detects if the passed character is a 7-bit alpha character or an underscore.
#
# Parameters:
# a0 - Tested character.
#
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
proc _is_alpha();
begin
sw a0, 0(sp)
_is_upper();
sw a0, 4(sp)
_is_lower(v00);
lw t0, 0(sp)
xori t1, t0, '_'
seqz t1, t1
lw t0, 4(sp)
or a0, a0, t0
or a0, a0, t1
end;
# Detects whether the passed character is a digit
# (a value between 0 and 9).
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
proc _is_digit();
begin
li t0, '0' - 1
sltu t1, t0, a0 # t1 = a0 >= '0'
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
and a0, t1, t2
end;
proc _is_alnum();
begin
sw a0, 4(sp)
_is_alpha();
sw a0, 0(sp)
_is_digit(v04);
lw a1, 0(sp)
or a0, a0, a1
end;
# Reads the next token.
#
# Returns token length in a0.
proc _read_token();
begin
la t0, source_code_position # Token pointer.
lw t0, (t0)
sw t0, 0(sp) # Current token position.
sw zero, 4(sp) # Token length.
.read_token_loop:
lb t0, (t0) # Current character.
# First we try to read a directive.
# A directive can contain a dot and characters.
li t1, '.'
beq t0, t1, .read_token_next
lw a0, 0(sp)
lb a0, (a0)
_is_alnum();
bnez a0, .read_token_next
goto .read_token_end;
.read_token_next:
# Advance the source code position and token length.
lw t0, 4(sp)
addi t0, t0, 1
sw t0, 4(sp)
lw t0, 0(sp)
addi t0, t0, 1
sw t0, 0(sp)
goto .read_token_loop;
.read_token_end:
lw a0, 4(sp)
end;
# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if memory regions are equal.
proc _memcmp();
begin
mv t0, a0
li a0, 0
.memcmp_loop:
beqz a2, .memcmp_end
lbu t1, (t0)
lbu t2, (a1)
sub a0, t1, t2
bnez a0, .memcmp_end
addi t0, t0, 1
addi a1, a1, 1
addi a2, a2, -1
goto .memcmp_loop;
.memcmp_end:
end;
# Copies memory byte by byte.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0.
proc _memcpy();
begin
	# Keep the destination to restore it at the end.
	mv t0, a0
	.memcpy_loop:
	beqz a2, .memcpy_end
	lbu t1, (a1)
	sb t1, (a0)
	addi a0, a0, 1
	addi a1, a1, 1
	addi a2, a2, -1
	# The goto statement is terminated with a semicolon like everywhere else
	# in this file: the statement compiler skips two characters after a goto,
	# the semicolon and the newline.
	goto .memcpy_loop;
	.memcpy_end:
	mv a0, t0
end;
# Advances the token stream by a0 bytes.
proc _advance_token();
begin
la t0, source_code_position
lw t1, (t0)
add t1, t1, a0
sw t1, (t0)
end;
# Prints the current token.
#
# Parameters:
# a0 - Token length.
#
# Returns a0 unchanged.
proc _write_token();
begin
sw a0, 0(sp)
la a0, source_code_position
lw a0, (a0)
lw a1, 0(sp)
_write_s();
lw a0, 0(sp)
end;
proc _compile_section();
begin
# Print and skip the ".section" (8 characters) directive and a space after it.
_write_token(9);
_advance_token();
# Read the section name.
_read_token();
addi a0, a0, 1
_write_token();
_advance_token();
end;
# Skips a comment line without printing it.
#
# Advances source_code_position past the next newline character. The loop ends
# only at a newline character.
proc _skip_comment();
begin
la t0, source_code_position
lw t1, (t0)
.skip_comment_loop:
# Check for newline character.
lb t2, (t1)
li t3, '\n'
beq t2, t3, .skip_comment_end
# Advance the input string by one byte.
addi t1, t1, 1
sw t1, (t0)
goto .skip_comment_loop;
.skip_comment_end:
# Skip the newline.
addi t1, t1, 1
sw t1, (t0)
end;
# Prints and skips a line.
#
# Copies the characters at source_code_position to the standard output up to
# and including the next newline and advances the position past it.
proc _compile_line();
begin
	.compile_line_loop:
	la a0, source_code_position
	lw a1, (a0)
	lb t0, (a1)
	li t1, '\n'
	beq t0, t1, .compile_line_end
	# Print a character. Only the byte at the current position is loaded: the
	# position is not necessarily word aligned and _write_c stores a single
	# byte anyway.
	lb a0, (a1)
	_write_c();
	# Advance the input string by one byte.
	_advance_token(1);
	goto .compile_line_loop;
	.compile_line_end:
	_write_c('\n');
	_advance_token(1);
end;
proc _compile_integer_literal();
begin
la a0, asm_li
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
_read_token();
_write_token();
_advance_token();
_write_c('\n');
end;
proc _compile_character_literal();
begin
la a0, asm_li
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
li a0, '\''
_write_c();
_advance_token(1);
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
li t1, '\\'
bne a0, t1, .compile_character_literal_end
li a0, '\\'
_write_c();
_advance_token(1);
.compile_character_literal_end:
la t0, source_code_position
lw t0, (t0)
lb a0, (t0)
_write_c();
li a0, '\''
_write_c();
_write_c('\n');
_advance_token(2);
end;
proc _compile_variable_expression();
begin
la a0, asm_lw
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
_advance_token(1);
_read_token();
_write_token();
_advance_token();
la a0, asm_sp
_write_z();
_write_c('\n');
end;
# Compiles the expression at the current source position.
#
# The kind of the expression is determined by its first character: a single
# quote starts a character literal, "v" a variable and a digit an integer
# literal. Nothing is done for any other character.
proc _compile_expression();
begin
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)
	li t1, '\''
	beq a0, t1, .compile_expression_character_literal
	li t1, 'v'
	beq a0, t1, .compile_expression_variable
	_is_digit();
	bnez a0, .compile_expression_integer_literal
	goto .compile_expression_end;
	.compile_expression_character_literal:
	_compile_character_literal();
	goto .compile_expression_end;
	.compile_expression_integer_literal:
	_compile_integer_literal();
	goto .compile_expression_end;
	.compile_expression_variable:
	_compile_variable_expression();
	# Exactly one semicolon: the statement compiler skips two characters
	# after a goto, the semicolon and the newline.
	goto .compile_expression_end;
	.compile_expression_end:
end;
proc _compile_call();
begin
# Stack variables:
# v0 - Procedure name length.
# v4 - Procedure name pointer.
# v8 - Argument count.
_read_token();
sw a0, 0(sp)
la t0, source_code_position
lw t0, (t0)
sw t0, 4(sp)
sw zero, 8(sp)
# Skip the identifier and left paren.
addi a0, a0, 1
_advance_token();
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ')'
beq t0, t1, .compile_call_finalize
.compile_call_loop:
_compile_expression();
# Save the argument on the stack.
la a0, asm_sw
_write_z();
la a0, asm_a0
_write_z();
la a0, asm_comma
_write_z();
# Calculate the stack offset: 20 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 20
sub a0, t1, t0
_write_i();
la a0, asm_sp
_write_z();
_write_c('\n');
# Add one to the argument counter.
lw t0, 8(sp)
addi t0, t0, 1
sw t0, 8(sp)
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
li t1, ','
bne t0, t1, .compile_call_finalize
_advance_token(2);
goto .compile_call_loop;
.compile_call_finalize:
# Load the argument from the stack.
lw t0, 8(sp)
beqz t0, .compile_call_end
# Decrement the argument counter.
lw t0, 8(sp)
addi t0, t0, -1
sw t0, 8(sp)
la a0, asm_lw
_write_z();
_write_c('a');
lw a0, 8(sp)
_write_i();
la a0, asm_comma
_write_z();
# Calculate the stack offset: 20 - (4 * argument_counter)
lw t0, 8(sp)
li t1, 4
mul t0, t0, t1
li t1, 20
sub a0, t1, t0
_write_i();
la a0, asm_sp
_write_z();
_write_c('\n');
goto .compile_call_finalize;
.compile_call_end:
la a0, asm_call
_write_z();
lw a0, 4(sp)
lw a1, 0(sp)
_write_s();
# Skip the right paren.
_advance_token(1);
end;
proc _compile_goto();
begin
_advance_token(5);
_read_token();
sw a0, 0(sp)
la a0, asm_j
_write_z();
_write_token(v00);
_advance_token();
end;
proc _compile_statement();
begin
# This is a call if the statement starts with an underscore.
la t0, source_code_position
lw t0, (t0)
# First character after alignment tab.
addi t0, t0, 1
lb t0, (t0)
li t1, '_'
beq t0, t1, .compile_statement_call
li t1, 'g'
beq t0, t1, .compile_statement_goto
_compile_line();
goto .compile_statement_end;
.compile_statement_call:
_advance_token(1);
_compile_call();
goto .compile_statement_semicolon;
.compile_statement_goto:
_advance_token(1);
_compile_goto();
goto .compile_statement_semicolon;
.compile_statement_semicolon:
_advance_token(2);
_write_c('\n');
.compile_statement_end:
end;
proc _compile_procedure_body();
begin
.compile_procedure_body_loop:
la a0, source_code_position
lw a0, (a0)
la a1, keyword_end
li a2, 3 # "end" length.
_memcmp();
beqz a0, .compile_procedure_body_epilogue
_compile_statement();
goto .compile_procedure_body_loop;
.compile_procedure_body_epilogue:
end;
proc _compile_procedure();
begin
# Skip "proc ".
_advance_token(5);
_read_token();
sw a0, 0(sp) # Save the procedure name length.
# Write .type _procedure_name, @function.
la a0, asm_type_directive
_write_z();
_write_token(v00);
la a0, asm_type_function
_write_z();
# Write procedure label, _procedure_name:
_write_token(v00);
la a0, asm_colon
_write_z();
# Skip the function name and trailing parens, semicolon, "begin" and newline.
lw a0, 0(sp)
addi a0, a0, 10
_advance_token();
la a0, asm_prologue
_write_z();
_compile_procedure_body();
# Write the epilogue.
la a0, asm_epilogue
_write_z();
# Skip the "end" keyword, semicolon and newline.
_advance_token(5);
end;
proc _compile_type();
begin
# Print and skip the ".type" (5 characters) directive and a space after it.
_write_token(6);
_advance_token();
# Read and print the symbol name.
_read_token();
# Print and skip the symbol name, comma, space and @.
addi a0, a0, 3
_write_token();
_advance_token();
# Read the symbol type.
_read_token();
la t0, source_code_position
lw t0, (t0)
sw t0, 12(sp)
# Print the symbol type and newline.
addi a0, a0, 1
_write_token();
_advance_token();
# Write the object definition itself.
_compile_line();
.compile_type_end:
end;
# Advances source_code_position past consecutive newline characters.
proc _skip_newlines();
begin
	la t0, source_code_position
	lw t1, (t0)
	.skip_newlines_loop:
	lb t2, (t1)
	li t3, '\n'
	# Any other character, including the terminating zero byte, ends the
	# loop, a separate check for zero is not required.
	bne t2, t3, .skip_newlines_end
	addi t1, t1, 1
	sw t1, (t0)
	goto .skip_newlines_loop;
	.skip_newlines_end:
end;
# Process the source code and print the generated code.
proc _compile();
begin
.compile_loop:
_skip_newlines();
la t0, source_code_position
lw t0, (t0)
lb t0, (t0)
beqz t0, .compile_end
li t1, '#'
beq t0, t1, .compile_comment
la a0, source_code_position
lw a0, (a0)
la a1, keyword_section
li a2, 8 # ".section" length.
_memcmp();
beqz a0, .compile_section
la a0, source_code_position
lw a0, (a0)
la a1, keyword_type
li a2, 5 # ".type" length.
_memcmp();
beqz a0, .compile_type
la a0, source_code_position
lw a0, (a0)
la a1, keyword_proc
li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
_memcmp();
beqz a0, .compile_procedure
la a0, source_code_position
lw a0, (a0)
la a1, keyword_global
li a2, 6 # ".globl" length.
_memcmp();
beqz a0, .compile_global
# Not a known token, exit.
goto .compile_end;
.compile_section:
_compile_section();
goto .compile_loop;
.compile_type:
_compile_type();
goto .compile_loop;
.compile_global:
_compile_line();
goto .compile_loop;
.compile_comment:
_skip_comment();
goto .compile_loop;
.compile_procedure:
_compile_procedure();
goto .compile_loop;
.compile_end:
end;
# Terminates the program. a0 contains the return code.
#
# Parameters:
# a0 - Status code.
proc _exit();
begin
li a7, 93 # SYS_EXIT
ecall
end;
# Entry point.
.globl _start
proc _start();
begin
# Read the source from the standard input.
la a0, source_code
li a1, 81920 # Buffer size.
_read_file();
_compile();
_exit(0);
end;

1129
boot/stage4.elna Normal file

File diff suppressed because it is too large Load Diff

1487
boot/stage5.elna Normal file

File diff suppressed because it is too large Load Diff

1588
boot/stage6.elna Normal file

File diff suppressed because it is too large Load Diff

1488
boot/stage7.elna Normal file

File diff suppressed because it is too large Load Diff

1979
boot/stage8.elna Normal file

File diff suppressed because it is too large Load Diff

1944
boot/stage9.elna Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,9 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global symbol_table_build, symbol_table_find
.global symbol_table
.global symbol_table_build, symbol_table_lookup, symbol_table_enter, symbol_table_dump
.global symbol_table_make_pointer, symbol_table_make_parameter, symbol_table_make_local, symbol_table_make_procedure
.include "boot/definitions.inc"
@@ -18,6 +20,8 @@ symbol_builtin_name_word: .ascii "Word"
symbol_builtin_name_byte: .ascii "Byte"
.type symbol_builtin_name_char, @object
symbol_builtin_name_char: .ascii "Char"
.type symbol_builtin_name_bool, @object
symbol_builtin_name_bool: .ascii "Bool"
# Every type info starts with a word describing what type it is.
@@ -34,6 +38,9 @@ symbol_builtin_type_byte: .word TYPE_PRIMITIVE
.type symbol_builtin_type_char, @object
symbol_builtin_type_char: .word TYPE_PRIMITIVE
.word 1
.type symbol_builtin_type_bool, @object
symbol_builtin_type_bool: .word TYPE_PRIMITIVE
.word 1
.section .bss
@@ -49,16 +56,53 @@ symbol_table: .zero SYMBOL_PRIME
.section .text
# Prints the list of symbols in the table.
#
# Reads the number of entries from the first word of symbol_table and writes
# the name of each entry with _write_error. An entry takes 12 bytes; the word
# at offset 0 is passed as the name length and the word at offset 4 as the
# name pointer.
.type symbol_table_dump, @function
symbol_table_dump:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
sw s1, 20(sp) # Current symbol in the table.
sw s2, 16(sp) # Symbol table length.
la s1, symbol_table
lw s2, 0(s1)
addi s1, s1, 4 # Advance to the first symbol in the table.
.Lsymbol_table_dump_loop:
beqz s2, .Lsymbol_table_dump_end
# Write the name of the current symbol.
lw a0, 4(s1)
lw a1, 0(s1)
call _write_error
# Continue with the next entry.
addi s1, s1, 12
addi s2, s2, -1
j .Lsymbol_table_dump_loop
.Lsymbol_table_dump_end:
lw s1, 20(sp)
lw s2, 16(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Searches for a symbol by name.
#
# Parameters:
# a0 - Local symbol table or 0.
# a1 - Length of the symbol to search.
# a2 - Pointer to the symbol name.
# a0 - Length of the symbol to search.
# a1 - Pointer to the symbol name.
#
# Sets a0 to the symbol info.
.type symbol_table_find, @function
symbol_table_find:
# Sets a0 to the symbol info pointer or 0 if the symbol has not been found.
.type symbol_table_lookup, @function
symbol_table_lookup:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
@@ -70,38 +114,37 @@ symbol_table_find:
sw s3, 12(sp) # Length of the symbol to search.
sw s4, 8(sp) # Pointer to the symbol to search.
mv s3, a1
mv s4, a2
mv s3, a0
mv s4, a1
la s1, symbol_table
lw s2, 0(s1)
addi s1, s1, 4 # Advance to the first symbol in the table.
.Lsymbol_table_find_loop:
beqz s2, .Lsymbol_table_find_not_found
.Lsymbol_table_lookup_loop:
beqz s2, .Lsymbol_table_lookup_not_found
# Compare string lengths.
mv a0, s3
mv a1, s4
lw a2, 0(s1)
bne s3, a2, .Lsymbol_table_find_continue
lw a3, 4(s1)
call _string_equal
# If lengths match, compare the content.
mv a0, s4
lw a1, 4(s1)
call _memcmp
bnez a0, .Lsymbol_table_find_continue
beqz a0, .Lsymbol_table_lookup_continue
lw a0, 8(s1) # Pointer to the symbol.
j .Lsymbol_table_end
j .Lsymbol_table_lookup_end
.Lsymbol_table_find_continue:
.Lsymbol_table_lookup_continue:
addi s1, s1, 12
addi s2, s2, -1
j .Lsymbol_table_find_loop
j .Lsymbol_table_lookup_loop
.Lsymbol_table_find_not_found:
.Lsymbol_table_lookup_not_found:
li a0, 0
.Lsymbol_table_end:
.Lsymbol_table_lookup_end:
lw s1, 20(sp)
lw s2, 16(sp)
lw s3, 12(sp)
@@ -113,56 +156,142 @@ symbol_table_find:
addi sp, sp, 32
ret
# Creates a pointer type.
#
# The type object written to the output memory is two words long:
#   0 - TYPE_POINTER tag.
#   4 - Pointer to the base type.
#
# Parameters:
# a0 - Pointer to the base type.
# a1 - Output memory.
#
# Sets a0 to the size of newly created type in bytes.
.type symbol_table_make_pointer, @function
symbol_table_make_pointer:
li t0, TYPE_POINTER
sw t0, 0(a1) # Type tag.
sw a0, 4(a1) # Base type.
li a0, 8 # Two words have been written.
ret
# Creates a parameter info.
#
# The info object written to the output memory is three words long:
#   0 - INFO_PARAMETER tag.
#   4 - Pointer to the parameter type.
#   8 - Parameter offset.
#
# Parameters:
# a0 - Pointer to the parameter type.
# a1 - Parameter offset.
# a2 - Output memory.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_parameter, @function
symbol_table_make_parameter:
li t0, INFO_PARAMETER
sw t0, 0(a2) # Info tag.
sw a0, 4(a2) # Parameter type.
sw a1, 8(a2) # Parameter offset.
li a0, 12 # Three words have been written.
ret
# Creates a local variable info.
#
# The info object written to the output memory is three words long:
#   0 - INFO_LOCAL tag.
#   4 - Pointer to the variable type.
#   8 - Variable stack offset.
#
# Parameters:
# a0 - Pointer to the variable type.
# a1 - Variable stack offset.
# a2 - Output memory.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_local, @function
symbol_table_make_local:
li t0, INFO_LOCAL
sw t0, 0(a2) # Info tag.
sw a0, 4(a2) # Variable type.
sw a1, 8(a2) # Stack offset.
li a0, 12 # Three words have been written.
ret
# Creates a procedure type and a procedure info object referring to the type.
#
# Both objects are written into the same output memory, 12 bytes in total:
#   0 - INFO_PROCEDURE tag.
#   4 - Pointer to the procedure type, i.e. the address of the word at
#       offset 8 of the output memory.
#   8 - TYPE_PROCEDURE tag (the procedure type itself).
#
# Parameters:
# a0 - Output memory.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_procedure, @function
symbol_table_make_procedure:
li t0, TYPE_PROCEDURE
sw t0, 8(a0)
li t0, INFO_PROCEDURE
sw t0, 0(a0)
# Procedure type stored in the same memory segment, right after the info.
# The type pointer has to reference the type tag at offset 8 and not the
# info object itself.
addi t0, a0, 8
sw t0, 4(a0)
li a0, 12
ret
# Inserts a symbol into the table.
#
# The table begins with a word holding the number of entries, followed by the
# entries themselves. Each entry is 12 bytes long:
#   0 - Symbol name length.
#   4 - Symbol name pointer.
#   8 - Symbol pointer.
#
# The new entry is appended after the last one and the stored length is
# incremented. Only t0, t1 and t2 are modified, the arguments are left as is.
# NOTE(review): no capacity check is visible here; presumably symbol_table is
# reserved large enough elsewhere - confirm.
#
# Parameters:
# a0 - Symbol name length.
# a1 - Symbol name pointer.
# a2 - Symbol pointer.
.type symbol_table_enter, @function
symbol_table_enter:
la t0, symbol_table
lw t1, 0(t0) # Current table length.
li t2, 12 # Calculate the offset to the next entry.
mul t2, t1, t2
addi t2, t2, 4 # Skip the length word at the beginning of the table.
add t2, t0, t2 # Address of the first free entry.
sw a0, 0(t2)
sw a1, 4(t2)
sw a2, 8(t2)
addi t1, t1, 1 # Save the new length.
sw t1, 0(t0)
ret
# Build the initial symbols.
#
# Registers the built-in types Int, Word, Byte, Char and Bool in the global
# symbol table. Every symbol is added with symbol_table_enter, which is the
# only place that knows the entry layout and maintains the table length.
#
# Sets a0 to the pointer to the global symbol table.
.type symbol_table_build, @function
symbol_table_build:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
li a0, 3 # Length of the word "Int".
la a1, symbol_builtin_name_int
la a2, symbol_builtin_type_int
call symbol_table_enter
li a0, 4 # Length of the word "Word".
la a1, symbol_builtin_name_word
la a2, symbol_builtin_type_word
call symbol_table_enter
li a0, 4 # Length of the word "Byte".
la a1, symbol_builtin_name_byte
la a2, symbol_builtin_type_byte
call symbol_table_enter
li a0, 4 # Length of the word "Char".
la a1, symbol_builtin_name_char
la a2, symbol_builtin_type_char
call symbol_table_enter
li a0, 4 # Length of the word "Bool".
la a1, symbol_builtin_name_bool
la a2, symbol_builtin_type_bool
call symbol_table_enter
# a0 was used to pass the name lengths, load the documented return value.
la a0, symbol_table
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret

View File

@@ -1,616 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global _tokenize_next, classification, transitions, keywords, byte_keywords
.include "boot/definitions.inc"
.section .rodata
#
# Classification table assigns each possible character to a group (class). All
# characters of the same group are handled equivalently.
#
# The table is indexed by the unsigned byte value of the character, so it has
# to provide an entry for all 256 possible values. Bytes outside of the ASCII
# range are classified as invalid.
#
# Classification:
#
.equ CLASS_INVALID, 0x00
.equ CLASS_DIGIT, 0x01
.equ CLASS_CHARACTER, 0x02
.equ CLASS_SPACE, 0x03
.equ CLASS_COLON, 0x04
.equ CLASS_EQUALS, 0x05
.equ CLASS_LEFT_PAREN, 0x06
.equ CLASS_RIGHT_PAREN, 0x07
.equ CLASS_ASTERISK, 0x08
.equ CLASS_UNDERSCORE, 0x09
.equ CLASS_SINGLE, 0x0a
.equ CLASS_HEX, 0x0b
.equ CLASS_ZERO, 0x0c
.equ CLASS_X, 0x0d
.equ CLASS_EOF, 0x0e
.equ CLASS_DOT, 0x0f
.equ CLASS_MINUS, 0x10
.equ CLASS_QUOTE, 0x11
.equ CLASS_GREATER, 0x12
.equ CLASS_LESS, 0x13
# Number of classes, that is the number of columns in the transition table.
.equ CLASS_COUNT, 20
.type classification, @object
classification:
.byte CLASS_EOF # 00 NUL
.byte CLASS_INVALID # 01 SOH
.byte CLASS_INVALID # 02 STX
.byte CLASS_INVALID # 03 ETX
.byte CLASS_INVALID # 04 EOT
.byte CLASS_INVALID # 05 ENQ
.byte CLASS_INVALID # 06 ACK
.byte CLASS_INVALID # 07 BEL
.byte CLASS_INVALID # 08 BS
.byte CLASS_SPACE # 09 HT
.byte CLASS_SPACE # 0A LF
.byte CLASS_INVALID # 0B VT
.byte CLASS_INVALID # 0C FF
.byte CLASS_SPACE # 0D CR
.byte CLASS_INVALID # 0E SO
.byte CLASS_INVALID # 0F SI
.byte CLASS_INVALID # 10 DLE
.byte CLASS_INVALID # 11 DC1
.byte CLASS_INVALID # 12 DC2
.byte CLASS_INVALID # 13 DC3
.byte CLASS_INVALID # 14 DC4
.byte CLASS_INVALID # 15 NAK
.byte CLASS_INVALID # 16 SYN
.byte CLASS_INVALID # 17 ETB
.byte CLASS_INVALID # 18 CAN
.byte CLASS_INVALID # 19 EM
.byte CLASS_INVALID # 1A SUB
.byte CLASS_INVALID # 1B ESC
.byte CLASS_INVALID # 1C FS
.byte CLASS_INVALID # 1D GS
.byte CLASS_INVALID # 1E RS
.byte CLASS_INVALID # 1F US
.byte CLASS_SPACE # 20 Space
.byte CLASS_SINGLE # 21 !
.byte CLASS_QUOTE # 22 "
.byte CLASS_INVALID # 23 #
.byte CLASS_INVALID # 24 $
.byte CLASS_SINGLE # 25 %
.byte CLASS_SINGLE # 26 &
.byte CLASS_QUOTE # 27 '
.byte CLASS_LEFT_PAREN # 28 (
.byte CLASS_RIGHT_PAREN # 29 )
.byte CLASS_ASTERISK # 2A *
.byte CLASS_SINGLE # 2B +
.byte CLASS_SINGLE # 2C ,
.byte CLASS_MINUS # 2D -
.byte CLASS_DOT # 2E .
.byte CLASS_SINGLE # 2F /
.byte CLASS_ZERO # 30 0
.byte CLASS_DIGIT # 31 1
.byte CLASS_DIGIT # 32 2
.byte CLASS_DIGIT # 33 3
.byte CLASS_DIGIT # 34 4
.byte CLASS_DIGIT # 35 5
.byte CLASS_DIGIT # 36 6
.byte CLASS_DIGIT # 37 7
.byte CLASS_DIGIT # 38 8
.byte CLASS_DIGIT # 39 9
.byte CLASS_COLON # 3A :
.byte CLASS_SINGLE # 3B ;
.byte CLASS_LESS # 3C <
.byte CLASS_EQUALS # 3D =
.byte CLASS_GREATER # 3E >
.byte CLASS_INVALID # 3F ?
.byte CLASS_SINGLE # 40 @
.byte CLASS_CHARACTER # 41 A
.byte CLASS_CHARACTER # 42 B
.byte CLASS_CHARACTER # 43 C
.byte CLASS_CHARACTER # 44 D
.byte CLASS_CHARACTER # 45 E
.byte CLASS_CHARACTER # 46 F
.byte CLASS_CHARACTER # 47 G
.byte CLASS_CHARACTER # 48 H
.byte CLASS_CHARACTER # 49 I
.byte CLASS_CHARACTER # 4A J
.byte CLASS_CHARACTER # 4B K
.byte CLASS_CHARACTER # 4C L
.byte CLASS_CHARACTER # 4D M
.byte CLASS_CHARACTER # 4E N
.byte CLASS_CHARACTER # 4F O
.byte CLASS_CHARACTER # 50 P
.byte CLASS_CHARACTER # 51 Q
.byte CLASS_CHARACTER # 52 R
.byte CLASS_CHARACTER # 53 S
.byte CLASS_CHARACTER # 54 T
.byte CLASS_CHARACTER # 55 U
.byte CLASS_CHARACTER # 56 V
.byte CLASS_CHARACTER # 57 W
.byte CLASS_CHARACTER # 58 X
.byte CLASS_CHARACTER # 59 Y
.byte CLASS_CHARACTER # 5A Z
.byte CLASS_SINGLE # 5B [
.byte CLASS_INVALID # 5C \
.byte CLASS_SINGLE # 5D ]
.byte CLASS_SINGLE # 5E ^
.byte CLASS_UNDERSCORE # 5F _
.byte CLASS_INVALID # 60 `
.byte CLASS_HEX # 61 a
.byte CLASS_HEX # 62 b
.byte CLASS_HEX # 63 c
.byte CLASS_HEX # 64 d
.byte CLASS_HEX # 65 e
.byte CLASS_HEX # 66 f
.byte CLASS_CHARACTER # 67 g
.byte CLASS_CHARACTER # 68 h
.byte CLASS_CHARACTER # 69 i
.byte CLASS_CHARACTER # 6A j
.byte CLASS_CHARACTER # 6B k
.byte CLASS_CHARACTER # 6C l
.byte CLASS_CHARACTER # 6D m
.byte CLASS_CHARACTER # 6E n
.byte CLASS_CHARACTER # 6F o
.byte CLASS_CHARACTER # 70 p
.byte CLASS_CHARACTER # 71 q
.byte CLASS_CHARACTER # 72 r
.byte CLASS_CHARACTER # 73 s
.byte CLASS_CHARACTER # 74 t
.byte CLASS_CHARACTER # 75 u
.byte CLASS_CHARACTER # 76 v
.byte CLASS_CHARACTER # 77 w
.byte CLASS_X # 78 x
.byte CLASS_CHARACTER # 79 y
.byte CLASS_CHARACTER # 7A z
.byte CLASS_INVALID # 7B {
.byte CLASS_SINGLE # 7C |
.byte CLASS_INVALID # 7D }
.byte CLASS_SINGLE # 7E ~
.byte CLASS_INVALID # 7F DEL
# 80 - FF. Without these entries a non-ASCII byte would be classified by
# whatever data happens to follow the table.
.fill 128, 1, CLASS_INVALID
#
# Textual keywords in the language.
#
# Each keyword is stored as a word containing the length of the keyword,
# immediately followed by its characters. The strings are not terminated and
# no padding is inserted between the entries.
#
# _classify_identifier passes KEYWORDS_COUNT and this table to _strings_index
# and returns a non-zero result directly as the token kind.
# NOTE(review): this presumably requires the order of the entries to match
# the numbering of the keyword tokens preceding TOKEN_IDENTIFIER in
# boot/definitions.inc - confirm when adding or reordering keywords.
#
.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
.type keywords, @object
keywords:
.word 7
.ascii "program"
.word 6
.ascii "import"
.word 5
.ascii "const"
.word 3
.ascii "var"
.word 2
.ascii "if"
.word 4
.ascii "then"
.word 5
.ascii "elsif"
.word 4
.ascii "else"
.word 5
.ascii "while"
.word 2
.ascii "do"
.word 4
.ascii "proc"
.word 5
.ascii "begin"
.word 3
.ascii "end"
.word 4
.ascii "type"
.word 6
.ascii "record"
.word 5
.ascii "union"
.word 4
.ascii "true"
.word 5
.ascii "false"
.word 3
.ascii "nil"
.word 3
.ascii "xor"
.word 2
.ascii "or"
.word 6
.ascii "return"
.word 4
.ascii "cast"
.word 4
.ascii "goto"
.word 4
.ascii "case"
.word 2
.ascii "of"
# Symbols consisting of a single character. _classify_single searches this
# string for the character and adds the found position to
# TOKEN_IDENTIFIER + 1 to get the token kind.
# NOTE(review): the classification table marks more characters as
# CLASS_SINGLE (for example "!", "%", "/", "|", "~") than are listed here -
# verify how _classify_single is supposed to handle those.
.type byte_keywords, @object
byte_keywords: .ascii "&.,:;()[]^=+-*@"
.equ BYTE_KEYWORDS_SIZE, . - byte_keywords
.section .data
# The transition table describes transitions from one state to another, given
# a symbol (character class).
#
# The table has m rows and n columns, where m is the amount of states and n is
# the amount of classes. So given the current state and a classified character
# the table can be used to look up the next state.
#
# Each cell is a word long.
# - The least significant byte of the word is a row number (beginning with 0).
# It specifies the target state. "ff" means that this is an end state and no
# transition is possible.
# - The next byte is the action that should be performed when transitioning.
# For the meaning of actions see labels in the _tokenize_next function, which
# handles each action.
#
# Every target state other than "ff" has to be an existing row of this table
# (0x00 - 0x0d), otherwise the next lookup reads beyond the table.
#
.type transitions, @object
transitions:
# Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL .
# - " or ' > <
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
.word 0x0105, 0x010b, 0x0104, 0x0107 # 0x00 Start
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon
.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff
.word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier
.word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Decimal
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less
.word 0x08ff, 0x0108, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x0108, 0x0108, 0x00ff, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x08 Hexadecimal after 0x.
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment
# An opening quote in the start state enters the string state 0x0b. Inside of
# a string ")" and "." are ordinary characters and keep accumulating.
# NOTE(review): NUL inside of a string keeps accumulating as well, so an
# unterminated string is scanned past the end of the input; presumably it
# should be rejected with 0x00ff like in the comment state - confirm.
.word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b
.word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b
.word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String
.word 0x08ff, 0x00ff, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x00ff, 0x010d, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x0c Leading zero
.word 0x00ff, 0x0108, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff
.word 0x00ff, 0x00ff, 0x00ff, 0x0108, 0x0108, 0x00ff, 0x00ff, 0x00ff
.word 0x00ff, 0x00ff, 0x00ff, 0x00ff # 0x0d Starting hexadecimal
.section .text
# Returns the class from the classification table for the given character.
#
# The character is used as a byte offset into the classification table, so it
# is expected to be an unsigned byte value.
#
# Parameters:
# a0 - Character.
#
# Sets a0 to the class number.
.type _classify, @function
_classify:
la t0, classification
add t0, t0, a0 # Character class pointer.
lbu a0, (t0) # Character class.
ret
# Given the current state and a character class, calculates the next state.
#
# The transition table is addressed as a two-dimensional array of words with
# CLASS_COUNT columns: cell = transitions + (state * CLASS_COUNT + class) * 4.
#
# Parameters:
# a0 - Current state.
# a1 - Character class.
#
# Sets a0 to the next state. The returned word is the complete table cell, it
# contains the target state in the lowest byte and the action in the next one.
.type _lookup_state, @function
_lookup_state:
li t0, CLASS_COUNT
mul a0, a0, t0 # Transition row.
add a0, a0, a1 # Transition column.
li t0, 4
mul a0, a0, t0 # Multiply by the word size.
la t0, transitions
add t0, t0, a0
lw a0, (t0) # Next state.
ret
# Chains _classify and _lookup_state.
#
# Parameters:
# a0 - Current state.
# a1 - Character.
#
# Sets a0 to the next state based on the given character.
.type _next_state, @function
_next_state:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sw a0, 4(sp) # Keep the current state across the call to _classify.
mv a0, a1
call _classify
mv a1, a0 # The character class is the second argument of _lookup_state.
lw a0, 4(sp) # Restore the current state as the first argument.
call _lookup_state
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Takes an identifier and checks whether it's a keyword.
#
# The identifier is looked up in the keywords table with _strings_index. A
# non-zero result is returned as is, zero is replaced with TOKEN_IDENTIFIER.
# NOTE(review): presumably _strings_index returns the 1-based position of the
# matching string or 0 if nothing matches - confirm against its definition.
#
# Parameters:
# a0 - Token length.
# a1 - Token pointer.
#
# Sets a0 to the appropriate token type.
.type _classify_identifier, @function
_classify_identifier:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
mv a2, a0 # Token length.
mv a3, a1 # Token pointer.
li a0, KEYWORDS_COUNT
la a1, keywords
call _strings_index
bnez a0, .Lclassify_identifier_end # A keyword has been found.
li a0, TOKEN_IDENTIFIER
.Lclassify_identifier_end:
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Takes a symbol and determines its type.
#
# The character is searched in byte_keywords with _memchr. The offset of the
# found position from the beginning of byte_keywords is added to
# TOKEN_IDENTIFIER + 1 to get the token type.
# NOTE(review): the result of _memchr is used without any check. If _memchr
# reports a missing character with 0 (as memchr in C does), the returned token
# type is meaningless for characters not listed in byte_keywords - confirm.
#
# Parameters:
# a0 - Token character.
#
# Sets a0 to the appropriate token type.
.type _classify_single, @function
_classify_single:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
mv a1, a0 # Character to search for.
li a2, BYTE_KEYWORDS_SIZE
la a0, byte_keywords
call _memchr
la a1, byte_keywords
sub a0, a0, a1 # Position of the character in byte_keywords.
addi a0, a0, TOKEN_IDENTIFIER + 1
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Classifies a symbol containing multiple characters (probably 2).
#
# Only the first character of the token is inspected. If it is a colon, the
# token is an assignment. For any other character a0 is returned unchanged.
#
# Parameters:
# a0 - Token length.
# a1 - Token pointer.
#
# Sets a0 to the appropriate token type.
.type _classify_composite, @function
_classify_composite:
lbu t0, 0(a1)
li t1, ':'
bne t0, t1, .Lclassify_composite_end # Not an assignment, a0 is left as is.
li a0, TOKEN_ASSIGN
.Lclassify_composite_end:
ret
# Reads the next token from the source text.
#
# The function drives the state machine described by the transition table:
# it classifies the character at the current position, looks up the next
# state and performs the action attached to the transition until an action
# finishes the token.
#
# Stack frame:
#  0(sp) - Pointer to the output value.
#  4(sp) - Token kind (identifiers only).
#  8(sp) - Token length (identifiers only).
# 12(sp) - Pointer to the beginning of the token.
# 16(sp) - Saved s2.
# 20(sp) - Saved s1.
# 24(sp) - Saved s0.
# 28(sp) - Saved ra.
#
# Parameters:
# a0 - Source text pointer.
# a1 - A pointer for output value, the token kind. 4 Bytes.
# NOTE(review): for identifiers 12 bytes (kind, length, start pointer) are
# copied to the output and for integers 0(a1), 4(a1) and 8(a1) are written, so
# the output buffer presumably has to be at least 12 bytes long - confirm
# against the callers.
#
# Sets a0 to the position of the next token.
.type _tokenize_next, @function
_tokenize_next:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
sw s1, 20(sp) # Preserve s1 used for current source text position.
mv s1, a0
sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
# 4(sp) and 8(sp) are reserved for the kind and length of the token if needed.
sw s2, 16(sp) # Preserve s2 containing the current state.
li s2, 0x00 # Initial, start state.
sw a1, 0(sp)
sw zero, (a1) # Initialize.
.Ltokenize_next_loop:
mv a0, s2
lbu a1, (s1)
call _next_state
li t0, 0xff
and s2, a0, t0 # Next state.
li t0, 0xff00
and t1, a0, t0 # Transition action.
srli t1, t1, 8
# Perform the provided action.
li t0, 0x01 # Accumulate action.
beq t1, t0, .Ltokenize_next_accumulate
li t0, 0x02 # Print action.
beq t1, t0, .Ltokenize_next_print
li t0, 0x03 # Skip action.
beq t1, t0, .Ltokenize_next_skip
li t0, 0x04 # Delimited string action.
beq t1, t0, .Ltokenize_next_comment
li t0, 0x05 # Finalize identifier.
beq t1, t0, .Ltokenize_next_identifier
li t0, 0x06 # Single character symbol action.
beq t1, t0, .Ltokenize_next_single
li t0, 0x07 # An action for symbols containing multiple characters.
beq t1, t0, .Ltokenize_next_composite
li t0, 0x08 # Integer action.
beq t1, t0, .Ltokenize_next_integer
# Any other action, including 0x00, rejects the current character.
j .Ltokenize_next_reject
.Ltokenize_next_reject:
# Skip the rejected character. The token kind stays 0 as initialized.
addi s1, s1, 1
j .Ltokenize_next_end
.Ltokenize_next_accumulate:
# The character belongs to the current token, continue with the next one.
addi s1, s1, 1
j .Ltokenize_next_loop
.Ltokenize_next_skip:
# Advance both the current position and the beginning of the token, so the
# skipped character doesn't become a part of the token.
addi s1, s1, 1
lw t0, 12(sp)
addi t0, t0, 1
sw t0, 12(sp)
j .Ltokenize_next_loop
.Ltokenize_next_print:
# Finishes the token without consuming the current character and without
# writing a token kind.
/* DEBUG
addi a0, a0, 21
sw a0, 0(sp)
addi a0, sp, 0
li a1, 1
call _write_error */
j .Ltokenize_next_end
.Ltokenize_next_comment:
# Consume the closing character of the delimited sequence and finish.
addi s1, s1, 1
j .Ltokenize_next_end
.Ltokenize_next_identifier:
# An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter.
lw a1, 12(sp)
sub a0, s1, a1 # Token length.
sw a0, 8(sp)
call _classify_identifier
sw a0, 4(sp)
# Copy the kind, the length and the start pointer saved at 4(sp), 8(sp) and
# 12(sp) into the output value.
lw a0, 0(sp)
addi a1, sp, 4
li a2, 12
call _memcpy
j .Ltokenize_next_end
.Ltokenize_next_single:
# The token consists of exactly one character at the beginning of the token.
lw a0, 12(sp)
addi s1, a0, 1
lbu a0, (a0)
call _classify_single
lw a1, 0(sp)
sw a0, (a1)
j .Ltokenize_next_end
.Ltokenize_next_composite:
# Consume the last character of the symbol and classify the whole symbol.
addi s1, s1, 1
lw a1, 12(sp)
sub a0, s1, a1 # Token length.
call _classify_composite
lw a1, 0(sp)
sw a0, (a1)
j .Ltokenize_next_end
.Ltokenize_next_integer:
# Write the kind, the length and the start pointer directly into the output.
lw t0, 0(sp)
li t1, TOKEN_INTEGER
sw t1, 0(t0)
lw t1, 12(sp)
sw t1, 8(t0)
sub t1, s1, t1
sw t1, 4(t0)
j .Ltokenize_next_end
.Ltokenize_next_end:
mv a0, s1 # Return the advanced text pointer.
# Restore saved registers.
lw s1, 20(sp)
lw s2, 16(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret

View File

@@ -1,323 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
require 'pathname'
require 'uri'
require 'net/http'
require 'rake/clean'
require 'open3'
require 'etc'
# Versions of the components the cross toolchain is built from. They are
# interpolated into the download URLs and source directory names below.
GCC_VERSION = '15.1.0'
BINUTILS_VERSION = '2.44'
GLIBC_VERSION = '2.41'
KERNEL_VERSION = '5.15.181'
CLOBBER.include 'build'
# Describes the build machine and the directories used for the cross toolchain.
#
# - build:  target triplet of the host compiler.
# - gcc:    name or path of the host C compiler.
# - target: triplet the cross toolchain is built for.
# - tmp:    Pathname of the directory all build artifacts are placed in.
class BuildTarget
  attr_accessor(:build, :gcc, :target, :tmp)

  # Derives the C++ compiler from the C compiler, e.g. "gcc-15" => "g++-15".
  #
  # Only the compiler name in the last path component is rewritten, so
  # directories containing the letter "c" (like /usr/local/bin) stay intact.
  def gxx
    directory, separator, name = @gcc.rpartition '/'
    compiler = name.sub(/g?cc/) { |match| match == 'cc' ? 'c++' : 'g++' }

    directory + separator + compiler
  end

  # Directory the target's headers and libraries are installed to.
  def sysroot
    tmp + 'sysroot'
  end

  # Installation prefix of the cross toolchain itself.
  def rootfs
    tmp + 'rootfs'
  end

  # Directory for downloaded sources and out-of-tree build directories.
  def tools
    tmp + 'tools'
  end
end
# Runs the given compiler with --verbose and returns everything it has
# written to the standard error stream.
#
# LANG is set to C so that the output isn't localized. The block form of
# IO.pipe closes both ends of the pipe even if the command fails and sh
# raises.
def gcc_verbose(gcc_binary)
  IO.pipe do |read, write|
    sh({ 'LANG' => 'C' }, gcc_binary, '--verbose', err: write)
    write.close # Otherwise reading below would never see the end of file.

    read.read
  end
end
# Collects the information about the host compiler and the build directories.
#
# gcc_version is used to find a versioned gcc binary (gcc-<major>) if the
# default gcc reports itself as Apple clang. task provides the task arguments;
# its :target argument defaults to riscv32-unknown-linux-gnu.
#
# Returns a BuildTarget.
def find_build_target(gcc_version, task)
  gcc_binary = 'gcc'
  output = gcc_verbose gcc_binary

  if output.start_with? 'Apple clang'
    gcc_binary = "gcc-#{gcc_version.split('.').first}"
    output = gcc_verbose gcc_binary
  end
  task.with_defaults target: 'riscv32-unknown-linux-gnu'

  BuildTarget.new.tap do |build_target|
    output.each_line do |line|
      if line.start_with? 'Target: '
        build_target.build = line.split(' ').last.strip
      elsif line.start_with? 'COLLECT_GCC'
        build_target.gcc = line.split('=').last.strip
      end
    end
    build_target.tmp = Pathname.new('./build')
    build_target.target = task[:target]
  end
end
# Downloads a tar archive and extracts it into the target directory on the fly.
#
# url is a URI of a .tar.bz2 or .tar.xz archive, target is a Pathname of an
# existing directory. Redirects are followed, a relative Location header is
# resolved against the requested URL.
#
# Returns the Pathname of the extracted root directory, which is assumed to be
# named like the archive without its extension.
#
# Raises a RuntimeError for unsupported archive types or if tar fails and an
# HTTP error for unsuccessful responses.
def download_and_unarchive(url, target)
  case File.extname url.path
  when '.bz2'
    archive_type = '-j'
    root_directory = File.basename url.path, '.tar.bz2'
  when '.xz'
    archive_type = '-J'
    root_directory = File.basename url.path, '.tar.xz'
  else
    raise "Unsupported archive type #{url.path}."
  end

  Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http|
    request = Net::HTTP::Get.new url.request_uri

    http.request request do |response|
      case response
      when Net::HTTPRedirection
        download_and_unarchive url.merge(response['location']), target
      when Net::HTTPSuccess
        Open3.popen2 'tar', '-C', target.to_path, archive_type, '-xv' do |stdin, stdout, wait_thread|
          # tar's output has to be consumed concurrently, otherwise tar blocks
          # as soon as the pipe is full.
          printer = Thread.new do
            stdout.each { |line| puts line }
          end

          response.read_body do |chunk|
            stdin.write chunk
          end
          stdin.close
          printer.join
          raise "Unable to extract #{url}." unless wait_thread.value.success?
        end
      else
        response.error!
      end
    end
  end
  target + root_directory
end
namespace :cross do
desc 'Build cross binutils'
task :binutils, [:target] do |_, args|
  # Downloads binutils, then configures, builds and installs it into the
  # rootfs prefix from a separate build directory next to the sources.
  toolchain = find_build_target GCC_VERSION, args
  toolchain.tools.mkpath

  archive = URI.parse "https://ftp.gnu.org/gnu/binutils/binutils-#{BINUTILS_VERSION}.tar.xz"
  sources = download_and_unarchive archive, toolchain.tools

  build_directory = sources.dirname.join 'build-binutils'
  build_directory.mkpath
  toolchain.rootfs.mkpath

  environment = { 'CC' => toolchain.gcc, 'CXX' => toolchain.gxx }
  configure_options = [
    "--prefix=#{toolchain.rootfs.realpath}",
    "--target=#{toolchain.target}",
    '--disable-nls',
    '--enable-gprofng=no',
    '--disable-werror',
    '--enable-default-hash-style=gnu',
    '--disable-libquadmath'
  ]
  configure = sources.relative_path_from(build_directory).join 'configure'
  commands = [
    [configure.to_path, *configure_options],
    ['make', '-j', Etc.nprocessors.to_s],
    ['make', 'install']
  ]

  commands.each do |command|
    sh environment, *command, chdir: build_directory.to_path
  end
end
desc 'Build stage 1 GCC'
task :gcc1, [:target] do |_, args|
  # Builds a minimal static compiler without headers and target libraries.
  # It is only needed to compile the C library for the target.
  toolchain = find_build_target GCC_VERSION, args
  toolchain.tools.mkpath

  archive = URI.parse "https://gcc.gnu.org/pub/gcc/releases/gcc-#{GCC_VERSION}/gcc-#{GCC_VERSION}.tar.xz"
  sources = download_and_unarchive archive, toolchain.tools

  build_directory = sources.dirname.join 'build-gcc'
  build_directory.mkpath
  toolchain.rootfs.mkpath
  toolchain.sysroot.mkpath

  sh 'contrib/download_prerequisites', chdir: sources.to_path

  configure_options = [
    "--prefix=#{toolchain.rootfs.realpath}",
    "--with-sysroot=#{toolchain.sysroot.realpath}",
    '--enable-languages=c,c++',
    '--disable-shared',
    '--with-arch=rv32imafdc',
    '--with-abi=ilp32d',
    '--with-tune=rocket',
    '--with-isa-spec=20191213',
    '--disable-bootstrap',
    '--disable-multilib',
    '--disable-libmudflap',
    '--disable-libssp',
    '--disable-libquadmath',
    '--disable-libsanitizer',
    '--disable-threads',
    '--disable-libatomic',
    '--disable-libgomp',
    '--disable-libvtv',
    '--disable-libstdcxx',
    '--disable-nls',
    '--with-newlib',
    '--without-headers',
    "--target=#{toolchain.target}",
    "--build=#{toolchain.build}",
    "--host=#{toolchain.build}"
  ]
  compiler_flags = '-O2 -fPIC'
  # The cross binutils installed into the rootfs have to be found first.
  bin_directory = toolchain.rootfs.realpath + 'bin'
  environment = {
    'CC' => toolchain.gcc,
    'CXX' => toolchain.gxx,
    'CFLAGS' => compiler_flags,
    'CXXFLAGS' => compiler_flags,
    'PATH' => "#{bin_directory}:#{ENV['PATH']}"
  }
  configure = sources.relative_path_from(build_directory).join 'configure'
  commands = [
    [configure.to_path, *configure_options],
    ['make', '-j', Etc.nprocessors.to_s],
    ['make', 'install']
  ]

  commands.each do |command|
    sh environment, *command, chdir: build_directory.to_path
  end
end
desc 'Copy glibc headers'
task :headers, [:target] do |_, args|
  # Downloads glibc and copies its elf.h into the tools include directory.
  toolchain = find_build_target GCC_VERSION, args
  toolchain.tools.mkpath

  archive = URI.parse "https://ftp.gnu.org/gnu/glibc/glibc-#{GLIBC_VERSION}.tar.xz"
  sources = download_and_unarchive archive, toolchain.tools

  include_directory = toolchain.tools.join 'include'
  include_directory.mkpath

  cp sources.join('elf/elf.h'), include_directory.join('elf.h')
end
desc 'Build linux kernel'
task :kernel, [:target] do |_, args|
  # Builds the kernel with the default rv32 configuration and copies the
  # generated user space headers into the sysroot.
  toolchain = find_build_target GCC_VERSION, args
  toolchain.tools.mkpath

  archive = URI.parse "https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-#{KERNEL_VERSION}.tar.xz"
  kernel_directory = download_and_unarchive archive, toolchain.tools

  bin_directory = toolchain.rootfs.realpath + 'bin'
  host_include_directory = toolchain.tools.realpath + 'include'
  environment = {
    'CROSS_COMPILE' => "#{toolchain.target}-",
    'ARCH' => 'riscv',
    'PATH' => "#{bin_directory}:#{ENV['PATH']}",
    'HOSTCFLAGS' => "-D_UUID_T -D__GETHOSTUUID_H -I#{host_include_directory}"
  }
  commands = [
    ['make', 'rv32_defconfig'],
    ['make', '-j', Etc.nprocessors.to_s],
    ['make', 'headers']
  ]

  commands.each do |command|
    sh environment, *command, chdir: kernel_directory.to_path
  end

  user_directory = toolchain.sysroot.join 'usr'
  user_directory.mkpath
  cp_r kernel_directory.join('usr/include'), user_directory.join('include')
end
desc 'Build glibc'
task :glibc, [:target] do |_, args|
  # Expects the glibc sources to be already extracted into the tools
  # directory and installs the built library into the sysroot.
  toolchain = find_build_target GCC_VERSION, args
  sources = toolchain.tools.join "glibc-#{GLIBC_VERSION}"
  kernel_headers = toolchain.sysroot.realpath + 'usr/include'

  configure_options = [
    '--prefix=/usr',
    "--host=#{toolchain.target}",
    "--target=#{toolchain.target}",
    "--build=#{toolchain.build}",
    "--enable-kernel=#{KERNEL_VERSION}",
    "--with-headers=#{kernel_headers}",
    '--disable-nscd',
    '--disable-libquadmath',
    '--disable-libitm',
    '--disable-werror',
    'libc_cv_forced_unwind=yes'
  ]
  bin_directory = toolchain.rootfs.realpath + 'bin'
  environment = {
    'PATH' => "#{bin_directory}:#{ENV['PATH']}",
    'MAKE' => 'make' # Otherwise it uses gnumake which can be different and too old.
  }

  build_directory = sources.dirname.join 'build-glibc'
  build_directory.mkpath
  configure = sources.relative_path_from(build_directory).join './configure'
  commands = [
    [configure.to_path, *configure_options],
    ['make', '-j', Etc.nprocessors.to_s],
    ['make', "install_root=#{toolchain.sysroot.realpath}", 'install']
  ]

  commands.each do |command|
    sh environment, *command, chdir: build_directory.to_path
  end
end
desc 'Build stage 2 GCC'
task :gcc2, [:target] do |_, args|
  # Rebuilds GCC from the already extracted sources in a clean build
  # directory, this time with shared libraries, threads and LTO enabled.
  toolchain = find_build_target GCC_VERSION, args
  sources = toolchain.tools.join "gcc-#{GCC_VERSION}"

  build_directory = toolchain.tools.join 'build-gcc'
  rm_rf build_directory
  build_directory.mkpath

  configure_options = [
    "--prefix=#{toolchain.rootfs.realpath}",
    "--with-sysroot=#{toolchain.sysroot.realpath}",
    '--enable-languages=c,c++,lto',
    '--enable-lto',
    '--enable-shared',
    '--with-arch=rv32imafdc',
    '--with-abi=ilp32d',
    '--with-tune=rocket',
    '--with-isa-spec=20191213',
    '--disable-bootstrap',
    '--disable-multilib',
    '--enable-checking=release',
    '--disable-libssp',
    '--disable-libquadmath',
    '--enable-threads=posix',
    '--with-default-libstdcxx-abi=new',
    '--disable-nls',
    "--target=#{toolchain.target}",
    "--build=#{toolchain.build}",
    "--host=#{toolchain.build}"
  ]
  compiler_flags = '-O2 -fPIC'
  bin_directory = toolchain.rootfs.realpath + 'bin'
  environment = {
    'CFLAGS' => compiler_flags,
    'CXXFLAGS' => compiler_flags,
    'PATH' => "#{bin_directory}:#{ENV['PATH']}"
  }
  configure = sources.relative_path_from(build_directory).join 'configure'
  commands = [
    [configure.to_path, *configure_options],
    ['make', '-j', Etc.nprocessors.to_s],
    ['make', 'install']
  ]

  commands.each do |command|
    sh environment, *command, chdir: build_directory.to_path
  end
end
end
# Umbrella task. It has no action of its own and only runs the individual
# build steps in the order they depend on each other.
desc 'Build cross toolchain'
task cross: %w[
  cross:binutils
  cross:gcc1
  cross:headers
  cross:kernel
  cross:glibc
  cross:gcc2
]

View File

@@ -1,61 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
# Cross compiler used to assemble and link the boot stages.
CROSS_GCC = 'build/rootfs/bin/riscv32-unknown-linux-gnu-gcc'
# Passed to the emulator with -L when a stage compiler is executed.
SYSROOT = 'build/sysroot'
# Emulator binary the stage compilers are run with.
QEMU = 'qemu-riscv32'
# Runs a stage compiler in the emulator, feeds it the source file on the
# standard input and copies the produced assembly into output.
#
# - output:   IO object the generated assembly is written to.
# - compiler: Array containing the path of the compiler executable.
# - source:   Array containing the path of the source file.
#
# The source is written from a separate thread. Writing the complete source
# before reading any output blocks forever if the compiler produces more
# output than fits into the pipe while it is still reading its input.
def assemble_stage(output, compiler, source)
  arguments = [QEMU, '-L', SYSROOT, *compiler]

  puts Term::ANSIColor.green(arguments * ' ')
  puts
  Open3.popen2(*arguments) do |qemu_in, qemu_out|
    feeder = Thread.new do
      qemu_in.write File.read(*source)
    ensure
      qemu_in.close # Signals the end of the input to the compiler.
    end

    IO.copy_stream qemu_out, output
    feeder.join
  end
end
# Defines a rule compiling each assembly file in boot/ into an object file in
# build/boot. All assembly files which aren't a stage themselves form the
# library linked into every stage.
assembly_sources = Dir.glob('boot/*.s')

assembly_sources.each do |assembly_source|
  target_object = File.join 'build/boot', "#{File.basename(assembly_source, '.s')}.o"

  file target_object => [assembly_source, 'build/boot'] do |t|
    sh CROSS_GCC, '-c', '-o', t.name, assembly_source
  end
end
library = assembly_sources.reject do |assembly_source|
  File.basename(assembly_source).start_with? 'stage'
end
desc 'Initial stage'
file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |t|
  sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end

# Each generated assembly file is produced by compiling boot/stage2.elna with
# the compiler built in the previous step.
{
  'build/boot/stage2a.s' => 'build/boot/stage1',
  'build/boot/stage2b.s' => 'build/boot/stage2a'
}.each do |assembly, compiler|
  file assembly => [compiler, 'boot/stage2.elna'] do |t|
    source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }

    File.open(t.name, 'w') { |output| assemble_stage output, exe, source }
  end
end

# The generated assembly is linked together with the library into the next
# compiler executable.
%w[build/boot/stage2a build/boot/stage2b].each do |exe|
  file exe => [exe.ext('.s'), *library] do |t|
    sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
  end
end