Compare commits
13 Commits
92f50fff5f
...
assembly
Author | SHA1 | Date | |
---|---|---|---|
216dc59f0b
|
|||
d16ec370dc
|
|||
e0ac57dc1d
|
|||
4888252274
|
|||
4b42c59649
|
|||
ed3e0e043c
|
|||
44fa140769
|
|||
627975775c
|
|||
e614d43ea9
|
|||
1b31f532df
|
|||
d85183c7a6
|
|||
707f983fe9
|
|||
890486532c
|
9
Gemfile
9
Gemfile
@@ -1,9 +0,0 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
# frozen_string_literal: true
|
||||
|
||||
source 'https://rubygems.org'
|
||||
|
||||
gem 'term-ansicolor', '~> 1.2'
|
||||
gem 'rake', '~> 13.2'
|
22
Gemfile.lock
22
Gemfile.lock
@@ -1,22 +0,0 @@
|
||||
GEM
|
||||
remote: https://rubygems.org/
|
||||
specs:
|
||||
bigdecimal (3.1.9)
|
||||
rake (13.2.1)
|
||||
sync (0.5.0)
|
||||
term-ansicolor (1.11.2)
|
||||
tins (~> 1.0)
|
||||
tins (1.38.0)
|
||||
bigdecimal
|
||||
sync
|
||||
|
||||
PLATFORMS
|
||||
ruby
|
||||
x86_64-linux
|
||||
|
||||
DEPENDENCIES
|
||||
rake (~> 13.2)
|
||||
term-ansicolor (~> 1.2)
|
||||
|
||||
BUNDLED WITH
|
||||
2.6.7
|
94
Rakefile
94
Rakefile
@@ -5,18 +5,98 @@
|
||||
|
||||
require 'open3'
|
||||
require 'rake/clean'
|
||||
require 'term/ansicolor'
|
||||
|
||||
CLEAN.include 'build/boot'
|
||||
# Names of the bootstrap stages ("stage2", "stage3", ...) derived from the
# boot/stage*.elna sources. They are ordered by stage number, so STAGES.last
# stays the newest stage once a two-digit stage exists; a plain string sort
# would place "stage10" before "stage2".
STAGES = Dir.glob('boot/stage*.elna')
            .map { |stage| File.basename stage, '.elna' }
            .sort_by { |stage| [stage.delete_prefix('stage').to_i, stage] }
|
||||
|
||||
CLEAN.include 'build/boot', 'build/valid'
|
||||
|
||||
directory 'build/boot'
|
||||
directory 'build/valid'
|
||||
|
||||
# Assembles and links +input+ into the executable +output+.
#
# The compiler driver is taken from the CC environment variable and falls back
# to gcc. The command is executed with rake's +sh+.
def compile(input, output)
  compiler = ENV.fetch('CC', 'gcc')
  flags = %w[-nostdlib -fpie -g]

  sh compiler, *flags, '-o', output, input
end
|
||||
|
||||
# Builds the command line used to execute a compiled stage: the
# whitespace-separated words of the QEMU environment variable (none when the
# variable is unset or empty) followed by the path of the executable.
def run(exe)
  emulator = ENV.fetch('QEMU', '').split

  [*emulator, exe]
end
|
||||
|
||||
task default: :boot
|
||||
|
||||
desc 'Final stage'
|
||||
task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna'] do |t|
|
||||
exe, previous_output, source = t.prerequisites
|
||||
task boot: "build/valid/#{STAGES.last}"
|
||||
task boot: "build/valid/#{STAGES.last}.s"
|
||||
task boot: "boot/#{STAGES.last}.elna" do |t|
|
||||
groupped = t.prerequisites.group_by { |stage| File.extname stage }.transform_values(&:first)
|
||||
exe = groupped['']
|
||||
expected = groupped['.s']
|
||||
source = groupped['.elna']
|
||||
|
||||
cat_arguments = ['cat', source]
|
||||
compiler_arguments = [QEMU, '-L', SYSROOT, exe]
|
||||
diff_arguments = ['diff', '-Nur', '--text', previous_output, '-']
|
||||
Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments)
|
||||
diff_arguments = ['diff', '-Nur', '--text', expected, '-']
|
||||
Open3.pipeline(cat_arguments, run(exe), diff_arguments)
|
||||
end
|
||||
|
||||
desc 'Convert previous stage language into the current stage language'
task :convert do
  # Rewrites boot/stage8.elna into boot/stage9.elna line by line:
  # - lines without a "#" comment at their start are copied unchanged,
  # - a bare "#" becomes an empty line,
  # - "# text" becomes "(* text *)" with the original indentation.
  File.open('boot/stage9.elna', 'w') do |target|
    File.foreach('boot/stage8.elna') do |line|
      indentation, text = /^(\s*)#(.*)/.match(line)&.captures

      target << if text.nil?
                  line
                elsif text.empty?
                  "\n"
                else
                  "#{indentation}(* #{text.strip} *)\n"
                end
    end
  end
end
|
||||
|
||||
# A stage executable (build/<directory>/stage<N>) is assembled from the
# assembly file with the same name and an ".s" suffix.
assembly_source = ->(executable) { "#{executable}.s" }

rule(/^build\/[[:alpha:]]+\/stage[[:digit:]]+$/ => assembly_source) do |t|
  compile(*t.prerequisites, t.name)
end
|
||||
|
||||
# Defines the assembly targets of every stage found in boot/.
#
# - build/boot/<stage>.s is the stage source compiled by the validated
#   executable of the previous stage (build/valid/stage<N-1>).
# - build/valid/<stage>.s is the same source compiled by the stage's own
#   executable (build/boot/<stage>).
#
# The source is piped into the compiler's standard input with cat and the
# compiler's standard output is copied into the target file. The block form of
# Open3.pipeline_r is used so the pipe is closed and the child processes are
# waited for once the output has been copied.
STAGES.each do |stage|
  previous = stage.delete_prefix('stage').to_i.pred

  file "build/valid/#{stage}.s" => ["build/boot/#{stage}", "boot/#{stage}.elna"] do |t|
    exe, source = t.prerequisites

    Open3.pipeline_r(['cat', source], run(exe)) do |last_stdout, _wait_threads|
      IO.copy_stream last_stdout, t.name
    end
  end

  file "build/boot/#{stage}.s" => ["build/valid/stage#{previous}", "boot/#{stage}.elna"] do |t|
    exe, source = t.prerequisites

    Open3.pipeline_r(['cat', source], run(exe)) do |last_stdout, _wait_threads|
      IO.copy_stream last_stdout, t.name
    end
  end
end
|
||||
|
||||
#
|
||||
# Stage 1.
|
||||
#
|
||||
|
||||
# Runs the stage 1 executable on its own assembly source and stores what it
# prints as build/valid/stage1.s.
#
# The prerequisites ending in ".s" are the sources piped to the executable; the
# first remaining prerequisite is the executable itself. The block form of
# Open3.pipeline_r closes the pipe and waits for the child processes after the
# output has been copied.
file 'build/valid/stage1.s' => ['build/boot/stage1', 'boot/stage1.s', 'build/valid'] do |t|
  sources, others = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.s' }
  exe = others.first

  Open3.pipeline_r(['cat', *sources], run(exe)) do |last_stdout, _wait_threads|
    IO.copy_stream last_stdout, t.name
  end
end
|
||||
|
||||
# Assembles the hand-written stage 1 compiler. Only the prerequisites ending
# in ".s" are passed to the compiler; the build directory is skipped.
file 'build/boot/stage1' => ['build/boot', 'boot/stage1.s'] do |t|
  assembly_files = t.prerequisites.select { |path| path.end_with?('.s') }

  compile(*assembly_files, t.name)
end
|
||||
|
@@ -1,502 +0,0 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
|
||||
.global _write_out, _read_file, _write_error, _put_char, _printi
|
||||
.global _get, _memcmp, _memchr, _memmem, _memcpy
|
||||
.global _divide_by_zero_error, _exit
|
||||
.global _strings_index
|
||||
|
||||
.section .rodata
|
||||
|
||||
.equ SYS_READ, 63
|
||||
.equ SYS_WRITE, 64
|
||||
.equ SYS_EXIT, 93
|
||||
.equ STDIN, 0
|
||||
.equ STDOUT, 1
|
||||
.equ STDERR, 2
|
||||
|
||||
new_line: .ascii "\n"
|
||||
|
||||
.section .text
|
||||
|
||||
# Write the current token to stderr. Ends the output with a newline.
|
||||
#
|
||||
# a0 - String pointer.
|
||||
# a1 - String length.
|
||||
.type _write_error, @function
|
||||
_write_error:
|
||||
mv t0, a0
|
||||
mv t1, a1
|
||||
|
||||
li a0, STDERR
|
||||
mv a1, t0
|
||||
mv a2, t1
|
||||
li a7, SYS_WRITE
|
||||
ecall
|
||||
|
||||
li a0, STDERR
|
||||
la a1, new_line
|
||||
li a2, 1
|
||||
li a7, SYS_WRITE
|
||||
ecall
|
||||
|
||||
ret
|
||||
|
||||
# a0 - First pointer.
|
||||
# a1 - Second pointer.
|
||||
# a2 - The length to compare.
|
||||
#
|
||||
# Returns 0 in a0 if memory regions are equal.
|
||||
.type _memcmp, @function
|
||||
_memcmp:
|
||||
mv t0, a0
|
||||
li a0, 0
|
||||
|
||||
.Lmemcmp_loop:
|
||||
beqz a2, .Lmemcmp_end
|
||||
|
||||
lbu t1, (t0)
|
||||
lbu t2, (a1)
|
||||
sub a0, t1, t2
|
||||
|
||||
bnez a0, .Lmemcmp_end
|
||||
|
||||
addi t0, t0, 1
|
||||
addi a1, a1, 1
|
||||
addi a2, a2, -1
|
||||
|
||||
j .Lmemcmp_loop
|
||||
|
||||
.Lmemcmp_end:
|
||||
ret
|
||||
|
||||
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
|
||||
.type _is_upper, @function
|
||||
_is_upper:
|
||||
li t0, 'A' - 1
|
||||
sltu t1, t0, a0 # t1 = a0 >= 'A'
|
||||
|
||||
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
|
||||
and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
|
||||
|
||||
ret
|
||||
|
||||
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
|
||||
.type _is_lower, @function
|
||||
_is_lower:
|
||||
li t0, 'a' - 1
|
||||
sltu t2, t0, a0 # t2 = a0 >= 'a'
|
||||
|
||||
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
|
||||
and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
|
||||
|
||||
ret
|
||||
|
||||
# Detects if the passed character is a 7-bit alpha character or an underscore.
|
||||
# The character is passed in a0.
|
||||
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
|
||||
.type _is_alpha, @function
|
||||
_is_alpha:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
sw a0, 4(sp)
|
||||
|
||||
call _is_upper
|
||||
sw a0, 0(sp)
|
||||
|
||||
lw a0, 4(sp)
|
||||
call _is_lower
|
||||
|
||||
lw t0, 4(sp)
|
||||
xori t1, t0, '_'
|
||||
seqz t1, t1
|
||||
|
||||
lw t0, 0(sp)
|
||||
or a0, a0, t0
|
||||
or a0, a0, t1
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# Detects whether the passed character is a digit
|
||||
# (a value between 0 and 9).
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Examined value.
|
||||
#
|
||||
# Sets a0 to 1 if it is a digit, to 0 otherwise.
|
||||
.type _is_digit, @function
|
||||
_is_digit:
|
||||
li t0, '0' - 1
|
||||
sltu t1, t0, a0 # t1 = a0 >= '0'
|
||||
|
||||
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
|
||||
|
||||
and a0, t1, t2
|
||||
|
||||
ret
|
||||
|
||||
.type _is_alnum, @function
|
||||
_is_alnum:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
sw a0, 4(sp)
|
||||
|
||||
call _is_alpha
|
||||
sw a0, 0(sp)
|
||||
|
||||
lw a0, 4(sp)
|
||||
call _is_digit
|
||||
|
||||
lw a1, 0(sp)
|
||||
or a0, a0, a1
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
.type _write_out, @function
|
||||
_write_out:
|
||||
# Prologue.
|
||||
addi sp, sp, -8
|
||||
sw ra, 4(sp)
|
||||
sw s0, 0(sp)
|
||||
addi s0, sp, 8
|
||||
|
||||
mv a2, a1
|
||||
mv a1, a0
|
||||
li a0, STDOUT
|
||||
li a7, SYS_WRITE
|
||||
ecall
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 4(sp)
|
||||
lw s0, 0(sp)
|
||||
addi sp, sp, 8
|
||||
ret
|
||||
|
||||
# Reads standard input into a buffer.
|
||||
# a0 - Buffer pointer.
|
||||
# a1 - Buffer size.
|
||||
#
|
||||
# Sets s1 to the buffer passed in a0.
|
||||
#
|
||||
# Returns the number of bytes read into the buffer in a0.
|
||||
.type _read_file, @function
|
||||
_read_file:
|
||||
# Prologue.
|
||||
addi sp, sp, -8
|
||||
sw ra, 4(sp)
|
||||
sw s0, 0(sp)
|
||||
addi s0, sp, 8
|
||||
|
||||
mv s1, a0
|
||||
|
||||
li a0, STDIN
|
||||
mv a2, a1
|
||||
mv a1, s1
|
||||
li a7, SYS_READ
|
||||
ecall
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 4(sp)
|
||||
lw s0, 0(sp)
|
||||
addi sp, sp, 8
|
||||
ret
|
||||
|
||||
# Terminates the program. a0 contains the return code.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Status code.
|
||||
.type _exit, @function
|
||||
_exit:
|
||||
li a7, SYS_EXIT
|
||||
ecall
|
||||
# ret
|
||||
|
||||
.type _divide_by_zero_error, @function
|
||||
_divide_by_zero_error:
|
||||
addi a7, zero, 172 # getpid
|
||||
ecall
|
||||
|
||||
addi a1, zero, 8 # SIGFPE
|
||||
addi a7, zero, 129 # kill
|
||||
ecall
|
||||
ret
|
||||
|
||||
# Writes a signed whole number in decimal notation to the standard output.
#
# The digits are produced from the least significant one and stored backwards
# into a local buffer that ends right below s0. The frame is 32 bytes so that
# the longest output (a sign and ten digits) cannot reach the saved ra and s0
# at the bottom of the frame.
#
# a0 - Whole number.
# t1 - Constant 10.
# a1 - Local buffer.
# t2 - Current character.
# t3 - Whether the number is negative.
.type _printi, @function
_printi:
	addi sp, sp, -32
	sw s0, 0(sp)
	sw ra, 4(sp)
	addi s0, sp, 32
	addi t1, zero, 10
	addi a1, s0, -1 # The last byte of the frame receives the last digit.

	addi t3, zero, 0
	bge a0, zero, .digit10
	addi t3, zero, 1
	sub a0, zero, a0

.digit10:
	rem t2, a0, t1
	addi t2, t2, '0'
	sb t2, 0(a1)
	div a0, a0, t1
	addi a1, a1, -1
	bne zero, a0, .digit10

	beq zero, t3, .write_call
	addi t2, zero, '-'
	sb t2, 0(a1)
	addi a1, a1, -1

.write_call:
	li a0, STDOUT
	addi a1, a1, 1 # Step back onto the first written character.
	sub a2, s0, a1 # The text ends at s0, so this is its length.
	li a7, SYS_WRITE
	ecall

	lw s0, 0(sp)
	lw ra, 4(sp)
	addi sp, sp, 32
	ret
|
||||
|
||||
# Writes a character from a0 into the standard output.
|
||||
.type _put_char, @function
|
||||
_put_char:
|
||||
# Prologue
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
sb a0, 4(sp)
|
||||
li a0, STDOUT
|
||||
addi a1, sp, 4
|
||||
li a2, 1
|
||||
li a7, SYS_WRITE
|
||||
ecall
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
add sp, sp, 16
|
||||
ret
|
||||
|
||||
# a0 - Pointer to an array to get the first element.
|
||||
#
|
||||
# Dereferences a pointer and returns what is on the address in a0.
|
||||
.type _get, @function
|
||||
_get:
|
||||
lw a0, (a0)
|
||||
ret
|
||||
|
||||
# Searches for the first occurrence of a character in the given memory block.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Memory block.
|
||||
# a1 - Needle.
|
||||
# a2 - Memory size.
|
||||
#
|
||||
# Sets a0 to the pointer to the found character or to null if the character
|
||||
# doesn't occur in the memory block.
|
||||
.type _memchr, @function
|
||||
_memchr:
|
||||
.Lmemchr_loop:
|
||||
beqz a2, .Lmemchr_nil # Exit if the length is 0.
|
||||
|
||||
lbu t0, (a0) # Load the character from the memory block.
|
||||
beq t0, a1, .Lmemchr_end # Exit if the character was found.
|
||||
|
||||
# Otherwise, continue with the next character.
|
||||
addi a0, a0, 1
|
||||
addi a2, a2, -1
|
||||
|
||||
j .Lmemchr_loop
|
||||
|
||||
.Lmemchr_nil:
|
||||
li a0, 0
|
||||
|
||||
.Lmemchr_end:
|
||||
ret
|
||||
|
||||
# Locates a substring.
#
# Parameters:
# a0 - Haystack.
# a1 - Haystack size.
# a2 - Needle.
# a3 - Needle size.
#
# Sets a0 to the pointer to the beginning of the substring in memory or to 0
# if the substring doesn't occur in the block.
.type _memmem, @function
_memmem:
	# Prologue.
	addi sp, sp, -24
	sw ra, 20(sp)
	sw s0, 16(sp)
	addi s0, sp, 24

	# Save preserved registers. They are used to keep arguments.
	sw s1, 12(sp)
	sw s2, 8(sp)
	sw s3, 4(sp)
	sw s4, 0(sp)

	mv s1, a0 # Current position in the haystack.
	mv s2, a1 # Remaining haystack size.
	mv s3, a2 # Needle pointer.
	mv s4, a3 # Needle size.

.Lmemmem_loop:
	# Exit if the needle is longer than the remaining haystack. Both sizes are
	# compared (s2 and s4); s3 is the needle pointer, not a length.
	blt s2, s4, .Lmemmem_nil

	mv a0, s1
	mv a1, s3
	mv a2, s4
	call _memcmp

	mv t0, a0 # memcmp result.
	mv a0, s1 # Memory pointer for the case the substring was found.
	beqz t0, .Lmemmem_end

	# No match at this position, retry one byte further.
	addi s1, s1, 1
	addi s2, s2, -1

	j .Lmemmem_loop

.Lmemmem_nil:
	li a0, 0

.Lmemmem_end:

	# Restore the preserved registers.
	lw s1, 12(sp)
	lw s2, 8(sp)
	lw s3, 4(sp)
	lw s4, 0(sp)

	# Epilogue.
	lw ra, 20(sp)
	lw s0, 16(sp)
	addi sp, sp, 24
	ret
|
||||
|
||||
# Copies memory.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Destination.
|
||||
# a1 - Source.
|
||||
# a2 - Size.
|
||||
#
|
||||
# Preserves a0.
|
||||
.type _memcpy, @function
|
||||
_memcpy:
|
||||
mv t0, a0
|
||||
|
||||
.Lmemcpy_loop:
|
||||
beqz a2, .Lmemcpy_end
|
||||
|
||||
lbu t1, (a1)
|
||||
sb t1, (a0)
|
||||
|
||||
addi a0, a0, 1
|
||||
addi a1, a1, 1
|
||||
addi a2, a2, -1
|
||||
|
||||
j .Lmemcpy_loop
|
||||
|
||||
.Lmemcpy_end:
|
||||
mv a0, t0
|
||||
ret
|
||||
|
||||
# Searches for a string in a string array.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Number of elements in the string array.
|
||||
# a1 - String array.
|
||||
# a2 - Needle length.
|
||||
# a3 - Needle.
|
||||
#
|
||||
# Sets a0 to the 1-based index of the needle in the haystack or to 0 if the
|
||||
# element could not be found.
|
||||
.type _strings_index, @function
|
||||
_strings_index:
|
||||
# Prologue.
|
||||
addi sp, sp, -32
|
||||
sw ra, 28(sp)
|
||||
sw s0, 24(sp)
|
||||
addi s0, sp, 32
|
||||
|
||||
sw s1, 20(sp)
|
||||
mv s1, a0
|
||||
sw s2, 16(sp)
|
||||
mv s2, a1
|
||||
sw s3, 12(sp)
|
||||
mv s3, a2
|
||||
sw s4, 8(sp)
|
||||
mv s4, a3
|
||||
sw s5, 4(sp)
|
||||
li s5, 0 # Index counter.
|
||||
|
||||
.Lstrings_index_loop:
|
||||
addi s5, s5, 1
|
||||
beqz s1, .Lstrings_index_missing
|
||||
|
||||
lw a2, (s2) # Read the length of the current element in the haystack.
|
||||
bne a2, s3, .Lstrings_index_next # Lengths don't match, skip the iteration.
|
||||
|
||||
addi a0, s2, 4
|
||||
mv a1, s4
|
||||
call _memcmp
|
||||
|
||||
beqz a0, .Lstrings_index_end
|
||||
|
||||
.Lstrings_index_next:
|
||||
# Advance the pointer, reduce the length.
|
||||
lw a2, (s2)
|
||||
addi s2, s2, 4
|
||||
add s2, s2, a2
|
||||
addi s1, s1, -1
|
||||
j .Lstrings_index_loop
|
||||
|
||||
.Lstrings_index_missing:
|
||||
li s5, 0
|
||||
|
||||
.Lstrings_index_end:
|
||||
mv a0, s5
|
||||
|
||||
lw s1, 20(sp)
|
||||
lw s2, 16(sp)
|
||||
lw s3, 12(sp)
|
||||
lw s4, 8(sp)
|
||||
lw s5, 4(sp)
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 28(sp)
|
||||
lw s0, 24(sp)
|
||||
add sp, sp, 32
|
||||
ret
|
@@ -1,63 +0,0 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
#
|
||||
# Tokens.
|
||||
#
|
||||
|
||||
# The constant should match the index in the keywords array in tokenizer.s.
|
||||
|
||||
.equ TOKEN_PROGRAM, 1
|
||||
.equ TOKEN_IMPORT, 2
|
||||
.equ TOKEN_CONST, 3
|
||||
.equ TOKEN_VAR, 4
|
||||
.equ TOKEN_IF, 5
|
||||
.equ TOKEN_THEN, 6
|
||||
.equ TOKEN_ELSIF, 7
|
||||
.equ TOKEN_ELSE, 8
|
||||
.equ TOKEN_WHILE, 9
|
||||
.equ TOKEN_DO, 10
|
||||
.equ TOKEN_PROC, 11
|
||||
.equ TOKEN_BEGIN, 12
|
||||
.equ TOKEN_END, 13
|
||||
.equ TOKEN_TYPE, 14
|
||||
.equ TOKEN_RECORD, 15
|
||||
.equ TOKEN_UNION, 16
|
||||
.equ TOKEN_TRUE, 17
|
||||
.equ TOKEN_FALSE, 18
|
||||
.equ TOKEN_NIL, 19
|
||||
.equ TOKEN_XOR, 20
|
||||
.equ TOKEN_OR, 21
|
||||
.equ TOKEN_RETURN, 22
|
||||
.equ TOKEN_CAST, 23
|
||||
.equ TOKEN_GOTO, 24
|
||||
.equ TOKEN_CASE, 25
|
||||
.equ TOKEN_OF, 26
|
||||
|
||||
.equ TOKEN_IDENTIFIER, 27
|
||||
# The constant should match the character index in the byte_keywords string.
|
||||
|
||||
.equ TOKEN_AND, TOKEN_IDENTIFIER + 1
|
||||
.equ TOKEN_DOT, TOKEN_IDENTIFIER + 2
|
||||
.equ TOKEN_COMMA, TOKEN_IDENTIFIER + 3
|
||||
.equ TOKEN_COLON, TOKEN_IDENTIFIER + 4
|
||||
.equ TOKEN_SEMICOLON, TOKEN_IDENTIFIER + 5
|
||||
.equ TOKEN_LEFT_PAREN, TOKEN_IDENTIFIER + 6
|
||||
.equ TOKEN_RIGHT_PAREN, TOKEN_IDENTIFIER + 7
|
||||
.equ TOKEN_LEFT_BRACKET, TOKEN_IDENTIFIER + 8
|
||||
.equ TOKEN_RIGHT_BRACKET, TOKEN_IDENTIFIER + 9
|
||||
.equ TOKEN_HAT, TOKEN_IDENTIFIER + 10
|
||||
.equ TOKEN_EQUALS, TOKEN_IDENTIFIER + 11
|
||||
.equ TOKEN_PLUS, TOKEN_IDENTIFIER + 12
|
||||
.equ TOKEN_MINUS, TOKEN_IDENTIFIER + 13
|
||||
.equ TOKEN_ASTERISK, TOKEN_IDENTIFIER + 14
|
||||
.equ TOKEN_AT, TOKEN_IDENTIFIER + 15
|
||||
|
||||
.equ TOKEN_ASSIGN, 43
|
||||
.equ TOKEN_INTEGER, 44
|
||||
|
||||
#
|
||||
# Symbols.
|
||||
#
|
||||
.equ TYPE_PRIMITIVE, 1
|
2195
boot/stage1.s
2195
boot/stage1.s
File diff suppressed because it is too large
Load Diff
2043
boot/stage2.elna
2043
boot/stage2.elna
File diff suppressed because it is too large
Load Diff
971
boot/stage3.elna
Normal file
971
boot/stage3.elna
Normal file
@@ -0,0 +1,971 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
# Stage 3 compiler.
|
||||
#
|
||||
# - Procedures with multiple arguments.
|
||||
# - Character literals with and without escaping.
|
||||
|
||||
.section .rodata
|
||||
|
||||
.type keyword_section, @object
|
||||
keyword_section: .ascii ".section"
|
||||
|
||||
.type keyword_type, @object
|
||||
keyword_type: .ascii ".type"
|
||||
|
||||
.type keyword_ret, @object
|
||||
keyword_ret: .ascii "ret"
|
||||
|
||||
.type keyword_global, @object
|
||||
keyword_global: .ascii ".globl"
|
||||
|
||||
.type keyword_proc, @object
|
||||
keyword_proc: .ascii "proc "
|
||||
|
||||
.type keyword_end, @object
|
||||
keyword_end: .ascii "end"
|
||||
|
||||
.type keyword_begin, @object
|
||||
keyword_begin: .ascii "begin"
|
||||
|
||||
.type keyword_var, @object
|
||||
keyword_var: .ascii "var"
|
||||
|
||||
.type asm_prologue, @object
|
||||
asm_prologue: .string "\taddi sp, sp, -32\n\tsw ra, 28(sp)\n\tsw s0, 24(sp)\n\taddi s0, sp, 32\n"
|
||||
|
||||
.type asm_epilogue, @object
|
||||
asm_epilogue: .string "\tlw ra, 28(sp)\n\tlw s0, 24(sp)\n\taddi sp, sp, 32\n\tret\n"
|
||||
|
||||
.type asm_type_directive, @object
|
||||
asm_type_directive: .string ".type "
|
||||
|
||||
.type asm_type_function, @object
|
||||
asm_type_function: .string ", @function\n"
|
||||
|
||||
.type asm_colon, @object
|
||||
asm_colon: .string ":\n"
|
||||
|
||||
.type asm_call, @object
|
||||
asm_call: .string "\tcall "
|
||||
|
||||
.type asm_j, @object
|
||||
asm_j: .string "\tj "
|
||||
|
||||
.type asm_li, @object
|
||||
asm_li: .string "\tli "
|
||||
|
||||
.type asm_lw, @object
|
||||
asm_lw: .string "\tlw "
|
||||
|
||||
.type asm_sw, @object
|
||||
asm_sw: .string "\tsw "
|
||||
|
||||
.type asm_mv, @object
|
||||
asm_mv: .string "mv "
|
||||
|
||||
.type asm_t0, @object
|
||||
asm_t0: .string "t0"
|
||||
|
||||
.type asm_a0, @object
|
||||
asm_a0: .string "a0"
|
||||
|
||||
.type asm_comma, @object
|
||||
asm_comma: .string ", "
|
||||
|
||||
.type asm_sp, @object
|
||||
asm_sp: .string "(sp)"
|
||||
|
||||
.section .bss
|
||||
|
||||
# When modifying, also change the read size in the entry point procedure.
|
||||
.type source_code, @object
|
||||
source_code: .zero 81920
|
||||
|
||||
.section .data
|
||||
|
||||
.type source_code_position, @object
|
||||
source_code_position: .word source_code
|
||||
|
||||
.section .text
|
||||
|
||||
# Reads standard input into a buffer.
|
||||
# a0 - Buffer pointer.
|
||||
# a1 - Buffer size.
|
||||
#
|
||||
# Returns the number of bytes read into the buffer in a0.
|
||||
proc _read_file();
|
||||
begin
|
||||
mv a2, a1
|
||||
mv a1, a0
|
||||
# STDIN.
|
||||
li a0, 0
|
||||
li a7, 63 # SYS_READ.
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Writes to the standard output.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Buffer.
|
||||
# a1 - Buffer length.
|
||||
proc _write_s();
|
||||
begin
|
||||
mv a2, a1
|
||||
mv a1, a0
|
||||
# STDOUT.
|
||||
li a0, 1
|
||||
li a7, 64 # SYS_WRITE.
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Writes a number to a string buffer.
|
||||
#
|
||||
# t0 - Local buffer.
|
||||
# t1 - Constant 10.
|
||||
# t2 - Current character.
|
||||
# t3 - Whether the number is negative.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Whole number.
|
||||
# a1 - Buffer pointer.
|
||||
#
|
||||
# Sets a0 to the length of the written number.
|
||||
proc _print_i();
|
||||
begin
|
||||
li t1, 10
|
||||
addi t0, s0, -9
|
||||
|
||||
li t3, 0
|
||||
bgez a0, .print_i_digit10
|
||||
li t3, 1
|
||||
neg a0, a0
|
||||
|
||||
.print_i_digit10:
|
||||
rem t2, a0, t1
|
||||
addi t2, t2, '0'
|
||||
sb t2, 0(t0)
|
||||
div a0, a0, t1
|
||||
addi t0, t0, -1
|
||||
bne zero, a0, .print_i_digit10
|
||||
|
||||
beq zero, t3, .print_i_write_call
|
||||
addi t2, zero, '-'
|
||||
sb t2, 0(t0)
|
||||
addi t0, t0, -1
|
||||
|
||||
.print_i_write_call:
|
||||
mv a0, a1
|
||||
addi a1, t0, 1
|
||||
sub a2, s0, t0
|
||||
addi a2, a2, -9
|
||||
sw a2, 0(sp)
|
||||
|
||||
_memcpy();
|
||||
|
||||
lw a0, 0(sp)
|
||||
end;
|
||||
|
||||
# Writes a number to the standard output.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Whole number.
|
||||
proc _write_i();
|
||||
begin
|
||||
addi a1, sp, 0
|
||||
_print_i();
|
||||
|
||||
mv a1, a0
|
||||
addi a0, sp, 0
|
||||
_write_s();
|
||||
|
||||
end;
|
||||
|
||||
# Writes a character from a0 into the standard output.
|
||||
proc _write_c();
|
||||
begin
|
||||
sb a0, 0(sp)
|
||||
addi a0, sp, 0
|
||||
li a1, 1
|
||||
_write_s();
|
||||
end;
|
||||
|
||||
# Write null terminated string.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - String.
|
||||
proc _write_z();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
.write_z_loop:
|
||||
# Check for 0 character.
|
||||
lb a0, (a0)
|
||||
beqz a0, .write_z_end
|
||||
|
||||
# Print a character.
|
||||
lw a0, 0(sp)
|
||||
lb a0, (a0)
|
||||
_write_c();
|
||||
|
||||
# Advance the input string by one byte.
|
||||
lw a0, 0(sp)
|
||||
addi a0, a0, 1
|
||||
sw a0, 0(sp)
|
||||
|
||||
goto .write_z_loop;
|
||||
|
||||
.write_z_end:
|
||||
end;
|
||||
|
||||
# Detects if a0 is an uppercase character. Sets a0 to 1 if so, otherwise to 0.
|
||||
proc _is_upper();
|
||||
begin
|
||||
li t0, 'A' - 1
|
||||
sltu t1, t0, a0 # t1 = a0 >= 'A'
|
||||
|
||||
sltiu t2, a0, 'Z' + 1 # t2 = a0 <= 'Z'
|
||||
and a0, t1, t2 # t1 = a0 >= 'A' & a0 <= 'Z'
|
||||
end;
|
||||
|
||||
# Detects if a0 is a lowercase character. Sets a0 to 1 if so, otherwise to 0.
|
||||
proc _is_lower();
|
||||
begin
|
||||
li t0, 'a' - 1
|
||||
sltu t2, t0, a0 # t2 = a0 >= 'a'
|
||||
|
||||
sltiu t3, a0, 'z' + 1 # t3 = a0 <= 'z'
|
||||
and a0, t2, t3 # t2 = a0 >= 'a' & a0 <= 'z'
|
||||
end;
|
||||
|
||||
# Detects if the passed character is a 7-bit alpha character or an underscore.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Tested character.
|
||||
#
|
||||
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
|
||||
proc _is_alpha();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
_is_upper();
|
||||
sw a0, 4(sp)
|
||||
|
||||
_is_lower(v00);
|
||||
|
||||
lw t0, 0(sp)
|
||||
xori t1, t0, '_'
|
||||
seqz t1, t1
|
||||
|
||||
lw t0, 4(sp)
|
||||
or a0, a0, t0
|
||||
or a0, a0, t1
|
||||
end;
|
||||
|
||||
# Detects whether the passed character is a digit
|
||||
# (a value between 0 and 9).
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Examined value.
|
||||
#
|
||||
# Sets a0 to 1 if it is a digit, to 0 otherwise.
|
||||
proc _is_digit();
|
||||
begin
|
||||
li t0, '0' - 1
|
||||
sltu t1, t0, a0 # t1 = a0 >= '0'
|
||||
|
||||
sltiu t2, a0, '9' + 1 # t2 = a0 <= '9'
|
||||
|
||||
and a0, t1, t2
|
||||
end;
|
||||
|
||||
proc _is_alnum();
|
||||
begin
|
||||
sw a0, 4(sp)
|
||||
|
||||
_is_alpha();
|
||||
sw a0, 0(sp)
|
||||
|
||||
_is_digit(v04);
|
||||
|
||||
lw a1, 0(sp)
|
||||
or a0, a0, a1
|
||||
end;
|
||||
|
||||
# Reads the next token.
|
||||
#
|
||||
# Returns token length in a0.
|
||||
proc _read_token();
|
||||
begin
|
||||
la t0, source_code_position # Token pointer.
|
||||
lw t0, (t0)
|
||||
sw t0, 0(sp) # Current token position.
|
||||
sw zero, 4(sp) # Token length.
|
||||
|
||||
.read_token_loop:
|
||||
lb t0, (t0) # Current character.
|
||||
|
||||
# First we try to read a directive.
|
||||
# A directive can contain a dot and alphanumeric characters.
|
||||
li t1, '.'
|
||||
beq t0, t1, .read_token_next
|
||||
|
||||
lw a0, 0(sp)
|
||||
lb a0, (a0)
|
||||
_is_alnum();
|
||||
bnez a0, .read_token_next
|
||||
|
||||
goto .read_token_end;
|
||||
|
||||
.read_token_next:
|
||||
# Advance the source code position and token length.
|
||||
lw t0, 4(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 4(sp)
|
||||
|
||||
lw t0, 0(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 0(sp)
|
||||
|
||||
goto .read_token_loop;
|
||||
|
||||
.read_token_end:
|
||||
lw a0, 4(sp)
|
||||
end;
|
||||
|
||||
# a0 - First pointer.
|
||||
# a1 - Second pointer.
|
||||
# a2 - The length to compare.
|
||||
#
|
||||
# Returns 0 in a0 if memory regions are equal.
|
||||
proc _memcmp();
|
||||
begin
|
||||
mv t0, a0
|
||||
li a0, 0
|
||||
|
||||
.memcmp_loop:
|
||||
beqz a2, .memcmp_end
|
||||
|
||||
lbu t1, (t0)
|
||||
lbu t2, (a1)
|
||||
sub a0, t1, t2
|
||||
|
||||
bnez a0, .memcmp_end
|
||||
|
||||
addi t0, t0, 1
|
||||
addi a1, a1, 1
|
||||
addi a2, a2, -1
|
||||
|
||||
goto .memcmp_loop;
|
||||
|
||||
.memcmp_end:
|
||||
end;
|
||||
|
||||
# Copies memory.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0.
proc _memcpy();
begin
	# Keep the destination, a0 is advanced while copying.
	mv t0, a0

.memcpy_loop:
	beqz a2, .memcpy_end

	# Copy one byte and move both pointers forward.
	lbu t1, (a1)
	sb t1, (a0)

	addi a0, a0, 1
	addi a1, a1, 1
	addi a2, a2, -1

	# Terminated with a semicolon like every other goto statement in this file.
	goto .memcpy_loop;

.memcpy_end:
	mv a0, t0
end;
|
||||
|
||||
# Advances the token stream by a0 bytes.
|
||||
proc _advance_token();
|
||||
begin
|
||||
la t0, source_code_position
|
||||
lw t1, (t0)
|
||||
add t1, t1, a0
|
||||
sw t1, (t0)
|
||||
end;
|
||||
|
||||
# Prints the current token.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Token length.
|
||||
#
|
||||
# Returns a0 unchanged.
|
||||
proc _write_token();
|
||||
begin
|
||||
sw a0, 0(sp)
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
lw a1, 0(sp)
|
||||
_write_s();
|
||||
|
||||
lw a0, 0(sp)
|
||||
end;
|
||||
|
||||
proc _compile_section();
|
||||
begin
|
||||
# Print and skip the ".section" (8 characters) directive and a space after it.
|
||||
_write_token(9);
|
||||
_advance_token();
|
||||
|
||||
# Read the section name.
|
||||
_read_token();
|
||||
addi a0, a0, 1
|
||||
|
||||
_write_token();
|
||||
_advance_token();
|
||||
end;
|
||||
|
||||
# Skips the rest of the current line, including the newline, without printing it.
|
||||
proc _skip_comment();
|
||||
begin
|
||||
la t0, source_code_position
|
||||
lw t1, (t0)
|
||||
|
||||
.skip_comment_loop:
|
||||
# Check for newline character.
|
||||
lb t2, (t1)
|
||||
li t3, '\n'
|
||||
beq t2, t3, .skip_comment_end
|
||||
|
||||
# Advance the input string by one byte.
|
||||
addi t1, t1, 1
|
||||
sw t1, (t0)
|
||||
|
||||
goto .skip_comment_loop;
|
||||
|
||||
.skip_comment_end:
|
||||
# Skip the newline.
|
||||
addi t1, t1, 1
|
||||
sw t1, (t0)
|
||||
end;
|
||||
|
||||
# Prints and skips a line.
#
# Copies the characters from the current source position up to the next
# newline to the standard output, then prints the newline and moves the source
# position past it.
proc _compile_line();
begin
.compile_line_loop:
	la a0, source_code_position
	lw a1, (a0)

	lb t0, (a1)
	li t1, '\n'
	beq t0, t1, .compile_line_end

	# Print a character. Only a single byte is loaded: the source position is
	# not necessarily word aligned and _write_c uses the low byte only.
	lb a0, (a1)
	_write_c();

	# Advance the input string by one byte.
	_advance_token(1);

	goto .compile_line_loop;

.compile_line_end:
	_write_c('\n');

	_advance_token(1);
end;
|
||||
|
||||
proc _compile_integer_literal();
|
||||
begin
|
||||
la a0, asm_li
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
_read_token();
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
_write_c('\n');
|
||||
end;
|
||||
|
||||
proc _compile_character_literal();
|
||||
begin
|
||||
la a0, asm_li
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
li a0, '\''
|
||||
_write_c();
|
||||
_advance_token(1);
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb a0, (t0)
|
||||
li t1, '\\'
|
||||
bne a0, t1, .compile_character_literal_end
|
||||
|
||||
li a0, '\\'
|
||||
_write_c();
|
||||
_advance_token(1);
|
||||
|
||||
.compile_character_literal_end:
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb a0, (t0)
|
||||
_write_c();
|
||||
|
||||
li a0, '\''
|
||||
_write_c();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
_advance_token(2);
|
||||
|
||||
end;
|
||||
|
||||
proc _compile_variable_expression();
|
||||
begin
|
||||
la a0, asm_lw
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
_advance_token(1);
|
||||
_read_token();
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
end;
|
||||
|
||||
# Compiles the expression at the current source position.
#
# The first character selects the kind of expression: a single quote starts a
# character literal, "v" starts a stack variable and a digit starts an integer
# literal. Anything else is left untouched.
proc _compile_expression();
begin
	la t0, source_code_position
	lw t0, (t0)
	lb a0, (t0)

	li t1, '\''
	beq a0, t1, .compile_expression_character_literal

	li t1, 'v'
	beq a0, t1, .compile_expression_variable

	_is_digit();
	bnez a0, .compile_expression_integer_literal

	goto .compile_expression_end;

.compile_expression_character_literal:
	_compile_character_literal();
	goto .compile_expression_end;

.compile_expression_integer_literal:
	_compile_integer_literal();
	goto .compile_expression_end;

.compile_expression_variable:
	_compile_variable_expression();
	goto .compile_expression_end;

.compile_expression_end:
end;
|
||||
|
||||
proc _compile_call();
|
||||
begin
|
||||
# Stack variables:
|
||||
# v0 - Procedure name length.
|
||||
# v4 - Procedure name pointer.
|
||||
# v8 - Argument count.
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp)
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
sw t0, 4(sp)
|
||||
|
||||
sw zero, 8(sp)
|
||||
|
||||
# Skip the identifier and left paren.
|
||||
addi a0, a0, 1
|
||||
_advance_token();
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, ')'
|
||||
beq t0, t1, .compile_call_finalize
|
||||
|
||||
.compile_call_loop:
|
||||
_compile_expression();
|
||||
|
||||
# Save the argument on the stack.
|
||||
la a0, asm_sw
|
||||
_write_z();
|
||||
|
||||
la a0, asm_a0
|
||||
_write_z();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
# Calculate the stack offset: 20 - (4 * argument_counter)
|
||||
lw t0, 8(sp)
|
||||
li t1, 4
|
||||
mul t0, t0, t1
|
||||
li t1, 20
|
||||
sub a0, t1, t0
|
||||
_write_i();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
# Add one to the argument counter.
|
||||
lw t0, 8(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 8(sp)
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, ','
|
||||
bne t0, t1, .compile_call_finalize
|
||||
|
||||
_advance_token(2);
|
||||
goto .compile_call_loop;
|
||||
|
||||
.compile_call_finalize:
|
||||
# Load the argument from the stack.
|
||||
|
||||
lw t0, 8(sp)
|
||||
beqz t0, .compile_call_end
|
||||
|
||||
# Decrement the argument counter.
|
||||
lw t0, 8(sp)
|
||||
addi t0, t0, -1
|
||||
sw t0, 8(sp)
|
||||
|
||||
la a0, asm_lw
|
||||
_write_z();
|
||||
|
||||
_write_c('a');
|
||||
lw a0, 8(sp)
|
||||
_write_i();
|
||||
|
||||
la a0, asm_comma
|
||||
_write_z();
|
||||
|
||||
# Calculate the stack offset: 20 - (4 * argument_counter)
|
||||
lw t0, 8(sp)
|
||||
li t1, 4
|
||||
mul t0, t0, t1
|
||||
li t1, 20
|
||||
sub a0, t1, t0
|
||||
_write_i();
|
||||
|
||||
la a0, asm_sp
|
||||
_write_z();
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
goto .compile_call_finalize;
|
||||
|
||||
.compile_call_end:
|
||||
la a0, asm_call
|
||||
_write_z();
|
||||
|
||||
lw a0, 4(sp)
|
||||
lw a1, 0(sp)
|
||||
_write_s();
|
||||
|
||||
# Skip the right paren.
|
||||
_advance_token(1);
|
||||
end;
|
||||
|
||||
proc _compile_goto();
|
||||
begin
|
||||
_advance_token(5);
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp)
|
||||
|
||||
la a0, asm_j
|
||||
_write_z();
|
||||
|
||||
_write_token(v00);
|
||||
_advance_token();
|
||||
end;
|
||||
|
||||
proc _compile_statement();
|
||||
begin
|
||||
# This is a call if the statement starts with an underscore.
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
# First character after alignment tab.
|
||||
addi t0, t0, 1
|
||||
lb t0, (t0)
|
||||
|
||||
li t1, '_'
|
||||
beq t0, t1, .compile_statement_call
|
||||
|
||||
li t1, 'g'
|
||||
beq t0, t1, .compile_statement_goto
|
||||
|
||||
_compile_line();
|
||||
goto .compile_statement_end;
|
||||
|
||||
.compile_statement_call:
|
||||
_advance_token(1);
|
||||
_compile_call();
|
||||
|
||||
goto .compile_statement_semicolon;
|
||||
|
||||
.compile_statement_goto:
|
||||
_advance_token(1);
|
||||
_compile_goto();
|
||||
|
||||
goto .compile_statement_semicolon;
|
||||
|
||||
.compile_statement_semicolon:
|
||||
_advance_token(2);
|
||||
|
||||
_write_c('\n');
|
||||
|
||||
.compile_statement_end:
|
||||
end;
|
||||
|
||||
proc _compile_procedure_body();
|
||||
begin
|
||||
.compile_procedure_body_loop:
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_end
|
||||
li a2, 3 # "end" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_procedure_body_epilogue
|
||||
|
||||
_compile_statement();
|
||||
goto .compile_procedure_body_loop;
|
||||
|
||||
.compile_procedure_body_epilogue:
|
||||
end;
|
||||
|
||||
proc _compile_procedure();
|
||||
begin
|
||||
# Skip "proc ".
|
||||
_advance_token(5);
|
||||
|
||||
_read_token();
|
||||
sw a0, 0(sp) # Save the procedure name length.
|
||||
|
||||
# Write .type _procedure_name, @function.
|
||||
la a0, asm_type_directive
|
||||
_write_z();
|
||||
|
||||
_write_token(v00);
|
||||
|
||||
la a0, asm_type_function
|
||||
_write_z();
|
||||
|
||||
# Write procedure label, _procedure_name:
|
||||
_write_token(v00);
|
||||
|
||||
la a0, asm_colon
|
||||
_write_z();
|
||||
|
||||
# Skip the function name and trailing parens, semicolon, "begin" and newline.
|
||||
lw a0, 0(sp)
|
||||
addi a0, a0, 10
|
||||
_advance_token();
|
||||
|
||||
la a0, asm_prologue
|
||||
_write_z();
|
||||
|
||||
_compile_procedure_body();
|
||||
|
||||
# Write the epilogue.
|
||||
la a0, asm_epilogue
|
||||
_write_z();
|
||||
|
||||
# Skip the "end" keyword, semicolon and newline.
|
||||
_advance_token(5);
|
||||
end;
|
||||
|
||||
proc _compile_type();
|
||||
begin
|
||||
# Print and skip the ".type" (5 characters) directive and a space after it.
|
||||
_write_token(6);
|
||||
_advance_token();
|
||||
|
||||
# Read and print the symbol name.
|
||||
_read_token();
|
||||
|
||||
# Print and skip the symbol name, comma, space and @.
|
||||
addi a0, a0, 3
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
# Read the symbol type.
|
||||
_read_token();
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
sw t0, 12(sp)
|
||||
|
||||
# Print the symbol type and newline.
|
||||
addi a0, a0, 1
|
||||
_write_token();
|
||||
_advance_token();
|
||||
|
||||
# Write the object definition itself.
|
||||
_compile_line();
|
||||
|
||||
.compile_type_end:
|
||||
end;
|
||||
|
||||
# Advances source_code_position past a run of consecutive newline characters.
# The position is left on the first character that is not a newline.
proc _skip_newlines();
begin
	la t0, source_code_position
	lw t1, (t0)

.skip_newlines_loop:
	lb t2, (t1)
	li t3, '\n'
	# Any other character, including the terminating NUL, ends the loop, so no
	# separate zero check is required.
	bne t2, t3, .skip_newlines_end

	addi t1, t1, 1
	sw t1, (t0)

	goto .skip_newlines_loop;

.skip_newlines_end:
end;
|
||||
|
||||
# Process the source code and print the generated code.
|
||||
proc _compile();
|
||||
begin
|
||||
.compile_loop:
|
||||
_skip_newlines();
|
||||
|
||||
la t0, source_code_position
|
||||
lw t0, (t0)
|
||||
lb t0, (t0)
|
||||
beqz t0, .compile_end
|
||||
li t1, '#'
|
||||
beq t0, t1, .compile_comment
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_section
|
||||
li a2, 8 # ".section" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_section
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_type
|
||||
li a2, 5 # ".type" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_type
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_proc
|
||||
li a2, 5 # "proc " length. Space is needed to distinguish from "procedure".
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_procedure
|
||||
|
||||
la a0, source_code_position
|
||||
lw a0, (a0)
|
||||
la a1, keyword_global
|
||||
li a2, 6 # ".globl" length.
|
||||
_memcmp();
|
||||
|
||||
beqz a0, .compile_global
|
||||
# Not a known token, exit.
|
||||
goto .compile_end;
|
||||
|
||||
.compile_section:
|
||||
_compile_section();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_type:
|
||||
_compile_type();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_global:
|
||||
_compile_line();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_comment:
|
||||
_skip_comment();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_procedure:
|
||||
_compile_procedure();
|
||||
|
||||
goto .compile_loop;
|
||||
|
||||
.compile_end:
|
||||
end;
|
||||
|
||||
# Terminates the program. a0 contains the return code.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Status code.
|
||||
proc _exit();
|
||||
begin
|
||||
li a7, 93 # SYS_EXIT
|
||||
ecall
|
||||
end;
|
||||
|
||||
# Entry point.
|
||||
.globl _start
|
||||
proc _start();
|
||||
begin
|
||||
# Read the source from the standard input.
|
||||
la a0, source_code
|
||||
li a1, 81920 # Buffer size.
|
||||
_read_file();
|
||||
_compile();
|
||||
|
||||
_exit(0);
|
||||
|
||||
end;
|
1129
boot/stage4.elna
Normal file
1129
boot/stage4.elna
Normal file
File diff suppressed because it is too large
Load Diff
1487
boot/stage5.elna
Normal file
1487
boot/stage5.elna
Normal file
File diff suppressed because it is too large
Load Diff
1588
boot/stage6.elna
Normal file
1588
boot/stage6.elna
Normal file
File diff suppressed because it is too large
Load Diff
1488
boot/stage7.elna
Normal file
1488
boot/stage7.elna
Normal file
File diff suppressed because it is too large
Load Diff
1979
boot/stage8.elna
Normal file
1979
boot/stage8.elna
Normal file
File diff suppressed because it is too large
Load Diff
1944
boot/stage9.elna
Normal file
1944
boot/stage9.elna
Normal file
File diff suppressed because it is too large
Load Diff
255
boot/symbol.s
255
boot/symbol.s
@@ -2,7 +2,9 @@
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
.global symbol_table_build, symbol_table_find
|
||||
.global symbol_table
|
||||
.global symbol_table_build, symbol_table_lookup, symbol_table_enter, symbol_table_dump
|
||||
.global symbol_table_make_pointer, symbol_table_make_parameter, symbol_table_make_local, symbol_table_make_procedure
|
||||
|
||||
.include "boot/definitions.inc"
|
||||
|
||||
@@ -18,6 +20,8 @@ symbol_builtin_name_word: .ascii "Word"
|
||||
symbol_builtin_name_byte: .ascii "Byte"
|
||||
.type symbol_builtin_name_char, @object
|
||||
symbol_builtin_name_char: .ascii "Char"
|
||||
.type symbol_builtin_name_bool, @object
|
||||
symbol_builtin_name_bool: .ascii "Bool"
|
||||
|
||||
# Every type info starts with a word describing what type it is.
|
||||
|
||||
@@ -34,6 +38,9 @@ symbol_builtin_type_byte: .word TYPE_PRIMITIVE
|
||||
.type symbol_builtin_type_char, @object
|
||||
symbol_builtin_type_char: .word TYPE_PRIMITIVE
|
||||
.word 1
|
||||
.type symbol_builtin_type_bool, @object
|
||||
symbol_builtin_type_bool: .word TYPE_PRIMITIVE
|
||||
.word 1
|
||||
|
||||
.section .bss
|
||||
|
||||
@@ -49,16 +56,53 @@ symbol_table: .zero SYMBOL_PRIME
|
||||
|
||||
.section .text
|
||||
|
||||
# Prints the list of symbols in the table.
|
||||
.type symbol_table_dump, @function
|
||||
symbol_table_dump:
|
||||
# Prologue.
|
||||
addi sp, sp, -32
|
||||
sw ra, 28(sp)
|
||||
sw s0, 24(sp)
|
||||
addi s0, sp, 32
|
||||
|
||||
sw s1, 20(sp) # Current symbol in the table.
|
||||
sw s2, 16(sp) # Symbol table length.
|
||||
|
||||
la s1, symbol_table
|
||||
lw s2, 0(s1)
|
||||
addi s1, s1, 4 # Advance to the first symbol in the table.
|
||||
|
||||
.Lsymbol_table_dump_loop:
|
||||
beqz s2, .Lsymbol_table_dump_end
|
||||
|
||||
# Pass the symbol name (pointer in a0, length in a1) to _write_error.
|
||||
lw a0, 4(s1)
|
||||
lw a1, 0(s1)
|
||||
call _write_error
|
||||
|
||||
addi s1, s1, 12
|
||||
addi s2, s2, -1
|
||||
j .Lsymbol_table_dump_loop
|
||||
|
||||
.Lsymbol_table_dump_end:
|
||||
lw s1, 20(sp)
|
||||
lw s2, 16(sp)
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 28(sp)
|
||||
lw s0, 24(sp)
|
||||
addi sp, sp, 32
|
||||
ret
|
||||
|
||||
# Searches for a symbol by name.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Local symbol table or 0.
|
||||
# a1 - Length of the symbol to search.
|
||||
# a2 - Pointer to the symbol name.
|
||||
# a0 - Length of the symbol to search.
|
||||
# a1 - Pointer to the symbol name.
|
||||
#
|
||||
# Sets a0 to the symbol info.
|
||||
.type symbol_table_find, @function
|
||||
symbol_table_find:
|
||||
# Sets a0 to the symbol info pointer or 0 if the symbol has not been found.
|
||||
.type symbol_table_lookup, @function
|
||||
symbol_table_lookup:
|
||||
# Prologue.
|
||||
addi sp, sp, -32
|
||||
sw ra, 28(sp)
|
||||
@@ -70,38 +114,37 @@ symbol_table_find:
|
||||
sw s3, 12(sp) # Length of the symbol to search.
|
||||
sw s4, 8(sp) # Pointer to the symbol to search.
|
||||
|
||||
mv s3, a1
|
||||
mv s4, a2
|
||||
mv s3, a0
|
||||
mv s4, a1
|
||||
|
||||
la s1, symbol_table
|
||||
lw s2, 0(s1)
|
||||
addi s1, s1, 4 # Advance to the first symbol in the table.
|
||||
|
||||
.Lsymbol_table_find_loop:
|
||||
beqz s2, .Lsymbol_table_find_not_found
|
||||
.Lsymbol_table_lookup_loop:
|
||||
beqz s2, .Lsymbol_table_lookup_not_found
|
||||
|
||||
# Compare string lengths.
|
||||
mv a0, s3
|
||||
mv a1, s4
|
||||
lw a2, 0(s1)
|
||||
bne s3, a2, .Lsymbol_table_find_continue
|
||||
lw a3, 4(s1)
|
||||
call _string_equal
|
||||
|
||||
# If lengths match, compare the content.
|
||||
mv a0, s4
|
||||
lw a1, 4(s1)
|
||||
call _memcmp
|
||||
|
||||
bnez a0, .Lsymbol_table_find_continue
|
||||
beqz a0, .Lsymbol_table_lookup_continue
|
||||
|
||||
lw a0, 8(s1) # Pointer to the symbol.
|
||||
j .Lsymbol_table_end
|
||||
j .Lsymbol_table_lookup_end
|
||||
|
||||
.Lsymbol_table_find_continue:
|
||||
.Lsymbol_table_lookup_continue:
|
||||
addi s1, s1, 12
|
||||
addi s2, s2, -1
|
||||
j .Lsymbol_table_find_loop
|
||||
j .Lsymbol_table_lookup_loop
|
||||
|
||||
.Lsymbol_table_find_not_found:
|
||||
.Lsymbol_table_lookup_not_found:
|
||||
li a0, 0
|
||||
|
||||
.Lsymbol_table_end:
|
||||
.Lsymbol_table_lookup_end:
|
||||
lw s1, 20(sp)
|
||||
lw s2, 16(sp)
|
||||
lw s3, 12(sp)
|
||||
@@ -113,56 +156,142 @@ symbol_table_find:
|
||||
addi sp, sp, 32
|
||||
ret
|
||||
|
||||
# Creates a pointer type.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Pointer to the base type.
|
||||
# a1 - Output memory.
|
||||
#
|
||||
# Sets a0 to the size of newly created type in bytes.
|
||||
.type symbol_table_make_pointer, @function
|
||||
symbol_table_make_pointer:
|
||||
li t0, TYPE_POINTER
|
||||
sw t0, 0(a1)
|
||||
sw a0, 4(a1)
|
||||
|
||||
li a0, 8
|
||||
ret
|
||||
|
||||
# Creates a parameter info.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Pointer to the parameter type.
|
||||
# a1 - Parameter offset.
|
||||
# a2 - Output memory.
|
||||
#
|
||||
# Sets a0 to the size of newly created info object in bytes.
|
||||
.type symbol_table_make_parameter, @function
|
||||
symbol_table_make_parameter:
|
||||
li t0, INFO_PARAMETER
|
||||
sw t0, 0(a2)
|
||||
sw a0, 4(a2)
|
||||
sw a1, 8(a2)
|
||||
|
||||
li a0, 12
|
||||
ret
|
||||
|
||||
# Creates a local variable info.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Pointer to the variable type.
|
||||
# a1 - Variable stack offset.
|
||||
# a2 - Output memory.
|
||||
#
|
||||
# Sets a0 to the size of newly created info object in bytes.
|
||||
.type symbol_table_make_local, @function
|
||||
symbol_table_make_local:
|
||||
li t0, INFO_LOCAL
|
||||
sw t0, 0(a2)
|
||||
sw a0, 4(a2)
|
||||
sw a1, 8(a2)
|
||||
|
||||
li a0, 12
|
||||
ret
|
||||
|
||||
# Creates a procedure type and a procedure info object referring to the type.
#
# Layout written to the output memory:
#   0(a0) - INFO_PROCEDURE tag.
#   4(a0) - Pointer to the procedure type, i.e. the address of 8(a0).
#   8(a0) - TYPE_PROCEDURE tag (the procedure type itself).
#
# Parameters:
# a0 - Output memory.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_procedure, @function
symbol_table_make_procedure:
	li t0, TYPE_PROCEDURE
	sw t0, 8(a0)

	li t0, INFO_PROCEDURE
	sw t0, 0(a0)

	# The procedure type is stored in the same memory segment, right after the
	# info header, so the type pointer has to address offset 8, not the info
	# object itself.
	addi t0, a0, 8
	sw t0, 4(a0)

	li a0, 12
	ret
|
||||
|
||||
# Inserts a symbol into the table.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Symbol name length.
|
||||
# a1 - Symbol name pointer.
|
||||
# a2 - Symbol pointer.
|
||||
.type symbol_table_enter, @function
|
||||
symbol_table_enter:
|
||||
la t0, symbol_table
|
||||
|
||||
lw t1, 0(t0) # Current table length.
|
||||
li t2, 12 # Calculate the offset to the next entry.
|
||||
mul t2, t1, t2
|
||||
addi t2, t2, 4
|
||||
add t2, t0, t2
|
||||
|
||||
sw a0, 0(t2)
|
||||
sw a1, 4(t2)
|
||||
sw a2, 8(t2)
|
||||
|
||||
addi t1, t1, 1 # Save the new length.
|
||||
sw t1, 0(t0)
|
||||
|
||||
ret
|
||||
|
||||
# Build the initial symbols.
|
||||
#
|
||||
# Sets a0 to the pointer to the global symbol table.
|
||||
.type symbol_table_build, @function
|
||||
symbol_table_build:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
la a0, symbol_table
|
||||
addi t0, a0, 4
|
||||
|
||||
li t1, 3 # Length of the word "Int".
|
||||
sw t1, 0(t0)
|
||||
la t1, symbol_builtin_name_int
|
||||
sw t1, 4(t0)
|
||||
la t1, symbol_builtin_type_int
|
||||
sw t1, 8(t0)
|
||||
lw t1, 0(a0)
|
||||
addi t1, t1, 1
|
||||
sw t1, 0(a0)
|
||||
addi t0, t0, 12
|
||||
li a0, 3 # Length of the word "Int".
|
||||
la a1, symbol_builtin_name_int
|
||||
la a2, symbol_builtin_type_int
|
||||
call symbol_table_enter
|
||||
|
||||
li t1, 4 # Length of the word "Word".
|
||||
sw t1, 0(t0)
|
||||
la t1, symbol_builtin_name_word
|
||||
sw t1, 4(t0)
|
||||
la t1, symbol_builtin_type_word
|
||||
sw t1, 8(t0)
|
||||
lw t1, 0(a0)
|
||||
addi t1, t1, 1
|
||||
sw t1, 0(a0)
|
||||
addi t0, t0, 12
|
||||
li a0, 4 # Length of the word "Word".
|
||||
la a1, symbol_builtin_name_word
|
||||
la a2, symbol_builtin_type_word
|
||||
call symbol_table_enter
|
||||
|
||||
li t1, 4 # Length of the word "Byte".
|
||||
sw t1, 0(t0)
|
||||
la t1, symbol_builtin_name_byte
|
||||
sw t1, 4(t0)
|
||||
la t1, symbol_builtin_type_byte
|
||||
sw t1, 8(t0)
|
||||
lw t1, 0(a0)
|
||||
addi t1, t1, 1
|
||||
sw t1, 0(a0)
|
||||
addi t0, t0, 12
|
||||
li a0, 4 # Length of the word "Byte".
|
||||
la a1, symbol_builtin_name_byte
|
||||
la a2, symbol_builtin_type_byte
|
||||
call symbol_table_enter
|
||||
|
||||
li t1, 4 # Length of the word "Char".
|
||||
sw t1, 0(t0)
|
||||
la t1, symbol_builtin_name_char
|
||||
sw t1, 4(t0)
|
||||
la t1, symbol_builtin_type_char
|
||||
sw t1, 8(t0)
|
||||
lw t1, 0(a0)
|
||||
addi t1, t1, 1
|
||||
sw t1, 0(a0)
|
||||
addi t0, t0, 12
|
||||
li a0, 4 # Length of the word "Char".
|
||||
la a1, symbol_builtin_name_char
|
||||
la a2, symbol_builtin_type_char
|
||||
call symbol_table_enter
|
||||
|
||||
li a0, 4 # Length of the word "Bool".
|
||||
la a1, symbol_builtin_name_bool
|
||||
la a2, symbol_builtin_type_bool
|
||||
call symbol_table_enter
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
616
boot/tokenizer.s
616
boot/tokenizer.s
@@ -1,616 +0,0 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
|
||||
.global _tokenize_next, classification, transitions, keywords, byte_keywords
|
||||
|
||||
.include "boot/definitions.inc"
|
||||
|
||||
.section .rodata
|
||||
|
||||
#
|
||||
# Classification table assigns each possible character to a group (class). All
|
||||
# characters of the same group are handled equivalently.
|
||||
#
|
||||
# Classification:
|
||||
#
|
||||
.equ CLASS_INVALID, 0x00
|
||||
.equ CLASS_DIGIT, 0x01
|
||||
.equ CLASS_CHARACTER, 0x02
|
||||
.equ CLASS_SPACE, 0x03
|
||||
.equ CLASS_COLON, 0x04
|
||||
.equ CLASS_EQUALS, 0x05
|
||||
.equ CLASS_LEFT_PAREN, 0x06
|
||||
.equ CLASS_RIGHT_PAREN, 0x07
|
||||
.equ CLASS_ASTERISK, 0x08
|
||||
.equ CLASS_UNDERSCORE, 0x09
|
||||
.equ CLASS_SINGLE, 0x0a
|
||||
.equ CLASS_HEX, 0x0b
|
||||
.equ CLASS_ZERO, 0x0c
|
||||
.equ CLASS_X, 0x0d
|
||||
.equ CLASS_EOF, 0x0e
|
||||
.equ CLASS_DOT, 0x0f
|
||||
.equ CLASS_MINUS, 0x10
|
||||
.equ CLASS_QUOTE, 0x11
|
||||
.equ CLASS_GREATER, 0x12
|
||||
.equ CLASS_LESS, 0x13
|
||||
|
||||
.equ CLASS_COUNT, 20
|
||||
|
||||
.type classification, @object
|
||||
classification:
|
||||
.byte CLASS_EOF # 00 NUL
|
||||
.byte CLASS_INVALID # 01 SOH
|
||||
.byte CLASS_INVALID # 02 STX
|
||||
.byte CLASS_INVALID # 03 ETX
|
||||
.byte CLASS_INVALID # 04 EOT
|
||||
.byte CLASS_INVALID # 05 ENQ
|
||||
.byte CLASS_INVALID # 06 ACK
|
||||
.byte CLASS_INVALID # 07 BEL
|
||||
.byte CLASS_INVALID # 08 BS
|
||||
.byte CLASS_SPACE # 09 HT
|
||||
.byte CLASS_SPACE # 0A LF
|
||||
.byte CLASS_INVALID # 0B VT
|
||||
.byte CLASS_INVALID # 0C FF
|
||||
.byte CLASS_SPACE # 0D CR
|
||||
.byte CLASS_INVALID # 0E SO
|
||||
.byte CLASS_INVALID # 0F SI
|
||||
.byte CLASS_INVALID # 10 DLE
|
||||
.byte CLASS_INVALID # 11 DC1
|
||||
.byte CLASS_INVALID # 12 DC2
|
||||
.byte CLASS_INVALID # 13 DC3
|
||||
.byte CLASS_INVALID # 14 DC4
|
||||
.byte CLASS_INVALID # 15 NAK
|
||||
.byte CLASS_INVALID # 16 SYN
|
||||
.byte CLASS_INVALID # 17 ETB
|
||||
.byte CLASS_INVALID # 18 CAN
|
||||
.byte CLASS_INVALID # 19 EM
|
||||
.byte CLASS_INVALID # 1A SUB
|
||||
.byte CLASS_INVALID # 1B ESC
|
||||
.byte CLASS_INVALID # 1C FS
|
||||
.byte CLASS_INVALID # 1D GS
|
||||
.byte CLASS_INVALID # 1E RS
|
||||
.byte CLASS_INVALID # 1F US
|
||||
.byte CLASS_SPACE # 20 Space
|
||||
.byte CLASS_SINGLE # 21 !
|
||||
.byte CLASS_QUOTE # 22 "
|
||||
.byte 0x00 # 23 #
|
||||
.byte 0x00 # 24 $
|
||||
.byte CLASS_SINGLE # 25 %
|
||||
.byte CLASS_SINGLE # 26 &
|
||||
.byte CLASS_QUOTE # 27 '
|
||||
.byte CLASS_LEFT_PAREN # 28 (
|
||||
.byte CLASS_RIGHT_PAREN # 29 )
|
||||
.byte CLASS_ASTERISK # 2A *
|
||||
.byte CLASS_SINGLE # 2B +
|
||||
.byte CLASS_SINGLE # 2C ,
|
||||
.byte CLASS_MINUS # 2D -
|
||||
.byte CLASS_DOT # 2E .
|
||||
.byte CLASS_SINGLE # 2F /
|
||||
.byte CLASS_ZERO # 30 0
|
||||
.byte CLASS_DIGIT # 31 1
|
||||
.byte CLASS_DIGIT # 32 2
|
||||
.byte CLASS_DIGIT # 33 3
|
||||
.byte CLASS_DIGIT # 34 4
|
||||
.byte CLASS_DIGIT # 35 5
|
||||
.byte CLASS_DIGIT # 36 6
|
||||
.byte CLASS_DIGIT # 37 7
|
||||
.byte CLASS_DIGIT # 38 8
|
||||
.byte CLASS_DIGIT # 39 9
|
||||
.byte CLASS_COLON # 3A :
|
||||
.byte CLASS_SINGLE # 3B ;
|
||||
.byte CLASS_LESS # 3C <
|
||||
.byte CLASS_EQUALS # 3D =
|
||||
.byte CLASS_GREATER # 3E >
|
||||
.byte 0x00 # 3F ?
|
||||
.byte CLASS_SINGLE # 40 @
|
||||
.byte CLASS_CHARACTER # 41 A
|
||||
.byte CLASS_CHARACTER # 42 B
|
||||
.byte CLASS_CHARACTER # 43 C
|
||||
.byte CLASS_CHARACTER # 44 D
|
||||
.byte CLASS_CHARACTER # 45 E
|
||||
.byte CLASS_CHARACTER # 46 F
|
||||
.byte CLASS_CHARACTER # 47 G
|
||||
.byte CLASS_CHARACTER # 48 H
|
||||
.byte CLASS_CHARACTER # 49 I
|
||||
.byte CLASS_CHARACTER # 4A J
|
||||
.byte CLASS_CHARACTER # 4B K
|
||||
.byte CLASS_CHARACTER # 4C L
|
||||
.byte CLASS_CHARACTER # 4D M
|
||||
.byte CLASS_CHARACTER # 4E N
|
||||
.byte CLASS_CHARACTER # 4F O
|
||||
.byte CLASS_CHARACTER # 50 P
|
||||
.byte CLASS_CHARACTER # 51 Q
|
||||
.byte CLASS_CHARACTER # 52 R
|
||||
.byte CLASS_CHARACTER # 53 S
|
||||
.byte CLASS_CHARACTER # 54 T
|
||||
.byte CLASS_CHARACTER # 55 U
|
||||
.byte CLASS_CHARACTER # 56 V
|
||||
.byte CLASS_CHARACTER # 57 W
|
||||
.byte CLASS_CHARACTER # 58 X
|
||||
.byte CLASS_CHARACTER # 59 Y
|
||||
.byte CLASS_CHARACTER # 5A Z
|
||||
.byte CLASS_SINGLE # 5B [
|
||||
.byte 0x00 # 5C \
|
||||
.byte CLASS_SINGLE # 5D ]
|
||||
.byte CLASS_SINGLE # 5E ^
|
||||
.byte CLASS_UNDERSCORE # 5F _
|
||||
.byte 0x00 # 60 `
|
||||
.byte CLASS_HEX # 61 a
|
||||
.byte CLASS_HEX # 62 b
|
||||
.byte CLASS_HEX # 63 c
|
||||
.byte CLASS_HEX # 64 d
|
||||
.byte CLASS_HEX # 65 e
|
||||
.byte CLASS_HEX # 66 f
|
||||
.byte CLASS_CHARACTER # 67 g
|
||||
.byte CLASS_CHARACTER # 68 h
|
||||
.byte CLASS_CHARACTER # 69 i
|
||||
.byte CLASS_CHARACTER # 6A j
|
||||
.byte CLASS_CHARACTER # 6B k
|
||||
.byte CLASS_CHARACTER # 6C l
|
||||
.byte CLASS_CHARACTER # 6D m
|
||||
.byte CLASS_CHARACTER # 6E n
|
||||
.byte CLASS_CHARACTER # 6F o
|
||||
.byte CLASS_CHARACTER # 70 p
|
||||
.byte CLASS_CHARACTER # 71 q
|
||||
.byte CLASS_CHARACTER # 72 r
|
||||
.byte CLASS_CHARACTER # 73 s
|
||||
.byte CLASS_CHARACTER # 74 t
|
||||
.byte CLASS_CHARACTER # 75 u
|
||||
.byte CLASS_CHARACTER # 76 v
|
||||
.byte CLASS_CHARACTER # 77 w
|
||||
.byte CLASS_X # 78 x
|
||||
.byte CLASS_CHARACTER # 79 y
|
||||
.byte CLASS_CHARACTER # 7A z
|
||||
.byte 0x00 # 7B {
|
||||
.byte CLASS_SINGLE # 7C |
|
||||
.byte 0x00 # 7D }
|
||||
.byte CLASS_SINGLE # 7E ~
|
||||
.byte CLASS_INVALID # 7F DEL
|
||||
|
||||
#
|
||||
# Textual keywords in the language.
|
||||
#
|
||||
.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
|
||||
|
||||
.type keywords, @object
|
||||
keywords:
|
||||
.word 7
|
||||
.ascii "program"
|
||||
.word 6
|
||||
.ascii "import"
|
||||
.word 5
|
||||
.ascii "const"
|
||||
.word 3
|
||||
.ascii "var"
|
||||
.word 2
|
||||
.ascii "if"
|
||||
.word 4
|
||||
.ascii "then"
|
||||
.word 5
|
||||
.ascii "elsif"
|
||||
.word 4
|
||||
.ascii "else"
|
||||
.word 5
|
||||
.ascii "while"
|
||||
.word 2
|
||||
.ascii "do"
|
||||
.word 4
|
||||
.ascii "proc"
|
||||
.word 5
|
||||
.ascii "begin"
|
||||
.word 3
|
||||
.ascii "end"
|
||||
.word 4
|
||||
.ascii "type"
|
||||
.word 6
|
||||
.ascii "record"
|
||||
.word 5
|
||||
.ascii "union"
|
||||
.word 4
|
||||
.ascii "true"
|
||||
.word 5
|
||||
.ascii "false"
|
||||
.word 3
|
||||
.ascii "nil"
|
||||
.word 3
|
||||
.ascii "xor"
|
||||
.word 2
|
||||
.ascii "or"
|
||||
.word 6
|
||||
.ascii "return"
|
||||
.word 4
|
||||
.ascii "cast"
|
||||
.word 4
|
||||
.ascii "goto"
|
||||
.word 4
|
||||
.ascii "case"
|
||||
.word 2
|
||||
.ascii "of"
|
||||
|
||||
.type byte_keywords, @object
|
||||
byte_keywords: .ascii "&.,:;()[]^=+-*@"
|
||||
.equ BYTE_KEYWORDS_SIZE, . - byte_keywords
|
||||
|
||||
.section .data
|
||||
|
||||
# The transition table describes transitions from one state to another, given
|
||||
# a symbol (character class).
|
||||
#
|
||||
# The table has m rows and n columns, where m is the amount of states and n is
|
||||
# the amount of classes. So given the current state and a classified character
|
||||
# the table can be used to look up the next state.
|
||||
#
|
||||
# Each cell is a word long.
|
||||
# - The least significant byte of the word is a row number (beginning with 0).
|
||||
# It specifies the target state. "ff" means that this is an end state and no
|
||||
# transition is possible.
|
||||
# - The next byte is the action that should be performed when transitioning.
|
||||
# For the meaning of actions see labels in the _tokenize_next function, which
|
||||
# handles each action.
|
||||
#
|
||||
.type transitions, @object
|
||||
transitions:
|
||||
# Invalid Digit Alpha Space : = ( )
|
||||
# * _ Single Hex 0 x NUL .
|
||||
# - " or ' > <
|
||||
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
|
||||
.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
|
||||
.word 0x0105, 0x0110, 0x0104, 0x0107 # 0x00 Start
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon
|
||||
|
||||
.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
|
||||
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff
|
||||
.word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier
|
||||
|
||||
.word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Decimal
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater
|
||||
|
||||
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
||||
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
||||
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
|
||||
|
||||
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
||||
.word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
|
||||
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
|
||||
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
|
||||
.word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less
|
||||
|
||||
.word 0x08ff, 0x0108, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x00ff, 0x08ff, 0x0108, 0x0108, 0x00ff, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x08 Hexadecimal after 0x.
|
||||
|
||||
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
|
||||
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
|
||||
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment
|
||||
|
||||
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
|
||||
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
|
||||
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment
|
||||
|
||||
.word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
|
||||
.word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x0110
|
||||
.word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String
|
||||
|
||||
.word 0x08ff, 0x00ff, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x00ff, 0x010d, 0x08ff, 0x08ff
|
||||
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x0c Leading zero
|
||||
|
||||
.word 0x00ff, 0x0108, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff
|
||||
.word 0x00ff, 0x00ff, 0x00ff, 0x0108, 0x0108, 0x00ff, 0x00ff, 0x00ff
|
||||
.word 0x00ff, 0x00ff, 0x00ff, 0x00ff # 0x0d Starting hexadecimal
|
||||
|
||||
.section .text
|
||||
|
||||
# Returns the class from the classification table for the given character.
#
# The classification table only covers the 7-bit ASCII range (128 entries).
# Any byte value above 0x7f is reported as CLASS_INVALID instead of reading
# past the end of the table.
#
# Parameters:
# a0 - Character.
#
# Sets a0 to the class number.
.type _classify, @function
_classify:
	li t0, 0x80
	bgeu a0, t0, .Lclassify_invalid # Unsigned compare also rejects negative values.

	la t0, classification
	add t0, t0, a0 # Character class pointer.
	lbu a0, (t0) # Character class.
	ret

.Lclassify_invalid:
	li a0, CLASS_INVALID
	ret
|
||||
|
||||
# Given the current state and a character class, calculates the next state.
|
||||
|
||||
# Parameters:
|
||||
# a0 - Current state.
|
||||
# a1 - Character class.
|
||||
#
|
||||
# Sets a0 to the transition table cell: the target state in the least significant byte and the action in the next byte.
|
||||
.type _lookup_state, @function
|
||||
_lookup_state:
|
||||
li t0, CLASS_COUNT
|
||||
mul a0, a0, t0 # Transition row.
|
||||
add a0, a0, a1 # Transition column.
|
||||
|
||||
li t0, 4
|
||||
mul a0, a0, t0 # Multiply by the word size.
|
||||
|
||||
la t0, transitions
|
||||
add t0, t0, a0
|
||||
lw a0, (t0) # Next state.
|
||||
|
||||
ret
|
||||
|
||||
# Chains _classify and _lookup_state.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Current state.
|
||||
# a1 - Character.
|
||||
#
|
||||
# Sets a0 to the transition table cell (target state and action) selected by the current state and the class of the given character.
|
||||
.type _next_state, @function
|
||||
_next_state:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
sw a0, 4(sp)
|
||||
mv a0, a1
|
||||
call _classify
|
||||
|
||||
mv a1, a0
|
||||
lw a0, 4(sp)
|
||||
call _lookup_state
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# Takes an identifier and checks whether it's a keyword.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Token length.
|
||||
# a1 - Token pointer.
|
||||
#
|
||||
# Sets a0 to the appropriate token type.
|
||||
.type _classify_identifier, @function
|
||||
_classify_identifier:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
mv a2, a0
|
||||
mv a3, a1
|
||||
li a0, KEYWORDS_COUNT
|
||||
la a1, keywords
|
||||
call _strings_index
|
||||
|
||||
bnez a0, .Lclassify_identifier_end
|
||||
li a0, TOKEN_IDENTIFIER
|
||||
|
||||
.Lclassify_identifier_end:
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# Takes a symbol and determines its type.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Token character.
|
||||
#
|
||||
# Sets a0 to the appropriate token type.
|
||||
.type _classify_single, @function
|
||||
_classify_single:
|
||||
# Prologue.
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
addi s0, sp, 16
|
||||
|
||||
mv a1, a0
|
||||
li a2, BYTE_KEYWORDS_SIZE
|
||||
la a0, byte_keywords
|
||||
call _memchr
|
||||
|
||||
la a1, byte_keywords
|
||||
sub a0, a0, a1
|
||||
addi a0, a0, TOKEN_IDENTIFIER + 1
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# Classifies a symbol containing multiple characters (probably 2).
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Token length.
|
||||
# a1 - Token pointer.
|
||||
#
|
||||
# Sets a0 to the appropriate token type.
|
||||
.type _classify_composite, @function
|
||||
_classify_composite:
|
||||
lbu t0, 0(a1)
|
||||
li t1, ':'
|
||||
beq t0, t1, .Lclassify_composite_assign
|
||||
|
||||
j .Lclassify_composite_end
|
||||
|
||||
.Lclassify_composite_assign:
|
||||
li a0, TOKEN_ASSIGN
|
||||
j .Lclassify_composite_end
|
||||
|
||||
.Lclassify_composite_end:
|
||||
ret
|
||||
|
||||
# Reads the next token from the source text.
|
||||
#
|
||||
# Parameters:
|
||||
# a0 - Source text pointer.
|
||||
# a1 - A pointer to the output token: the kind (4 bytes), followed for identifiers and integers by the length and the start pointer (4 bytes each).
|
||||
#
|
||||
# Sets a0 to the position of the next token.
|
||||
.type _tokenize_next, @function
|
||||
_tokenize_next:
|
||||
# Prologue.
|
||||
addi sp, sp, -32
|
||||
sw ra, 28(sp)
|
||||
sw s0, 24(sp)
|
||||
addi s0, sp, 32
|
||||
|
||||
sw s1, 20(sp) # Preserve s1 used for current source text position.
|
||||
mv s1, a0
|
||||
sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
|
||||
# 4(sp) and 8(sp) are reserved for the kind and length of the token if needed.
|
||||
|
||||
sw s2, 16(sp) # Preserve s2 containing the current state.
|
||||
li s2, 0x00 # Initial, start state.
|
||||
|
||||
sw a1, 0(sp)
|
||||
sw zero, (a1) # Initialize.
|
||||
|
||||
.Ltokenize_next_loop:
|
||||
mv a0, s2
|
||||
lbu a1, (s1)
|
||||
call _next_state
|
||||
|
||||
li t0, 0xff
|
||||
and s2, a0, t0 # Next state.
|
||||
|
||||
li t0, 0xff00
|
||||
and t1, a0, t0 # Transition action.
|
||||
srli t1, t1, 8
|
||||
|
||||
# Perform the provided action.
|
||||
li t0, 0x01 # Accumulate action.
|
||||
beq t1, t0, .Ltokenize_next_accumulate
|
||||
|
||||
li t0, 0x02 # Print action.
|
||||
beq t1, t0, .Ltokenize_next_print
|
||||
|
||||
li t0, 0x03 # Skip action.
|
||||
beq t1, t0, .Ltokenize_next_skip
|
||||
|
||||
li t0, 0x04 # Delimited string action.
|
||||
beq t1, t0, .Ltokenize_next_comment
|
||||
|
||||
li t0, 0x05 # Finalize identifier.
|
||||
beq t1, t0, .Ltokenize_next_identifier
|
||||
|
||||
li t0, 0x06 # Single character symbol action.
|
||||
beq t1, t0, .Ltokenize_next_single
|
||||
|
||||
li t0, 0x07 # An action for symbols containing multiple characters.
|
||||
beq t1, t0, .Ltokenize_next_composite
|
||||
|
||||
li t0, 0x08 # Integer action.
|
||||
beq t1, t0, .Ltokenize_next_integer
|
||||
|
||||
j .Ltokenize_next_reject
|
||||
|
||||
.Ltokenize_next_reject:
|
||||
addi s1, s1, 1
|
||||
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Ltokenize_next_accumulate:
|
||||
addi s1, s1, 1
|
||||
|
||||
j .Ltokenize_next_loop
|
||||
|
||||
.Ltokenize_next_skip:
|
||||
addi s1, s1, 1
|
||||
lw t0, 12(sp)
|
||||
addi t0, t0, 1
|
||||
sw t0, 12(sp)
|
||||
|
||||
j .Ltokenize_next_loop
|
||||
|
||||
.Ltokenize_next_print:
|
||||
/* DEBUG
|
||||
addi a0, a0, 21
|
||||
sw a0, 0(sp)
|
||||
addi a0, sp, 0
|
||||
li a1, 1
|
||||
call _write_error */
|
||||
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Ltokenize_next_comment:
|
||||
addi s1, s1, 1
|
||||
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Ltokenize_next_identifier:
|
||||
# An identifier can be a textual keyword.
|
||||
# Check the kind of the token and write it into the output parameter.
|
||||
lw a1, 12(sp)
|
||||
sub a0, s1, a1
|
||||
sw a0, 8(sp)
|
||||
call _classify_identifier
|
||||
sw a0, 4(sp)
|
||||
lw a0, 0(sp)
|
||||
addi a1, sp, 4
|
||||
li a2, 12
|
||||
call _memcpy
|
||||
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Ltokenize_next_single:
|
||||
lw a0, 12(sp)
|
||||
addi s1, a0, 1
|
||||
lbu a0, (a0)
|
||||
call _classify_single
|
||||
lw a1, 0(sp)
|
||||
sw a0, (a1)
|
||||
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Ltokenize_next_composite:
|
||||
addi s1, s1, 1
|
||||
lw a1, 12(sp)
|
||||
sub a0, s1, a1
|
||||
call _classify_composite
|
||||
lw a1, 0(sp)
|
||||
sw a0, (a1)
|
||||
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Ltokenize_next_integer:
|
||||
lw t0, 0(sp)
|
||||
li t1, TOKEN_INTEGER
|
||||
sw t1, 0(t0)
|
||||
lw t1, 12(sp)
|
||||
sw t1, 8(t0)
|
||||
sub t1, s1, t1
|
||||
sw t1, 4(t0)
|
||||
|
||||
j .Ltokenize_next_end
|
||||
|
||||
.Ltokenize_next_end:
|
||||
mv a0, s1 # Return the advanced text pointer.
|
||||
|
||||
# Restore saved registers.
|
||||
lw s1, 20(sp)
|
||||
lw s2, 16(sp)
|
||||
|
||||
# Epilogue.
|
||||
lw ra, 28(sp)
|
||||
lw s0, 24(sp)
|
||||
addi sp, sp, 32
|
||||
ret
|
@@ -1,323 +0,0 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
# frozen_string_literal: true
|
||||
|
||||
require 'pathname'
|
||||
require 'uri'
|
||||
require 'net/http'
|
||||
require 'rake/clean'
|
||||
require 'open3'
|
||||
require 'etc'
|
||||
|
||||
# GCC release to download; its major version is also used to pick a
# "gcc-<major>" host compiler when plain "gcc" turns out to be Apple clang.
GCC_VERSION = "15.1.0"
|
||||
# Binutils release downloaded by the cross:binutils task.
BINUTILS_VERSION = '2.44'
|
||||
# glibc release used by the cross:headers and cross:glibc tasks.
GLIBC_VERSION = '2.41'
|
||||
# Linux kernel release built by cross:kernel and passed to glibc's
# --enable-kernel option.
KERNEL_VERSION = '5.15.181'
|
||||
|
||||
# Everything is built under ./build, so "rake clobber" removes it.
CLOBBER.include 'build'
|
||||
|
||||
# Describes the toolchain build: the host compiler and the directories used
# while building the cross toolchain.
#
# Attributes:
# build  - Triplet of the build machine, as reported by the host compiler.
# gcc    - Command used to invoke the host C compiler.
# target - Triplet of the cross compilation target.
# tmp    - Pathname of the directory that holds all build products.
class BuildTarget
  attr_accessor(:build, :gcc, :target, :tmp)

  # Returns the command of the C++ compiler matching the C compiler.
  #
  # Only the program name is rewritten ("gcc" becomes "g++", "gcc-15" becomes
  # "g++-15"); a leading directory or a triplet prefix containing the letter
  # "c" is left untouched.
  def gxx
    directory, separator, program = @gcc.rpartition '/'

    directory + separator + program.sub('gcc', 'g++')
  end

  # Directory the target system headers and libraries are installed into.
  def sysroot
    tmp + 'sysroot'
  end

  # Installation prefix of the cross tools (binutils and GCC).
  def rootfs
    tmp + 'rootfs'
  end

  # Directory for downloaded sources and out-of-tree build directories.
  def tools
    tmp + 'tools'
  end
end
|
||||
|
||||
# Runs the given compiler with --verbose and returns everything it wrote to
# the standard error stream.
#
# LANG is set to C for the child process.
#
# The block form of IO.pipe closes both pipe ends even when the command
# fails and sh raises, so no file descriptors are leaked.
def gcc_verbose(gcc_binary)
  IO.pipe do |read, write|
    sh({'LANG' => 'C'}, gcc_binary, '--verbose', err: write)
    # Close our copy of the write end, otherwise the read below never sees EOF.
    write.close

    read.read
  end
end
|
||||
|
||||
# Detects the host compiler and assembles a BuildTarget from it.
#
# Parameters:
# gcc_version - GCC version string; its major component selects the
#               "gcc-<major>" binary when "gcc" identifies as Apple clang.
# task        - Task arguments; :target defaults to riscv32-unknown-linux-gnu.
#
# Returns a BuildTarget with build, gcc, tmp and target filled in.
def find_build_target(gcc_version, task)
  verbose_output = gcc_verbose 'gcc'

  if verbose_output.start_with? 'Apple clang'
    versioned_gcc = "gcc-#{gcc_version.split('.').first}"
    verbose_output = gcc_verbose versioned_gcc
  end

  build_target = BuildTarget.new
  verbose_output.each_line do |line|
    if line.start_with? 'Target: '
      build_target.build = line.split(' ').last.strip
    elsif line.start_with? 'COLLECT_GCC'
      build_target.gcc = line.split('=').last.strip
    end
  end

  build_target.tmp = Pathname.new('./build')
  task.with_defaults target: 'riscv32-unknown-linux-gnu'
  build_target.target = task[:target]

  build_target
end
|
||||
|
||||
# Downloads a tar archive and unpacks it on the fly by piping the response
# body into tar.
#
# Parameters:
# url    - URI of a .tar.bz2 or .tar.xz archive.
# target - Pathname of the directory to unpack into.
#
# Returns target joined with the archive name without its extension.
# Raises RuntimeError for other archive extensions or when tar fails, and
# whatever Net::HTTPResponse#error! raises for a failed response.
def download_and_unarchive(url, target)
  case File.extname url.path
  when '.bz2'
    archive_type = '-j'
    root_directory = File.basename url.path, '.tar.bz2'
  when '.xz'
    archive_type = '-J'
    root_directory = File.basename url.path, '.tar.xz'
  else
    raise "Unsupported archive type #{url.path}."
  end

  Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http|
    request = Net::HTTP::Get.new url.request_uri

    http.request request do |response|
      case response
      when Net::HTTPRedirection
        # Resolve the location against the current URL so that relative
        # redirects work, and keep unpacking into the same directory.
        download_and_unarchive url + response['location'], target
      when Net::HTTPSuccess
        Open3.popen2 'tar', '-C', target.to_path, archive_type, '-xv' do |stdin, stdout, wait_thread|
          # Drain tar's listing concurrently, otherwise tar blocks on a full
          # stdout pipe while we are still feeding its stdin.
          printer = Thread.new do
            stdout.each { |line| puts line }
          end

          response.read_body do |chunk|
            stdin.write chunk
          end
          stdin.close

          printer.join
          status = wait_thread.value
          raise "tar failed to unpack #{url} (#{status})." unless status.success?
        end
      else
        response.error!
      end
    end
  end
  target + root_directory
end
|
||||
|
||||
# Tasks building a cross toolchain. Every task accepts an optional :target
# argument (the target triplet) and keeps all its files under ./build.
namespace :cross do
  desc 'Build cross binutils'
  task :binutils, [:target] do |_, args|
    toolchain = find_build_target GCC_VERSION, args
    toolchain.tools.mkpath
    sources = download_and_unarchive(
      URI.parse("https://ftp.gnu.org/gnu/binutils/binutils-#{BINUTILS_VERSION}.tar.xz"),
      toolchain.tools
    )

    # Out-of-tree build directory next to the unpacked sources.
    build_directory = sources.dirname + 'build-binutils'
    build_directory.mkpath
    toolchain.rootfs.mkpath

    environment = {
      'CC' => toolchain.gcc,
      'CXX' => toolchain.gxx
    }
    flags = [
      "--prefix=#{toolchain.rootfs.realpath}",
      "--target=#{toolchain.target}",
      '--disable-nls',
      '--enable-gprofng=no',
      '--disable-werror',
      '--enable-default-hash-style=gnu',
      '--disable-libquadmath'
    ]
    configure_script = sources.relative_path_from(build_directory) + 'configure'
    jobs = Etc.nprocessors.to_s

    sh environment, configure_script.to_path, *flags, chdir: build_directory.to_path
    sh environment, 'make', '-j', jobs, chdir: build_directory.to_path
    sh environment, 'make', 'install', chdir: build_directory.to_path
  end

  desc 'Build stage 1 GCC'
  task :gcc1, [:target] do |_, args|
    toolchain = find_build_target GCC_VERSION, args
    toolchain.tools.mkpath
    sources = download_and_unarchive(
      URI.parse("https://gcc.gnu.org/pub/gcc/releases/gcc-#{GCC_VERSION}/gcc-#{GCC_VERSION}.tar.xz"),
      toolchain.tools
    )

    build_directory = sources.dirname + 'build-gcc'
    build_directory.mkpath
    toolchain.rootfs.mkpath
    toolchain.sysroot.mkpath

    sh 'contrib/download_prerequisites', chdir: sources.to_path

    # The first stage is configured without headers and with the runtime
    # libraries disabled.
    flags = [
      "--prefix=#{toolchain.rootfs.realpath}",
      "--with-sysroot=#{toolchain.sysroot.realpath}",
      '--enable-languages=c,c++',
      '--disable-shared',
      '--with-arch=rv32imafdc',
      '--with-abi=ilp32d',
      '--with-tune=rocket',
      '--with-isa-spec=20191213',
      '--disable-bootstrap',
      '--disable-multilib',
      '--disable-libmudflap',
      '--disable-libssp',
      '--disable-libquadmath',
      '--disable-libsanitizer',
      '--disable-threads',
      '--disable-libatomic',
      '--disable-libgomp',
      '--disable-libvtv',
      '--disable-libstdcxx',
      '--disable-nls',
      '--with-newlib',
      '--without-headers',
      "--target=#{toolchain.target}",
      "--build=#{toolchain.build}",
      "--host=#{toolchain.build}"
    ]
    compiler_flags = '-O2 -fPIC'
    environment = {
      'CC' => toolchain.gcc,
      'CXX' => toolchain.gxx,
      'CFLAGS' => compiler_flags,
      'CXXFLAGS' => compiler_flags,
      'PATH' => "#{toolchain.rootfs.realpath + 'bin'}:#{ENV['PATH']}"
    }
    configure_script = sources.relative_path_from(build_directory) + 'configure'
    jobs = Etc.nprocessors.to_s

    sh environment, configure_script.to_path, *flags, chdir: build_directory.to_path
    sh environment, 'make', '-j', jobs, chdir: build_directory.to_path
    sh environment, 'make', 'install', chdir: build_directory.to_path
  end

  desc 'Copy glibc headers'
  task :headers, [:target] do |_, args|
    toolchain = find_build_target GCC_VERSION, args
    toolchain.tools.mkpath

    sources = download_and_unarchive(
      URI.parse("https://ftp.gnu.org/gnu/glibc/glibc-#{GLIBC_VERSION}.tar.xz"),
      toolchain.tools
    )
    headers = toolchain.tools + 'include'

    headers.mkpath
    # This directory is put on the host include path when the kernel is built.
    cp sources + 'elf/elf.h', headers + 'elf.h'
  end

  desc 'Build linux kernel'
  task :kernel, [:target] do |_, args|
    toolchain = find_build_target GCC_VERSION, args
    toolchain.tools.mkpath

    # The kernel is built inside its source tree.
    kernel_tree = download_and_unarchive(
      URI.parse("https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-#{KERNEL_VERSION}.tar.xz"),
      toolchain.tools
    )

    environment = {
      'CROSS_COMPILE' => "#{toolchain.target}-",
      'ARCH' => 'riscv',
      'PATH' => "#{toolchain.rootfs.realpath + 'bin'}:#{ENV['PATH']}",
      'HOSTCFLAGS' => "-D_UUID_T -D__GETHOSTUUID_H -I#{toolchain.tools.realpath + 'include'}"
    }
    jobs = Etc.nprocessors.to_s

    sh environment, 'make', 'rv32_defconfig', chdir: kernel_tree.to_path
    sh environment, 'make', '-j', jobs, chdir: kernel_tree.to_path
    sh environment, 'make', 'headers', chdir: kernel_tree.to_path

    usr = toolchain.sysroot + 'usr'

    usr.mkpath
    cp_r kernel_tree + 'usr/include', usr + 'include'
  end

  desc 'Build glibc'
  task :glibc, [:target] do |_, args|
    toolchain = find_build_target GCC_VERSION, args
    # No download here: the sources are expected in the tools directory.
    sources = toolchain.tools + "glibc-#{GLIBC_VERSION}"
    flags = [
      '--prefix=/usr',
      "--host=#{toolchain.target}",
      "--target=#{toolchain.target}",
      "--build=#{toolchain.build}",
      "--enable-kernel=#{KERNEL_VERSION}",
      "--with-headers=#{toolchain.sysroot.realpath + 'usr/include'}",
      '--disable-nscd',
      '--disable-libquadmath',
      '--disable-libitm',
      '--disable-werror',
      'libc_cv_forced_unwind=yes'
    ]
    cross_bin = toolchain.rootfs.realpath + 'bin'
    environment = {
      'PATH' => "#{cross_bin}:#{ENV['PATH']}",
      'MAKE' => 'make' # Otherwise it uses gnumake which can be different and too old.
    }
    build_directory = sources.dirname + 'build-glibc'
    build_directory.mkpath

    configure_script = sources.relative_path_from(build_directory) + './configure'
    jobs = Etc.nprocessors.to_s

    sh environment, configure_script.to_path, *flags, chdir: build_directory.to_path
    sh environment, 'make', '-j', jobs, chdir: build_directory.to_path
    sh environment, 'make', "install_root=#{toolchain.sysroot.realpath}", 'install',
      chdir: build_directory.to_path
  end

  desc 'Build stage 2 GCC'
  task :gcc2, [:target] do |_, args|
    toolchain = find_build_target GCC_VERSION, args
    sources = toolchain.tools + "gcc-#{GCC_VERSION}"
    build_directory = toolchain.tools + 'build-gcc'

    # Start from an empty build directory; stage 1 used the same one.
    rm_rf build_directory
    build_directory.mkpath

    flags = [
      "--prefix=#{toolchain.rootfs.realpath}",
      "--with-sysroot=#{toolchain.sysroot.realpath}",
      '--enable-languages=c,c++,lto',
      '--enable-lto',
      '--enable-shared',
      '--with-arch=rv32imafdc',
      '--with-abi=ilp32d',
      '--with-tune=rocket',
      '--with-isa-spec=20191213',
      '--disable-bootstrap',
      '--disable-multilib',
      '--enable-checking=release',
      '--disable-libssp',
      '--disable-libquadmath',
      '--enable-threads=posix',
      '--with-default-libstdcxx-abi=new',
      '--disable-nls',
      "--target=#{toolchain.target}",
      "--build=#{toolchain.build}",
      "--host=#{toolchain.build}"
    ]
    compiler_flags = '-O2 -fPIC'
    environment = {
      'CFLAGS' => compiler_flags,
      'CXXFLAGS' => compiler_flags,
      'PATH' => "#{toolchain.rootfs.realpath + 'bin'}:#{ENV['PATH']}"
    }
    configure_script = sources.relative_path_from(build_directory) + 'configure'
    jobs = Etc.nprocessors.to_s

    sh environment, configure_script.to_path, *flags, chdir: build_directory.to_path
    sh environment, 'make', '-j', jobs, chdir: build_directory.to_path
    sh environment, 'make', 'install', chdir: build_directory.to_path
  end
end
|
||||
|
||||
# Umbrella task: runs every cross:* step in dependency order and does nothing
# on its own.
desc 'Build cross toolchain'
task cross: %w[
  cross:binutils
  cross:gcc1
  cross:headers
  cross:kernel
  cross:glibc
  cross:gcc2
] do
  # All the work is done by the prerequisites.
end
|
@@ -1,61 +0,0 @@
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public License,
|
||||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
|
||||
# obtain one at https://mozilla.org/MPL/2.0/.
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Cross compiler used to assemble and link the boot stages.
CROSS_GCC = 'build/rootfs/bin/riscv32-unknown-linux-gnu-gcc'
|
||||
# Passed to QEMU with -L when a stage is run.
SYSROOT = 'build/sysroot'
|
||||
# User-mode emulator used to run the RISC-V stage executables.
QEMU = 'qemu-riscv32'
|
||||
|
||||
# Runs a stage compiler under QEMU, feeding it the source on the standard
# input and copying its standard output into the given stream.
#
# Parameters:
# output   - Writable IO receiving the generated output.
# compiler - Command (executable and arguments) of the stage compiler.
# source   - Arguments for File.read, normally a one-element array holding
#            the source file name.
#
# The exit status of the compiler is not checked.
def assemble_stage(output, compiler, source)
  arguments = [QEMU, '-L', SYSROOT, *compiler]

  puts Term::ANSIColor.green(arguments * ' ')
  puts
  Open3.popen2(*arguments) do |qemu_in, qemu_out|
    # Feed the input from a separate thread. Writing everything before
    # reading anything deadlocks as soon as the compiler fills its output
    # pipe while we are still blocked writing to its input.
    writer = Thread.new do
      qemu_in.write File.read(*source)
      qemu_in.close
    end

    IO.copy_stream qemu_out, output
    qemu_out.close
    # Re-raises anything that went wrong while writing the input.
    writer.join
  end
end
|
||||
|
||||
# Every assembly file in boot/ gets an object file rule. The files that are
# not stages themselves form the library linked into each stage.
library = Dir.glob('boot/*.s').each_with_object([]) do |assembly_source, sources|
  file_name = Pathname.new(assembly_source).basename
  object_file = Pathname.new('build/boot') + file_name.sub_ext('.o')

  file object_file.to_s => [assembly_source, 'build/boot'] do |t|
    sh CROSS_GCC, '-c', '-o', t.name, assembly_source
  end
  sources << assembly_source unless file_name.to_s.start_with? 'stage'
end

desc 'Initial stage'
file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |t|
  sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end

# Each stage 2 assembly is produced by compiling boot/stage2.elna with the
# executable of the previous step.
{
  'build/boot/stage2a.s' => 'build/boot/stage1',
  'build/boot/stage2b.s' => 'build/boot/stage2a'
}.each do |assembly, previous_stage|
  file assembly => [previous_stage, 'boot/stage2.elna'] do |t|
    source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }

    File.open(t.name, 'w') { |output| assemble_stage output, exe, source }
  end
end

# The stage 2 executables are linked from their assembly and the library.
%w[build/boot/stage2a build/boot/stage2b].each do |exe|
  file exe => [exe.ext('.s'), *library] do |t|
    sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
  end
end
|
Reference in New Issue
Block a user