Compare commits

...

No commits in common. "modula2" and "assembly" have entirely different histories.

25 changed files with 5055 additions and 2504 deletions

4
.gitignore vendored
View File

@ -1,3 +1,3 @@
a.out
/boot/
/build/
a.out
/vendor/

9
Gemfile Normal file
View File

@ -0,0 +1,9 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
source 'https://rubygems.org'
gem 'term-ansicolor', '~> 1.2'
gem 'rake', '~> 13.2'

22
Gemfile.lock Normal file
View File

@ -0,0 +1,22 @@
GEM
remote: https://rubygems.org/
specs:
bigdecimal (3.1.9)
rake (13.2.1)
sync (0.5.0)
term-ansicolor (1.11.2)
tins (~> 1.0)
tins (1.38.0)
bigdecimal
sync
PLATFORMS
ruby
x86_64-linux
DEPENDENCIES
rake (~> 13.2)
term-ansicolor (~> 1.2)
BUNDLED WITH
2.6.7

373
LICENSE Normal file
View File

@ -0,0 +1,373 @@
Mozilla Public License Version 2.0
==================================
1. Definitions
--------------
1.1. "Contributor"
means each individual or legal entity that creates, contributes to
the creation of, or owns Covered Software.
1.2. "Contributor Version"
means the combination of the Contributions of others (if any) used
by a Contributor and that particular Contributor's Contribution.
1.3. "Contribution"
means Covered Software of a particular Contributor.
1.4. "Covered Software"
means Source Code Form to which the initial Contributor has attached
the notice in Exhibit A, the Executable Form of such Source Code
Form, and Modifications of such Source Code Form, in each case
including portions thereof.
1.5. "Incompatible With Secondary Licenses"
means
(a) that the initial Contributor has attached the notice described
in Exhibit B to the Covered Software; or
(b) that the Covered Software was made available under the terms of
version 1.1 or earlier of the License, but not also under the
terms of a Secondary License.
1.6. "Executable Form"
means any form of the work other than Source Code Form.
1.7. "Larger Work"
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.
1.8. "License"
means this document.
1.9. "Licensable"
means having the right to grant, to the maximum extent possible,
whether at the time of the initial grant or subsequently, any and
all of the rights conveyed by this License.
1.10. "Modifications"
means any of the following:
(a) any file in Source Code Form that results from an addition to,
deletion from, or modification of the contents of Covered
Software; or
(b) any new file in Source Code Form that contains any Covered
Software.
1.11. "Patent Claims" of a Contributor
means any patent claim(s), including without limitation, method,
process, and apparatus claims, in any patent Licensable by such
Contributor that would be infringed, but for the grant of the
License, by the making, using, selling, offering for sale, having
made, import, or transfer of either its Contributions or its
Contributor Version.
1.12. "Secondary License"
means either the GNU General Public License, Version 2.0, the GNU
Lesser General Public License, Version 2.1, the GNU Affero General
Public License, Version 3.0, or any later versions of those
licenses.
1.13. "Source Code Form"
means the form of the work preferred for making modifications.
1.14. "You" (or "Your")
means an individual or a legal entity exercising rights under this
License. For legal entities, "You" includes any entity that
controls, is controlled by, or is under common control with You. For
purposes of this definition, "control" means (a) the power, direct
or indirect, to cause the direction or management of such entity,
whether by contract or otherwise, or (b) ownership of more than
fifty percent (50%) of the outstanding shares or beneficial
ownership of such entity.
2. License Grants and Conditions
--------------------------------
2.1. Grants
Each Contributor hereby grants You a world-wide, royalty-free,
non-exclusive license:
(a) under intellectual property rights (other than patent or trademark)
Licensable by such Contributor to use, reproduce, make available,
modify, display, perform, distribute, and otherwise exploit its
Contributions, either on an unmodified basis, with Modifications, or
as part of a Larger Work; and
(b) under Patent Claims of such Contributor to make, use, sell, offer
for sale, have made, import, and otherwise transfer either its
Contributions or its Contributor Version.
2.2. Effective Date
The licenses granted in Section 2.1 with respect to any Contribution
become effective for each Contribution on the date the Contributor first
distributes such Contribution.
2.3. Limitations on Grant Scope
The licenses granted in this Section 2 are the only rights granted under
this License. No additional rights or licenses will be implied from the
distribution or licensing of Covered Software under this License.
Notwithstanding Section 2.1(b) above, no patent license is granted by a
Contributor:
(a) for any code that a Contributor has removed from Covered Software;
or
(b) for infringements caused by: (i) Your and any other third party's
modifications of Covered Software, or (ii) the combination of its
Contributions with other software (except as part of its Contributor
Version); or
(c) under Patent Claims infringed by Covered Software in the absence of
its Contributions.
This License does not grant any rights in the trademarks, service marks,
or logos of any Contributor (except as may be necessary to comply with
the notice requirements in Section 3.4).
2.4. Subsequent Licenses
No Contributor makes additional grants as a result of Your choice to
distribute the Covered Software under a subsequent version of this
License (see Section 10.2) or under the terms of a Secondary License (if
permitted under the terms of Section 3.3).
2.5. Representation
Each Contributor represents that the Contributor believes its
Contributions are its original creation(s) or it has sufficient rights
to grant the rights to its Contributions conveyed by this License.
2.6. Fair Use
This License is not intended to limit any rights You have under
applicable copyright doctrines of fair use, fair dealing, or other
equivalents.
2.7. Conditions
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
in Section 2.1.
3. Responsibilities
-------------------
3.1. Distribution of Source Form
All distribution of Covered Software in Source Code Form, including any
Modifications that You create or to which You contribute, must be under
the terms of this License. You must inform recipients that the Source
Code Form of the Covered Software is governed by the terms of this
License, and how they can obtain a copy of this License. You may not
attempt to alter or restrict the recipients' rights in the Source Code
Form.
3.2. Distribution of Executable Form
If You distribute Covered Software in Executable Form then:
(a) such Covered Software must also be made available in Source Code
Form, as described in Section 3.1, and You must inform recipients of
the Executable Form how they can obtain a copy of such Source Code
Form by reasonable means in a timely manner, at a charge no more
than the cost of distribution to the recipient; and
(b) You may distribute such Executable Form under the terms of this
License, or sublicense it under different terms, provided that the
license for the Executable Form does not attempt to limit or alter
the recipients' rights in the Source Code Form under this License.
3.3. Distribution of a Larger Work
You may create and distribute a Larger Work under terms of Your choice,
provided that You also comply with the requirements of this License for
the Covered Software. If the Larger Work is a combination of Covered
Software with a work governed by one or more Secondary Licenses, and the
Covered Software is not Incompatible With Secondary Licenses, this
License permits You to additionally distribute such Covered Software
under the terms of such Secondary License(s), so that the recipient of
the Larger Work may, at their option, further distribute the Covered
Software under the terms of either this License or such Secondary
License(s).
3.4. Notices
You may not remove or alter the substance of any license notices
(including copyright notices, patent notices, disclaimers of warranty,
or limitations of liability) contained within the Source Code Form of
the Covered Software, except that You may alter any license notices to
the extent required to remedy known factual inaccuracies.
3.5. Application of Additional Terms
You may choose to offer, and to charge a fee for, warranty, support,
indemnity or liability obligations to one or more recipients of Covered
Software. However, You may do so only on Your own behalf, and not on
behalf of any Contributor. You must make it absolutely clear that any
such warranty, support, indemnity, or liability obligation is offered by
You alone, and You hereby agree to indemnify every Contributor for any
liability incurred by such Contributor as a result of warranty, support,
indemnity or liability terms You offer. You may include additional
disclaimers of warranty and limitations of liability specific to any
jurisdiction.
4. Inability to Comply Due to Statute or Regulation
---------------------------------------------------
If it is impossible for You to comply with any of the terms of this
License with respect to some or all of the Covered Software due to
statute, judicial order, or regulation then You must: (a) comply with
the terms of this License to the maximum extent possible; and (b)
describe the limitations and the code they affect. Such description must
be placed in a text file included with all distributions of the Covered
Software under this License. Except to the extent prohibited by statute
or regulation, such description must be sufficiently detailed for a
recipient of ordinary skill to be able to understand it.
5. Termination
--------------
5.1. The rights granted under this License will terminate automatically
if You fail to comply with any of its terms. However, if You become
compliant, then the rights granted under this License from a particular
Contributor are reinstated (a) provisionally, unless and until such
Contributor explicitly and finally terminates Your grants, and (b) on an
ongoing basis, if such Contributor fails to notify You of the
non-compliance by some reasonable means prior to 60 days after You have
come back into compliance. Moreover, Your grants from a particular
Contributor are reinstated on an ongoing basis if such Contributor
notifies You of the non-compliance by some reasonable means, this is the
first time You have received notice of non-compliance with this License
from such Contributor, and You become compliant prior to 30 days after
Your receipt of the notice.
5.2. If You initiate litigation against any entity by asserting a patent
infringement claim (excluding declaratory judgment actions,
counter-claims, and cross-claims) alleging that a Contributor Version
directly or indirectly infringes any patent, then the rights granted to
You by any and all Contributors for the Covered Software under Section
2.1 of this License shall terminate.
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
end user license agreements (excluding distributors and resellers) which
have been validly granted by You or Your distributors under this License
prior to termination shall survive termination.
************************************************************************
* *
* 6. Disclaimer of Warranty *
* ------------------------- *
* *
* Covered Software is provided under this License on an "as is" *
* basis, without warranty of any kind, either expressed, implied, or *
* statutory, including, without limitation, warranties that the *
* Covered Software is free of defects, merchantable, fit for a *
* particular purpose or non-infringing. The entire risk as to the *
* quality and performance of the Covered Software is with You. *
* Should any Covered Software prove defective in any respect, You *
* (not any Contributor) assume the cost of any necessary servicing, *
* repair, or correction. This disclaimer of warranty constitutes an *
* essential part of this License. No use of any Covered Software is *
* authorized under this License except under this disclaimer. *
* *
************************************************************************
************************************************************************
* *
* 7. Limitation of Liability *
* -------------------------- *
* *
* Under no circumstances and under no legal theory, whether tort *
* (including negligence), contract, or otherwise, shall any *
* Contributor, or anyone who distributes Covered Software as *
* permitted above, be liable to You for any direct, indirect, *
* special, incidental, or consequential damages of any character *
* including, without limitation, damages for lost profits, loss of *
* goodwill, work stoppage, computer failure or malfunction, or any *
* and all other commercial damages or losses, even if such party *
* shall have been informed of the possibility of such damages. This *
* limitation of liability shall not apply to liability for death or *
* personal injury resulting from such party's negligence to the *
* extent applicable law prohibits such limitation. Some *
* jurisdictions do not allow the exclusion or limitation of *
* incidental or consequential damages, so this exclusion and *
* limitation may not apply to You. *
* *
************************************************************************
8. Litigation
-------------
Any litigation relating to this License may be brought only in the
courts of a jurisdiction where the defendant maintains its principal
place of business and such litigation shall be governed by laws of that
jurisdiction, without reference to its conflict-of-law provisions.
Nothing in this Section shall prevent a party's ability to bring
cross-claims or counter-claims.
9. Miscellaneous
----------------
This License represents the complete agreement concerning the subject
matter hereof. If any provision of this License is held to be
unenforceable, such provision shall be reformed only to the extent
necessary to make it enforceable. Any law or regulation which provides
that the language of a contract shall be construed against the drafter
shall not be used to construe this License against a Contributor.
10. Versions of the License
---------------------------
10.1. New Versions
Mozilla Foundation is the license steward. Except as provided in Section
10.3, no one other than the license steward has the right to modify or
publish new versions of this License. Each version will be given a
distinguishing version number.
10.2. Effect of New Versions
You may distribute the Covered Software under the terms of the version
of the License under which You originally received the Covered Software,
or under the terms of any subsequent version published by the license
steward.
10.3. Modified Versions
If you create software not governed by this License, and you want to
create a new license for such software, you may create and use a
modified version of this License if you rename the license and remove
any references to the name of the license steward (except to note that
such modified license differs from this License).
10.4. Distributing Source Code Form that is Incompatible With Secondary
Licenses
If You choose to distribute Source Code Form that is Incompatible With
Secondary Licenses under the terms of this version of the License, the
notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
-------------------------------------------
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
file in a relevant directory) where a recipient would be likely to look
for such a notice.
You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------
This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.

View File

@ -9,55 +9,3 @@ representation for a such high-level hypothetical programming language.
## File extension
.elna
## Current implementation
This repository contains a GCC frontend for Elna. After finishing the frontend
I'm planning to rewrite the compiler in Elna itself with its own backend and
a hand-written parser. So GCC gives a way to have a simple bootstrap compiler
and a possibility to compile Elna programs for different platforms.
## Grammar
program = block "." ;
block = [ "const" ident "=" number {"," ident "=" number} ";"]
[ "var" ident {"," ident} ";"]
{ "procedure" ident ";" block ";" } statement ;
statement = [ ident ":=" expression | "call" ident
| "?" ident | "!" expression
| "begin" statement {";" statement } "end"
| "if" condition "then" statement
| "while" condition "do" statement ];
condition = "odd" expression |
expression ("="|"#"|"<"|"<="|">"|">=") expression ;
expression = [ "+"|"-"] term { ("+"|"-") term};
term = factor {("*"|"/") factor};
factor = ident | number | "(" expression ")";
## Build
The frontend requires GCC 14.2.0 (not tested with other versions).
Download the GCC source. Copy the contents of this repository into `gcc/elna`
inside GCC. Finally build GCC enabling the frontend with
`--enable-languages=c,c++,elna`. After the installation the compiler can be
invoked with `$prefix/bin/gelna`.
There is also a `Rakefile` that downloads, builds and installs GCC into the
`./build/` subdirectory. The `Rakefile` assumes that ruby and rake, as well as
all GCC dependencies are already available in the system. It works under Linux
and Mac OS. In the latter case GCC is patched with the patches used by Homebrew
(official GCC doesn't support Apple silicon targets). Invoke with
```sh
rake boot
```
See `rake -T` for more tasks. The GCC source is under `build/tools`. The
installation path is `build/host/install`.

155
Rakefile
View File

@ -1,141 +1,38 @@
require 'pathname'
require 'rake/clean'
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
require 'open3'
require 'rake/clean'
require 'term/ansicolor'
M2C = 'gm2' # Modula-2 compiler.
CLEAN.include 'build/boot'
stage_compiler = Pathname.new 'build/stage1/elna'
directory 'build/boot'
directory 'build/stage1'
directory 'build/source'
directory 'build/self'
CLEAN.include 'build'
rule(/build\/stage1\/.+\.o$/ => ->(file) {
path = Pathname.new('boot/stage1/source') + Pathname.new(file).basename
['build/stage1', path.sub_ext('.def'), path.sub_ext('.mod')]
}) do |t|
sources = t.prerequisites.filter { |f| f.end_with? '.mod' }
sh M2C, '-c', '-I', 'boot/stage1/source', '-o', t.name, *sources
end
file 'build/stage1/elna' => FileList['boot/stage1/source/*'].map { |file|
File.join 'build', 'stage1', Pathname.new(file).basename.sub_ext('.o')
} do |t|
sh M2C, '-o', t.name, *t.prerequisites
end
file 'build/stage1/Compiler.o' => ['build/stage1', 'boot/stage1/source/Compiler.mod'] do |t|
sources = t.prerequisites.filter { |f| f.end_with? '.mod' }
sh M2C, '-fscaffold-main', '-c', '-I', 'boot/stage1/source', '-o', t.name, *sources
end
['source', 'self'].each do |sub|
rule(/build\/#{sub}\/.+\.mod$/ => [
"build/#{sub}", stage_compiler.to_path,
->(file) { File.join('source', Pathname.new(file).basename.sub_ext('.elna')) }
]) do |t|
sources, compiler = t.prerequisites
.reject { |f| File.directory? f }
.partition { |f| f.end_with? '.elna' }
File.open t.name, 'w' do |output|
compiler_command = compiler + sources
puts
puts(compiler_command * ' ')
Open3.popen2(*compiler_command) do |cl_in, cl_out|
cl_in.close
IO.copy_stream cl_out, output
cl_out.close
end
end
end
rule(/build\/#{sub}\/.+\.o$/ => ->(file) {
path = Pathname.new(file).relative_path_from('build')
result = []
result << File.join('source', path.basename.sub_ext('.def'))
result << File.join('build', path.sub_ext('.mod'))
}) do |t|
sources = t.prerequisites.filter { |f| f.end_with? '.mod' }
sh M2C, '-c', '-I', 'source', '-o', t.name, *sources
end
file "build/#{sub}/Compiler.o" => ["build/#{sub}/Compiler.mod"] do |t|
sh M2C, '-fscaffold-main', '-c', '-I', 'source', '-o', t.name, *t.prerequisites
end
stage_compiler = Pathname.new('build') + sub + 'elna'
file stage_compiler => FileList["source/*.elna"].map { |file|
File.join 'build', sub, Pathname.new(file).basename.sub_ext('.o')
} do |t|
sh M2C, '-o', t.name, *t.prerequisites
end
end
task default: 'build/self/elna'
task default: 'build/self/Compiler.mod'
task default: 'source/Compiler.elna'
task :default do |t|
desc 'Final stage'
task default: ['build/boot/stage2b', 'build/boot/stage2b.s', 'boot/stage2.elna'] do |t|
exe, previous_output, source = t.prerequisites
exe_arguments = [exe, source]
cat_arguments = ['cat', source]
compiler_arguments = [QEMU, '-L', SYSROOT, exe]
diff_arguments = ['diff', '-Nur', '--text', previous_output, '-']
puts [exe, diff_arguments * ' '].join(' | ')
Open3.pipeline exe_arguments, diff_arguments
Open3.pipeline(cat_arguments, compiler_arguments, diff_arguments)
end
task :backport do
FileList['source/*.elna'].each do |file|
source_path = Pathname.new file
source = File.read source_path
current_procedure = nil
target = ''
module_name = source_path.basename.sub_ext('')
file 'build/boot/test.s' => ['build/boot/stage1', 'boot/test.elna'] do |t|
source, exe = t.prerequisites.partition { |prerequisite| prerequisite.end_with? '.elna' }
source
.gsub(/^(var|type|const|begin)/) { |match| match.upcase }
.gsub(/\b(record|nil|or|false|true)\b/) { |match| match.upcase }
.gsub(/proc\(/, 'PROCEDURE(')
.gsub(/ & /, ' AND ')
.gsub(/ -> /, ': ')
.gsub(/program;/, "MODULE #{module_name};")
.gsub(/module;/, "IMPLEMENTATION MODULE #{module_name};")
.gsub(/end\./, "END #{module_name}.")
.gsub(/([[:space:]]*)end(;?)$/, '\1END\2')
.gsub(/^([[:space:]]*)(while|return|if)\b/) { |match| match.upcase }
.gsub(/^from ([[:alnum:]]+) import/, 'FROM \1 IMPORT')
.gsub(/ \^([[:alnum:]])/, ' POINTER TO \1')
.gsub(/(then|do)$/) { |match| match.upcase }
.gsub(/(:|=) \[([[:digit:]]+)\]/, '\1 ARRAY[1..\2] OF ')
.each_line do |line|
# Rewrite procedure headers and block terminators into Modula-2 syntax.
if line.start_with? 'proc'
  # Remember the procedure name (the text between "proc " and the opening
  # parenthesis) so the matching END can be labelled with it.
  current_procedure = line[5...line.index('(')]
  line = 'PROCEDURE ' + line[5..].gsub(',', ';')
elsif line.start_with?('END;') && !current_procedure.nil?
  line = "END #{current_procedure};"
  # Forget the name once the procedure is closed; otherwise every later
  # top-level "END;" would be labelled with a stale procedure name.
  current_procedure = nil
elsif line.start_with?('end')
  line = 'END ' + line[4..]
end
target += line
end
target_path = Pathname.new('boot/stage1/source') + source_path.basename.sub_ext('.mod')
File.write target_path, target
end
FileList['source/*.def'].each do |file|
cp file, File.join('boot/stage1/source', Pathname.new(file).basename)
File.open t.name, 'w' do |output|
assemble_stage output, exe, source
end
end
file 'build/boot/test' => ['build/boot/test.s', 'boot/common-boot.s'] do |t|
sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end
task test: 'build/boot/test' do |t|
sh QEMU, '-L', SYSROOT, t.prerequisites.first
end

630
boot/common-boot.s Normal file
View File

@ -0,0 +1,630 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global _is_alpha, _is_digit, _is_alnum, _is_upper, _is_lower
.global _write_s, _read_file, _write_error, _write_c, _write_i, _print_i
.global _memcmp, _memchr, _memmem, _memcpy, _mmap
.global _current, _get, _advance, _label_counter
.global _divide_by_zero_error, _exit, _strings_index, _string_equal
.section .rodata
.equ SYS_READ, 63
.equ SYS_WRITE, 64
.equ SYS_EXIT, 93
.equ SYS_MMAP2, 222
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2
.equ PROT_READ, 0x1
.equ PROT_WRITE, 0x2
.equ MAP_PRIVATE, 0x02
.equ MAP_ANONYMOUS, 0x20
new_line: .ascii "\n"
.section .text
# Writes the given string to the standard error, followed by a newline.
#
# Parameters:
# a0 - String pointer.
# a1 - String length.
#
# Uses t0 and t1 as scratch registers and overwrites a0, a1, a2 and a7.
.type _write_error, @function
_write_error:
mv t0, a0 # Keep the pointer and the length, a0 and a1 are reassigned below.
mv t1, a1
li a0, STDERR
mv a1, t0
mv a2, t1
li a7, SYS_WRITE
ecall # write(STDERR, string, length)
li a0, STDERR
la a1, new_line
li a2, 1
li a7, SYS_WRITE
ecall # write(STDERR, new_line, 1)
ret
# Compares two memory regions byte by byte.
#
# Parameters:
# a0 - First pointer.
# a1 - Second pointer.
# a2 - The length to compare.
#
# Returns 0 in a0 if memory regions are equal, otherwise the difference
# between the first pair of bytes that differ (first minus second).
.type _memcmp, @function
_memcmp:
    mv t0, a0                   # t0 walks the first region, a0 becomes the result.
    li a0, 0                    # Regions of length 0 compare equal.
    j .Lmemcmp_check

.Lmemcmp_compare:
    lbu t1, (t0)
    lbu t2, (a1)
    sub a0, t1, t2
    bnez a0, .Lmemcmp_done      # Stop at the first mismatch.

    addi t0, t0, 1
    addi a1, a1, 1
    addi a2, a2, -1

.Lmemcmp_check:
    bnez a2, .Lmemcmp_compare   # Continue while there are bytes left.

.Lmemcmp_done:
    ret
# Detects if a0 is an uppercase character ('A' to 'Z').
# Sets a0 to 1 if so, otherwise to 0.
.type _is_upper, @function
_is_upper:
    addi t0, a0, -'A'               # t0 = a0 - 'A'; wraps to a large unsigned value below 'A'.
    sltiu a0, t0, 'Z' - 'A' + 1     # a0 = (a0 - 'A') < 26, i.e. 'A' <= a0 <= 'Z'.
    ret
# Detects if a0 is a lowercase character ('a' to 'z').
# Sets a0 to 1 if so, otherwise to 0.
.type _is_lower, @function
_is_lower:
    addi t2, a0, -'a'               # t2 = a0 - 'a'; wraps to a large unsigned value below 'a'.
    sltiu a0, t2, 'z' - 'a' + 1     # a0 = (a0 - 'a') < 26, i.e. 'a' <= a0 <= 'z'.
    ret
# Detects if the passed character is a 7-bit alpha character or an underscore.
#
# Parameters:
# a0 - Examined character.
#
# Sets a0 to 1 if the character is an alpha character or underscore, sets it to 0 otherwise.
#
# Stack frame (16 bytes):
# 12(sp) - Saved ra.
# 8(sp) - Saved s0.
# 4(sp) - The examined character.
# 0(sp) - Result of _is_upper.
.type _is_alpha, @function
_is_alpha:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sw a0, 4(sp) # Keep the character, a0 is overwritten by the calls below.
call _is_upper
sw a0, 0(sp)
lw a0, 4(sp)
call _is_lower
lw t0, 4(sp)
xori t1, t0, '_'
seqz t1, t1 # t1 = character == '_'
lw t0, 0(sp)
or a0, a0, t0 # a0 = is lower | is upper
or a0, a0, t1 # a0 = is lower | is upper | is underscore
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Detects whether the passed character is a decimal digit
# (a character between '0' and '9').
#
# Parameters:
# a0 - Examined value.
#
# Sets a0 to 1 if it is a digit, to 0 otherwise.
.type _is_digit, @function
_is_digit:
    addi t0, a0, -'0'               # t0 = a0 - '0'; wraps to a large unsigned value below '0'.
    sltiu a0, t0, '9' - '0' + 1     # a0 = (a0 - '0') < 10, i.e. '0' <= a0 <= '9'.
    ret
# Detects whether the passed character is accepted by _is_alpha or by
# _is_digit.
#
# Parameters:
# a0 - Examined character.
#
# Sets a0 to 1 if either check succeeds, to 0 otherwise.
.type _is_alnum, @function
_is_alnum:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sw a0, 4(sp) # Keep the character for the second check.
call _is_alpha
sw a0, 0(sp) # Keep the result of _is_alpha.
lw a0, 4(sp)
call _is_digit
lw a1, 0(sp)
or a0, a0, a1 # a0 = is digit | is alpha
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Writes a string to the standard output.
#
# Note the argument order: the length comes first, the pointer second.
#
# Parameters:
# a0 - Length of the string.
# a1 - String pointer.
#
# a0 holds the result of the write system call afterwards.
.type _write_s, @function
_write_s:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
mv a2, a0 # The length is the third argument of write; a1 already holds the pointer.
li a0, STDOUT
li a7, SYS_WRITE
ecall # write(STDOUT, string, length)
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Reads standard input into a buffer.
#
# Parameters:
# a0 - Buffer pointer.
# a1 - Buffer size.
#
# Sets s1 to the buffer passed in a0. s1 is deliberately not restored: it is
# the source text position that _current and _advance operate on.
#
# Returns the result of the read system call (the number of bytes read into
# the buffer) in a0.
.type _read_file, @function
_read_file:
# Prologue.
addi sp, sp, -8
sw ra, 4(sp)
sw s0, 0(sp)
addi s0, sp, 8
mv s1, a0 # Remember the beginning of the source text.
li a0, STDIN
mv a2, a1
mv a1, s1
li a7, SYS_READ
ecall # read(STDIN, buffer, size)
# Epilogue.
lw ra, 4(sp)
lw s0, 0(sp)
addi sp, sp, 8
ret
# Terminates the program. a0 contains the return code.
#
# Parameters:
# a0 - Status code.
#
# There is intentionally no return instruction: the exit system call is not
# expected to come back. NOTE(review): if it ever did, execution would fall
# through into whatever follows this procedure.
.type _exit, @function
_exit:
li a7, SYS_EXIT
ecall # exit(status)
# ret
# Reports a division by zero by sending signal 8 (SIGFPE) to the current
# process: getpid leaves the process ID in a0, which is then used as the
# first argument of kill.
#
# Returns to the caller if the kill system call returns.
.type _divide_by_zero_error, @function
_divide_by_zero_error:
addi a7, zero, 172 # getpid
ecall
addi a1, zero, 8 # SIGFPE
addi a7, zero, 129 # kill
ecall # kill(pid, SIGFPE)
ret
# Writes a number to a string buffer as decimal text, with a leading '-'
# for negative values.
#
# The digits are produced from right to left in a scratch area of the stack
# frame and then copied to the destination with _memcpy. The destination
# has to provide room for up to 11 bytes (sign and 10 digits of a 32-bit
# number). No terminator is written.
#
# t0 - Local buffer.
# t1 - Constant 10.
# t2 - Current character.
# t3 - Whether the number is negative.
#
# Parameters:
# a0 - Whole number.
# a1 - Buffer pointer.
#
# Sets a0 to the length of the written number.
.type _print_i, @function
_print_i:
    addi sp, sp, -32
    sw ra, 28(sp)
    sw s0, 24(sp)
    addi s0, sp, 32

    li t1, 10
    addi t0, s0, -9             # Last byte of the scratch area, right below the saved s0.
    li t3, 0
    bgez a0, .Lprint_i_digit10
    li t3, 1
    # Take the magnitude. For the smallest integer the negation overflows
    # and leaves the same bit pattern, which is nevertheless the correct
    # magnitude when interpreted as unsigned; therefore the digit loop
    # below uses unsigned division.
    neg a0, a0

.Lprint_i_digit10:
    remu t2, a0, t1             # Lowest decimal digit.
    addi t2, t2, '0'
    sb t2, 0(t0)
    divu a0, a0, t1
    addi t0, t0, -1
    bne zero, a0, .Lprint_i_digit10

    beq zero, t3, .Lprint_i_write_call
    addi t2, zero, '-'
    sb t2, 0(t0)
    addi t0, t0, -1

.Lprint_i_write_call:
    mv a0, a1                   # Destination.
    addi a1, t0, 1              # Source: t0 points one byte below the first character.
    sub a2, s0, t0
    addi a2, a2, -9             # Length = (s0 - 9) - t0.
    sw a2, 0(sp)                # Keep the length, _memcpy overwrites a2.
    call _memcpy
    lw a0, 0(sp)

    lw ra, 28(sp)
    lw s0, 24(sp)
    addi sp, sp, 32
    ret
# Writes a number to the standard output.
#
# The number is formatted by _print_i into a buffer at the bottom of the
# stack frame and then written with _write_s.
#
# Parameters:
# a0 - Whole number.
.type _write_i, @function
_write_i:
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
addi a1, sp, 0 # The text buffer starts at the bottom of the frame.
call _print_i
addi a1, sp, 0 # a0 holds the length returned by _print_i, a1 the text.
call _write_s
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Writes a character from a0 into the standard output.
#
# Parameters:
# a0 - The character; only the lowest byte is written.
.type _write_c, @function
_write_c:
# Prologue
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
sb a0, 4(sp) # The system call needs the character in memory.
li a0, STDOUT
addi a1, sp, 4
li a2, 1
li a7, SYS_WRITE
ecall # write(STDOUT, &character, 1)
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
add sp, sp, 16
ret
# Dereferences a pointer and returns the word stored at that address.
#
# Parameters:
# a0 - Pointer to an array to get the first element.
#
# Sets a0 to the 32-bit value loaded from the address in a0.
.type _get, @function
_get:
lw a0, (a0)
ret
# Searches for the first occurrence of a character in the given memory block.
#
# Parameters:
# a0 - Memory block.
# a1 - Needle.
# a2 - Memory size.
#
# Sets a0 to the pointer to the found character or to null if the character
# doesn't occur in the memory block.
.type _memchr, @function
_memchr:
    j .Lmemchr_check

.Lmemchr_step:
    # No match at the current position, move on to the next byte.
    addi a0, a0, 1
    addi a2, a2, -1

.Lmemchr_check:
    beqz a2, .Lmemchr_missing   # Nothing left to examine.
    lbu t0, (a0)                # Load the character from the memory block.
    bne t0, a1, .Lmemchr_step
    ret                         # Found, a0 points to the character.

.Lmemchr_missing:
    li a0, 0
    ret
# Locates a substring.
#
# Every position of the haystack that still has room for the whole needle
# is compared against the needle with _memcmp.
#
# Parameters:
# a0 - Haystack.
# a1 - Haystack size.
# a2 - Needle.
# a3 - Needle size.
#
# Sets a0 to the pointer to the beginning of the substring in memory or to 0
# if the substring doesn't occur in the block.
.type _memmem, @function
_memmem:
    # Prologue.
    addi sp, sp, -24
    sw ra, 20(sp)
    sw s0, 16(sp)
    addi s0, sp, 24

    # Save preserved registers. They are used to keep arguments.
    sw s1, 12(sp)
    sw s2, 8(sp)
    sw s3, 4(sp)
    sw s4, 0(sp)

    mv s1, a0                   # Current position in the haystack.
    mv s2, a1                   # Remaining haystack size.
    mv s3, a2                   # Needle pointer.
    mv s4, a3                   # Needle size.

.Lmemmem_loop:
    # Exit if the needle is longer than the remaining memory. The sizes
    # are compared (s2 against s4), not the size against the needle pointer.
    blt s2, s4, .Lmemmem_nil

    mv a0, s1
    mv a1, s3
    mv a2, s4
    call _memcmp

    mv t0, a0                   # memcmp result.
    mv a0, s1                   # Memory pointer for the case the substring was found.
    beqz t0, .Lmemmem_end

    addi s1, s1, 1
    addi s2, s2, -1
    j .Lmemmem_loop

.Lmemmem_nil:
    li a0, 0

.Lmemmem_end:
    # Restore the preserved registers.
    lw s1, 12(sp)
    lw s2, 8(sp)
    lw s3, 4(sp)
    lw s4, 0(sp)

    # Epilogue.
    lw ra, 20(sp)
    lw s0, 16(sp)
    addi sp, sp, 24
    ret
# Copies a memory block byte by byte, front to back.
#
# Parameters:
# a0 - Destination.
# a1 - Source.
# a2 - Size.
#
# Preserves a0.
.type _memcpy, @function
_memcpy:
    mv t0, a0                   # Remember the destination, a0 is used as the cursor.
    j .Lmemcpy_check

.Lmemcpy_copy:
    lbu t1, (a1)
    sb t1, (a0)

    addi a0, a0, 1
    addi a1, a1, 1
    addi a2, a2, -1

.Lmemcpy_check:
    bnez a2, .Lmemcpy_copy      # Continue while there are bytes left.

    mv a0, t0
    ret
# Searches for a string in a string array.
#
# The array is a sequence of elements stored back to back. Each element is
# a 4-byte length followed by that many bytes of string data.
#
# Parameters:
# a0 - Number of elements in the string array.
# a1 - String array.
# a2 - Needle length.
# a3 - Needle.
#
# Sets a0 to the 1-based index of the needle in the haystack or to 0 if the
# element could not be found.
.type _strings_index, @function
_strings_index:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
# Save the preserved registers and move the arguments into them.
sw s1, 20(sp)
mv s1, a0 # Number of elements left.
sw s2, 16(sp)
mv s2, a1 # Current element.
sw s3, 12(sp)
mv s3, a2 # Needle length.
sw s4, 8(sp)
mv s4, a3 # Needle.
sw s5, 4(sp)
li s5, 0 # Index counter.
.Lstrings_index_loop:
addi s5, s5, 1
beqz s1, .Lstrings_index_missing
lw a2, (s2) # Read the length of the current element in the haystack.
bne a2, s3, .Lstrings_index_next # Lengths don't match, skip the iteration.
addi a0, s2, 4 # The string data follows the length.
mv a1, s4
call _memcmp
beqz a0, .Lstrings_index_end
.Lstrings_index_next:
# Advance the pointer, reduce the length.
lw a2, (s2) # Reload the length, a2 may have been changed by _memcmp.
addi s2, s2, 4
add s2, s2, a2
addi s1, s1, -1
j .Lstrings_index_loop
.Lstrings_index_missing:
li s5, 0
.Lstrings_index_end:
mv a0, s5
# Restore the preserved registers.
lw s1, 20(sp)
lw s2, 16(sp)
lw s3, 12(sp)
lw s4, 8(sp)
lw s5, 4(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
add sp, sp, 32
ret
# Compares two strings for equality.
#
# Parameters:
# a0 - Length of the first string.
# a1 - Pointer to the first string.
# a2 - Length of the second string.
# a3 - Pointer to the second string.
#
# Sets a0 to 1 if the strings are equal, to 0 if not.
.type _string_equal, @function
_string_equal:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
# Compare string lengths.
bne a0, a2, .Lstring_equal_not_found
# If lengths match, compare the content.
mv a0, a1
mv a1, a3
# a2 is already set to the length.
call _memcmp
bnez a0, .Lstring_equal_not_found
li a0, 1
j .Lstring_equal_end
.Lstring_equal_not_found:
mv a0, zero
.Lstring_equal_end:
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Requests a new anonymous, private, readable and writable memory mapping
# of 4096 bytes. Takes no parameters.
#
# Sets a0 to the mapping address (the result of the system call).
.type _mmap, @function
_mmap:
li a0, 0 # Address at which to create the mapping.
li a1, 4096 # The length of the mapping.
li a2, PROT_READ | PROT_WRITE # Protection flags.
li a3, MAP_ANONYMOUS | MAP_PRIVATE # The mapping is not backed by a file.
li a4, -1 # File descriptor.
li a5, 0 # Page offset.
li a7, SYS_MMAP2
ecall
ret
# Sets a0 to the current position in the source text. The position is kept
# in the register s1.
.type _current, @function
_current:
mv a0, s1
ret
# Advances the position of the source text, which is kept in the
# register s1.
#
# Parameters:
# a0 - The number of bytes to advance.
.type _advance, @function
_advance:
add s1, s1, a0
ret
# Advances the global label counter by 1 setting a0 to the previous value.
# The counter is kept in the register s2.
#
# Parameters:
# a0 - If it is 0, the counter is reset to 0 first, so that the call
# returns 0 and leaves the counter at 1. Any other value just
# advances the counter.
.type _label_counter, @function
_label_counter:
bnez a0, .Llabel_counter_advance
li s2, 0 # Reset requested.
.Llabel_counter_advance:
mv a0, s2 # Return the value before the increment.
addi s2, s2, 1
ret

68
boot/definitions.inc Normal file
View File

@ -0,0 +1,68 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
#
# Tokens.
#
# Textual keywords. Each constant must equal the 1-based position of the
# keyword in the keywords array in tokenizer.s.
.equ TOKEN_PROGRAM, 1
.equ TOKEN_IMPORT, 2
.equ TOKEN_CONST, 3
.equ TOKEN_VAR, 4
.equ TOKEN_IF, 5
.equ TOKEN_THEN, 6
.equ TOKEN_ELSIF, 7
.equ TOKEN_ELSE, 8
.equ TOKEN_WHILE, 9
.equ TOKEN_DO, 10
.equ TOKEN_PROC, 11
.equ TOKEN_BEGIN, 12
.equ TOKEN_END, 13
.equ TOKEN_TYPE, 14
.equ TOKEN_RECORD, 15
.equ TOKEN_UNION, 16
.equ TOKEN_TRUE, 17
.equ TOKEN_FALSE, 18
.equ TOKEN_NIL, 19
.equ TOKEN_XOR, 20
.equ TOKEN_OR, 21
.equ TOKEN_RETURN, 22
.equ TOKEN_CAST, 23
.equ TOKEN_GOTO, 24
.equ TOKEN_CASE, 25
.equ TOKEN_OF, 26
.equ TOKEN_IDENTIFIER, 27
# Single-character symbols. Each constant must equal TOKEN_IDENTIFIER + 1 plus
# the 0-based character index in the byte_keywords string in tokenizer.s.
.equ TOKEN_AND, TOKEN_IDENTIFIER + 1
.equ TOKEN_DOT, TOKEN_IDENTIFIER + 2
.equ TOKEN_COMMA, TOKEN_IDENTIFIER + 3
.equ TOKEN_COLON, TOKEN_IDENTIFIER + 4
.equ TOKEN_SEMICOLON, TOKEN_IDENTIFIER + 5
.equ TOKEN_LEFT_PAREN, TOKEN_IDENTIFIER + 6
.equ TOKEN_RIGHT_PAREN, TOKEN_IDENTIFIER + 7
.equ TOKEN_LEFT_BRACKET, TOKEN_IDENTIFIER + 8
.equ TOKEN_RIGHT_BRACKET, TOKEN_IDENTIFIER + 9
.equ TOKEN_HAT, TOKEN_IDENTIFIER + 10
.equ TOKEN_EQUALS, TOKEN_IDENTIFIER + 11
.equ TOKEN_PLUS, TOKEN_IDENTIFIER + 12
.equ TOKEN_MINUS, TOKEN_IDENTIFIER + 13
.equ TOKEN_ASTERISK, TOKEN_IDENTIFIER + 14
.equ TOKEN_AT, TOKEN_IDENTIFIER + 15
# The remaining tokens are numbered right after the last single-character
# symbol, so they cannot collide with it if the lists above grow.
.equ TOKEN_ASSIGN, TOKEN_AT + 1
.equ TOKEN_INTEGER, TOKEN_ASSIGN + 1
#
# Symbols.
#
# Tags stored in the first word of a type object.
.equ TYPE_PRIMITIVE, 0x01
.equ TYPE_POINTER, 0x02
.equ TYPE_PROCEDURE, 0x03
# Tags stored in the first word of a symbol info object.
.equ INFO_PARAMETER, 0x10
.equ INFO_LOCAL, 0x20
.equ INFO_PROCEDURE, 0x30

1544
boot/stage1.s Normal file

File diff suppressed because it is too large Load Diff

1393
boot/stage2.elna Normal file

File diff suppressed because it is too large Load Diff

297
boot/symbol.s Normal file
View File

@ -0,0 +1,297 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global symbol_table
.global symbol_table_build, symbol_table_lookup, symbol_table_enter, symbol_table_dump
.global symbol_table_make_pointer, symbol_table_make_parameter, symbol_table_make_local, symbol_table_make_procedure
.include "boot/definitions.inc"
# Size of the symbol table storage in bytes.
.equ SYMBOL_PRIME, 1543
.section .rodata
# Names of the built-in types. They are not terminated; the length is passed
# separately when a name is entered into the table.
.type symbol_builtin_name_int, @object
symbol_builtin_name_int: .ascii "Int"
.type symbol_builtin_name_word, @object
symbol_builtin_name_word: .ascii "Word"
.type symbol_builtin_name_byte, @object
symbol_builtin_name_byte: .ascii "Byte"
.type symbol_builtin_name_char, @object
symbol_builtin_name_char: .ascii "Char"
.type symbol_builtin_name_bool, @object
symbol_builtin_name_bool: .ascii "Bool"
# Every type info starts with a word describing what type it is.
# Primitive types have only type size.
#
# The names above occupy 19 bytes, so realign before emitting words; otherwise
# every load from a type info would be a misaligned access.
.balign 4
.type symbol_builtin_type_int, @object
symbol_builtin_type_int: .word TYPE_PRIMITIVE
.word 4
.type symbol_builtin_type_word, @object
symbol_builtin_type_word: .word TYPE_PRIMITIVE
.word 4
.type symbol_builtin_type_byte, @object
symbol_builtin_type_byte: .word TYPE_PRIMITIVE
.word 1
.type symbol_builtin_type_char, @object
symbol_builtin_type_char: .word TYPE_PRIMITIVE
.word 1
.type symbol_builtin_type_bool, @object
symbol_builtin_type_bool: .word TYPE_PRIMITIVE
.word 1
.section .bss
# The first word of the symbol table is its length.
# Then a list of type infos follows:
#
# record
# name: String
# info: ^TypeInfo
# end
#
# Each entry takes 12 bytes: name length, name pointer, info pointer.
# The table is accessed with word loads and stores, so keep it word aligned.
.balign 4
.type symbol_table, @object
symbol_table: .zero SYMBOL_PRIME
.section .text
# Writes the name of every symbol in the table using _write_error.
.type symbol_table_dump, @function
symbol_table_dump:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
sw s1, 20(sp) # s1 walks over the table entries.
sw s2, 16(sp) # s2 counts the entries still to be printed.
addi s0, sp, 32
la s1, symbol_table
lw s2, 0(s1) # The first word is the number of entries.
addi s1, s1, 4 # Step over the length word to the first entry.
j .Lsymbol_table_dump_check
.Lsymbol_table_dump_entry:
# Print the entry name: a0 is the name pointer, a1 is the name length.
lw a1, 0(s1)
lw a0, 4(s1)
call _write_error
addi s2, s2, -1
addi s1, s1, 12 # Each entry is 12 bytes long.
.Lsymbol_table_dump_check:
bnez s2, .Lsymbol_table_dump_entry
# Restore the saved registers.
lw s1, 20(sp)
lw s2, 16(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Looks a symbol up by its name with a linear scan over the table.
#
# Parameters:
# a0 - Length of the symbol to search.
# a1 - Pointer to the symbol name.
#
# Sets a0 to the symbol info pointer or 0 if the symbol has not been found.
.type symbol_table_lookup, @function
symbol_table_lookup:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
sw s1, 20(sp) # s1 walks over the table entries.
sw s2, 16(sp) # s2 counts the entries still to be checked.
sw s3, 12(sp) # s3 keeps the length of the searched name.
sw s4, 8(sp) # s4 keeps the pointer to the searched name.
addi s0, sp, 32
mv s3, a0
mv s4, a1
la s1, symbol_table
lw s2, 0(s1) # The first word is the number of entries.
addi s1, s1, 4 # Step over the length word to the first entry.
.Lsymbol_table_lookup_loop:
beqz s2, .Lsymbol_table_lookup_not_found
# Compare the searched name with the name of the current entry.
lw a3, 4(s1)
lw a2, 0(s1)
mv a1, s4
mv a0, s3
call _string_equal
bnez a0, .Lsymbol_table_lookup_found
# No match, move on to the next 12-byte entry.
addi s2, s2, -1
addi s1, s1, 12
j .Lsymbol_table_lookup_loop
.Lsymbol_table_lookup_found:
lw a0, 8(s1) # The third word of an entry is the info pointer.
j .Lsymbol_table_lookup_end
.Lsymbol_table_lookup_not_found:
li a0, 0
.Lsymbol_table_lookup_end:
# Restore the saved registers.
lw s1, 20(sp)
lw s2, 16(sp)
lw s3, 12(sp)
lw s4, 8(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret
# Writes a pointer type object: the TYPE_POINTER tag followed by the pointer
# to the base type.
#
# Parameters:
# a0 - Pointer to the base type.
# a1 - Output memory.
#
# Sets a0 to the size of newly created type in bytes.
.type symbol_table_make_pointer, @function
symbol_table_make_pointer:
sw a0, 4(a1) # Second word: the base type.
li t0, TYPE_POINTER
sw t0, 0(a1) # First word: the type tag.
li a0, 8
ret
# Writes a parameter info object: the INFO_PARAMETER tag, the parameter type
# and the parameter offset.
#
# Parameters:
# a0 - Pointer to the parameter type.
# a1 - Parameter offset.
# a2 - Output memory.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_parameter, @function
symbol_table_make_parameter:
sw a1, 8(a2) # Third word: the offset.
sw a0, 4(a2) # Second word: the type.
li t0, INFO_PARAMETER
sw t0, 0(a2) # First word: the info tag.
li a0, 12
ret
# Writes a local variable info object: the INFO_LOCAL tag, the variable type
# and the stack offset of the variable.
#
# Parameters:
# a0 - Pointer to the variable type.
# a1 - Variable stack offset.
# a2 - Output memory.
#
# Sets a0 to the size of newly created info object in bytes.
.type symbol_table_make_local, @function
symbol_table_make_local:
sw a1, 8(a2) # Third word: the stack offset.
sw a0, 4(a2) # Second word: the type.
li t0, INFO_LOCAL
sw t0, 0(a2) # First word: the info tag.
li a0, 12
ret
# Creates a procedure type and a procedure info object referring to the type.
#
# Both objects are written into the same output memory:
#   0(a0) - INFO_PROCEDURE tag.
#   4(a0) - Pointer to the procedure type, i.e. the address of the third word.
#   8(a0) - The procedure type itself, consisting only of the TYPE_PROCEDURE tag.
#
# Parameters:
# a0 - Output memory.
#
# Sets a0 to the size of the newly created objects in bytes (12).
.type symbol_table_make_procedure, @function
symbol_table_make_procedure:
li t0, INFO_PROCEDURE
sw t0, 0(a0)
# The type lives right behind the info object. The info has to point at the
# type, not at itself, so that the word it refers to is a TYPE_* tag.
addi t0, a0, 8
sw t0, 4(a0)
li t0, TYPE_PROCEDURE
sw t0, 8(a0)
li a0, 12
ret
# Appends a symbol to the end of the table and increments the table length.
# No check is made that the entry still fits into the table storage.
#
# Parameters:
# a0 - Symbol name length.
# a1 - Symbol name pointer.
# a2 - Symbol pointer.
.type symbol_table_enter, @function
symbol_table_enter:
la t0, symbol_table
lw t1, 0(t0) # Current table length.
# The new entry is located at symbol_table + 4 + length * 12.
slli t2, t1, 1
add t2, t2, t1 # length * 3
slli t2, t2, 2 # length * 12
addi t2, t2, 4 # Skip the length word.
add t2, t0, t2
sw a0, 0(t2)
sw a1, 4(t2)
sw a2, 8(t2)
addi t1, t1, 1 # Save the new length.
sw t1, 0(t0)
ret
# Builds the initial symbol table by entering the built-in types
# Int, Word, Byte, Char and Bool.
#
# Sets a0 to the pointer to the global symbol table.
.type symbol_table_build, @function
symbol_table_build:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
li a0, 3 # Length of the word "Int".
la a1, symbol_builtin_name_int
la a2, symbol_builtin_type_int
call symbol_table_enter
li a0, 4 # Length of the word "Word".
la a1, symbol_builtin_name_word
la a2, symbol_builtin_type_word
call symbol_table_enter
li a0, 4 # Length of the word "Byte".
la a1, symbol_builtin_name_byte
la a2, symbol_builtin_type_byte
call symbol_table_enter
li a0, 4 # Length of the word "Char".
la a1, symbol_builtin_name_char
la a2, symbol_builtin_type_char
call symbol_table_enter
li a0, 4 # Length of the word "Bool".
la a1, symbol_builtin_name_bool
la a2, symbol_builtin_type_bool
call symbol_table_enter
# Return the table as documented; symbol_table_enter leaves a0 untouched, so
# without this a0 would still hold the length of the last entered name.
la a0, symbol_table
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret

14
boot/test.elna Normal file
View File

@ -0,0 +1,14 @@
program
proc main(x: Word, y: Word)
begin
_write_s(4, @x);
_write_s(4, @y);
y := 0x0a2c3063;
_write_s(4, @y)
end
begin
main(0x0a2c3061, 0x0a2c3062)
end.

616
boot/tokenizer.s Normal file
View File

@ -0,0 +1,616 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
.global lex_next, classification, transitions, keywords, byte_keywords
.include "boot/definitions.inc"
.section .rodata
#
# Classification table assigns each possible character to a group (class). All
# characters of the same group are handled equivalently.
#
# The class numbers are used as column indices into the transitions table, so
# CLASS_COUNT has to match the number of columns there.
#
# Classification:
#
.equ CLASS_INVALID, 0x00
.equ CLASS_DIGIT, 0x01
.equ CLASS_CHARACTER, 0x02
.equ CLASS_SPACE, 0x03
.equ CLASS_COLON, 0x04
.equ CLASS_EQUALS, 0x05
.equ CLASS_LEFT_PAREN, 0x06
.equ CLASS_RIGHT_PAREN, 0x07
.equ CLASS_ASTERISK, 0x08
.equ CLASS_UNDERSCORE, 0x09
.equ CLASS_SINGLE, 0x0a
.equ CLASS_HEX, 0x0b
.equ CLASS_ZERO, 0x0c
.equ CLASS_X, 0x0d
.equ CLASS_EOF, 0x0e
.equ CLASS_DOT, 0x0f
.equ CLASS_MINUS, 0x10
.equ CLASS_QUOTE, 0x11
.equ CLASS_GREATER, 0x12
.equ CLASS_LESS, 0x13
.equ CLASS_COUNT, 20
# One byte per character code, 128 entries covering the codes 0x00 to 0x7f.
.type classification, @object
classification:
.byte CLASS_EOF # 00 NUL
.byte CLASS_INVALID # 01 SOH
.byte CLASS_INVALID # 02 STX
.byte CLASS_INVALID # 03 ETX
.byte CLASS_INVALID # 04 EOT
.byte CLASS_INVALID # 05 ENQ
.byte CLASS_INVALID # 06 ACK
.byte CLASS_INVALID # 07 BEL
.byte CLASS_INVALID # 08 BS
.byte CLASS_SPACE # 09 HT
.byte CLASS_SPACE # 0A LF
.byte CLASS_INVALID # 0B VT
.byte CLASS_INVALID # 0C FF
.byte CLASS_SPACE # 0D CR
.byte CLASS_INVALID # 0E SO
.byte CLASS_INVALID # 0F SI
.byte CLASS_INVALID # 10 DLE
.byte CLASS_INVALID # 11 DC1
.byte CLASS_INVALID # 12 DC2
.byte CLASS_INVALID # 13 DC3
.byte CLASS_INVALID # 14 DC4
.byte CLASS_INVALID # 15 NAK
.byte CLASS_INVALID # 16 SYN
.byte CLASS_INVALID # 17 ETB
.byte CLASS_INVALID # 18 CAN
.byte CLASS_INVALID # 19 EM
.byte CLASS_INVALID # 1A SUB
.byte CLASS_INVALID # 1B ESC
.byte CLASS_INVALID # 1C FS
.byte CLASS_INVALID # 1D GS
.byte CLASS_INVALID # 1E RS
.byte CLASS_INVALID # 1F US
.byte CLASS_SPACE # 20 Space
.byte CLASS_SINGLE # 21 !
.byte CLASS_QUOTE # 22 "
.byte CLASS_INVALID # 23 #
.byte CLASS_INVALID # 24 $
.byte CLASS_SINGLE # 25 %
.byte CLASS_SINGLE # 26 &
.byte CLASS_QUOTE # 27 '
.byte CLASS_LEFT_PAREN # 28 (
.byte CLASS_RIGHT_PAREN # 29 )
.byte CLASS_ASTERISK # 2A *
.byte CLASS_SINGLE # 2B +
.byte CLASS_SINGLE # 2C ,
.byte CLASS_MINUS # 2D -
.byte CLASS_DOT # 2E .
.byte CLASS_SINGLE # 2F /
.byte CLASS_ZERO # 30 0
.byte CLASS_DIGIT # 31 1
.byte CLASS_DIGIT # 32 2
.byte CLASS_DIGIT # 33 3
.byte CLASS_DIGIT # 34 4
.byte CLASS_DIGIT # 35 5
.byte CLASS_DIGIT # 36 6
.byte CLASS_DIGIT # 37 7
.byte CLASS_DIGIT # 38 8
.byte CLASS_DIGIT # 39 9
.byte CLASS_COLON # 3A :
.byte CLASS_SINGLE # 3B ;
.byte CLASS_LESS # 3C <
.byte CLASS_EQUALS # 3D =
.byte CLASS_GREATER # 3E >
.byte CLASS_INVALID # 3F ?
.byte CLASS_SINGLE # 40 @
.byte CLASS_CHARACTER # 41 A
.byte CLASS_CHARACTER # 42 B
.byte CLASS_CHARACTER # 43 C
.byte CLASS_CHARACTER # 44 D
.byte CLASS_CHARACTER # 45 E
.byte CLASS_CHARACTER # 46 F
.byte CLASS_CHARACTER # 47 G
.byte CLASS_CHARACTER # 48 H
.byte CLASS_CHARACTER # 49 I
.byte CLASS_CHARACTER # 4A J
.byte CLASS_CHARACTER # 4B K
.byte CLASS_CHARACTER # 4C L
.byte CLASS_CHARACTER # 4D M
.byte CLASS_CHARACTER # 4E N
.byte CLASS_CHARACTER # 4F O
.byte CLASS_CHARACTER # 50 P
.byte CLASS_CHARACTER # 51 Q
.byte CLASS_CHARACTER # 52 R
.byte CLASS_CHARACTER # 53 S
.byte CLASS_CHARACTER # 54 T
.byte CLASS_CHARACTER # 55 U
.byte CLASS_CHARACTER # 56 V
.byte CLASS_CHARACTER # 57 W
.byte CLASS_CHARACTER # 58 X
.byte CLASS_CHARACTER # 59 Y
.byte CLASS_CHARACTER # 5A Z
.byte CLASS_SINGLE # 5B [
.byte CLASS_INVALID # 5C \
.byte CLASS_SINGLE # 5D ]
.byte CLASS_SINGLE # 5E ^
.byte CLASS_UNDERSCORE # 5F _
.byte CLASS_INVALID # 60 `
.byte CLASS_HEX # 61 a
.byte CLASS_HEX # 62 b
.byte CLASS_HEX # 63 c
.byte CLASS_HEX # 64 d
.byte CLASS_HEX # 65 e
.byte CLASS_HEX # 66 f
.byte CLASS_CHARACTER # 67 g
.byte CLASS_CHARACTER # 68 h
.byte CLASS_CHARACTER # 69 i
.byte CLASS_CHARACTER # 6A j
.byte CLASS_CHARACTER # 6B k
.byte CLASS_CHARACTER # 6C l
.byte CLASS_CHARACTER # 6D m
.byte CLASS_CHARACTER # 6E n
.byte CLASS_CHARACTER # 6F o
.byte CLASS_CHARACTER # 70 p
.byte CLASS_CHARACTER # 71 q
.byte CLASS_CHARACTER # 72 r
.byte CLASS_CHARACTER # 73 s
.byte CLASS_CHARACTER # 74 t
.byte CLASS_CHARACTER # 75 u
.byte CLASS_CHARACTER # 76 v
.byte CLASS_CHARACTER # 77 w
.byte CLASS_X # 78 x
.byte CLASS_CHARACTER # 79 y
.byte CLASS_CHARACTER # 7A z
.byte CLASS_INVALID # 7B {
.byte CLASS_SINGLE # 7C |
.byte CLASS_INVALID # 7D }
.byte CLASS_SINGLE # 7E ~
.byte CLASS_INVALID # 7F DEL
#
# Textual keywords in the language.
#
# Each entry is a 4-byte length immediately followed by that many characters.
# Entries are packed without padding, so the length words are in general not
# 4-byte aligned.
#
# The order matters: classify_identifier returns the result of _strings_index
# on this array directly as the token kind, so the n-th entry (counting from 1)
# has to correspond to the TOKEN_* constant with the value n in
# definitions.inc.
#
.equ KEYWORDS_COUNT, TOKEN_IDENTIFIER - 1
.type keywords, @object
keywords:
.word 7
.ascii "program"
.word 6
.ascii "import"
.word 5
.ascii "const"
.word 3
.ascii "var"
.word 2
.ascii "if"
.word 4
.ascii "then"
.word 5
.ascii "elsif"
.word 4
.ascii "else"
.word 5
.ascii "while"
.word 2
.ascii "do"
.word 4
.ascii "proc"
.word 5
.ascii "begin"
.word 3
.ascii "end"
.word 4
.ascii "type"
.word 6
.ascii "record"
.word 5
.ascii "union"
.word 4
.ascii "true"
.word 5
.ascii "false"
.word 3
.ascii "nil"
.word 3
.ascii "xor"
.word 2
.ascii "or"
.word 6
.ascii "return"
.word 4
.ascii "cast"
.word 4
.ascii "goto"
.word 4
.ascii "case"
.word 2
.ascii "of"
# Single-character symbols. classify_single maps the character at index i
# (counting from 0) to the token kind TOKEN_IDENTIFIER + 1 + i.
.type byte_keywords, @object
byte_keywords: .ascii "&.,:;()[]^=+-*@"
# Number of characters in byte_keywords.
.equ BYTE_KEYWORDS_SIZE, . - byte_keywords
.section .data
# The transition table describes transitions from one state to another, given
# a symbol (character class).
#
# The table has m rows and n columns, where m is the amount of states and n is
# the amount of classes. So given the current state and a classified character
# the table can be used to look up the next state.
#
# Each cell is a word long.
# - The least significant byte of the word is a row number (beginning with 0).
#   It specifies the target state. "ff" means that this is an end state and no
#   transition is possible.
# - The next byte is the action that should be performed when transitioning.
#   For the meaning of actions see labels in the lex_next function, which
#   handles each action.
#
# There are 14 rows (states 0x00 to 0x0d), so a target state has to be either
# "ff" or a value up to 0x0d. Anything else makes lookup_state read past the
# end of the table.
#
# NOTE(review): in the string state (0x0b) the NUL class keeps accumulating,
# so an unterminated string is scanned past the end of the source text. The
# comment states reject on NUL instead. Confirm which is intended.
# NOTE(review): a ':' that is not followed by '=' ends with action 0x02, which
# does not write a token kind, although TOKEN_COLON exists. Confirm.
#
.type transitions, @object
transitions:
# Invalid Digit Alpha Space : = ( )
# * _ Single Hex 0 x NUL .
# - " or ' > <
.word 0x00ff, 0x0103, 0x0102, 0x0300, 0x0101, 0x06ff, 0x0106, 0x06ff
.word 0x06ff, 0x0102, 0x06ff, 0x0102, 0x010c, 0x0102, 0x00ff, 0x06ff
.word 0x0105, 0x010b, 0x0104, 0x0107 # 0x00 Start
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x07ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff # 0x01 Colon
.word 0x05ff, 0x0102, 0x0102, 0x05ff, 0x05ff, 0x05ff, 0x05ff, 0x05ff
.word 0x05ff, 0x0102, 0x05ff, 0x0102, 0x0102, 0x0102, 0x05ff, 0x05ff
.word 0x05ff, 0x05ff, 0x05ff, 0x05ff # 0x02 Identifier
.word 0x08ff, 0x0103, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x0103, 0x00ff, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x03 Decimal
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x04ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x04ff, 0x02ff # 0x04 Greater
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x04ff, 0x06ff # 0x05 Minus
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x0109, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff, 0x06ff
.word 0x06ff, 0x06ff, 0x06ff, 0x06ff # 0x06 Left paren
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff, 0x02ff
.word 0x02ff, 0x02ff, 0x02ff, 0x04ff # 0x07 Less
.word 0x08ff, 0x0108, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x0108, 0x0108, 0x00ff, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x08 Hexadecimal after 0x.
.word 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x09 Comment
.word 0x00ff, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x04ff
.word 0x010a, 0x0109, 0x0109, 0x0109, 0x0109, 0x0109, 0x00ff, 0x0109
.word 0x0109, 0x0109, 0x0109, 0x0109 # 0x0a Closing comment
.word 0x00ff, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b
.word 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b, 0x010b
.word 0x010b, 0x04ff, 0x010b, 0x010b # 0x0b String
.word 0x08ff, 0x00ff, 0x00ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff, 0x08ff
.word 0x08ff, 0x00ff, 0x08ff, 0x00ff, 0x00ff, 0x010d, 0x08ff, 0x08ff
.word 0x08ff, 0x08ff, 0x08ff, 0x08ff # 0x0c Leading zero
.word 0x00ff, 0x0108, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff, 0x00ff
.word 0x00ff, 0x00ff, 0x00ff, 0x0108, 0x0108, 0x00ff, 0x00ff, 0x00ff
.word 0x00ff, 0x00ff, 0x00ff, 0x00ff # 0x0d Starting hexadecimal
.section .text
# Returns the class from the classification table for the given character.
#
# The classification table only has entries for the character codes 0x00 to
# 0x7f. Any larger value is reported as CLASS_INVALID instead of reading past
# the end of the table.
#
# Parameters:
# a0 - Character.
#
# Sets a0 to the class number.
.type classify, @function
classify:
li t0, 128 # Number of entries in the classification table.
bgeu a0, t0, .Lclassify_out_of_range
la t0, classification
add t0, t0, a0 # Character class pointer.
lbu a0, (t0) # Character class.
ret
.Lclassify_out_of_range:
li a0, CLASS_INVALID
ret
# Reads the cell of the transitions table that belongs to the given state
# (row) and character class (column).
#
# Parameters:
# a0 - Current state.
# a1 - Character class.
#
# Sets a0 to the table cell, which encodes the next state and the action.
.type lookup_state, @function
lookup_state:
# Cell index = state * CLASS_COUNT + class.
li t0, CLASS_COUNT
mul a0, a0, t0
add a0, a0, a1
slli a0, a0, 2 # Every cell is one word (4 bytes) long.
la t0, transitions
add t0, t0, a0
lw a0, (t0)
ret
# Classifies the character and looks up the transition for the resulting
# class in the current state.
#
# Parameters:
# a0 - Current state.
# a1 - Character.
#
# Sets a0 to the next state based on the given character.
.type _next_state, @function
_next_state:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
sw a0, 4(sp) # Keep the state while the character is being classified.
addi s0, sp, 16
# First step: character -> class.
mv a0, a1
call classify
# Second step: (state, class) -> transition.
mv a1, a0
lw a0, 4(sp)
call lookup_state
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Decides whether an identifier is one of the textual keywords.
#
# Parameters:
# a0 - Token length.
# a1 - Token pointer.
#
# Sets a0 to the keyword token kind if the text is found in the keywords
# array, otherwise to TOKEN_IDENTIFIER.
.type classify_identifier, @function
classify_identifier:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
# Search the token text in the keywords array.
mv a3, a1
mv a2, a0
la a1, keywords
li a0, KEYWORDS_COUNT
call _strings_index
# A non-zero result is the position of the keyword and is used as the token
# kind as is.
bnez a0, .Lclassify_identifier_end
li a0, TOKEN_IDENTIFIER
.Lclassify_identifier_end:
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Takes a symbol and determines its type.
#
# The character is searched in byte_keywords. Its index there plus
# TOKEN_IDENTIFIER + 1 gives the token kind.
#
# Parameters:
# a0 - Token character.
#
# Sets a0 to the appropriate token type, or to 0 if the character is not one
# of the known single-character symbols.
.type classify_single, @function
classify_single:
# Prologue.
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
mv a1, a0
li a2, BYTE_KEYWORDS_SIZE
la a0, byte_keywords
call _memchr
# Assumes _memchr returns 0 when the byte is absent, as libc memchr does
# (TODO confirm). Some characters classified as single symbols, '!' for
# example, are not in byte_keywords, and subtracting the table address from 0
# would give a meaningless token kind.
beqz a0, .Lclassify_single_end
la a1, byte_keywords
sub a0, a0, a1
addi a0, a0, TOKEN_IDENTIFIER + 1
.Lclassify_single_end:
# Epilogue.
lw ra, 12(sp)
lw s0, 8(sp)
addi sp, sp, 16
ret
# Classifies a symbol containing multiple characters (probably 2).
#
# Only symbols starting with ':' are recognized; they are reported as
# TOKEN_ASSIGN.
#
# Parameters:
# a0 - Token length.
# a1 - Token pointer.
#
# Sets a0 to the appropriate token type, or to 0 if the symbol is not known.
.type classify_composite, @function
classify_composite:
lbu t0, 0(a1)
li t1, ':'
# Start with "unknown", so that the token length passed in a0 cannot be
# mistaken for a token kind.
li a0, 0
bne t0, t1, .Lclassify_composite_end
li a0, TOKEN_ASSIGN
.Lclassify_composite_end:
ret
# Reads the next token from the source text by running the state machine
# described by the transitions table.
#
# Parameters:
# a0 - Source text pointer.
# a1 - A pointer to the output token. The first word receives the token kind
#      and is set to 0 on entry. For identifiers, keywords and integers the
#      second word receives the token length and the third word the pointer to
#      the token start, so the output needs 12 bytes.
#
# Sets a0 to the position in the source text after the token.
.type lex_next, @function
lex_next:
# Prologue.
addi sp, sp, -32
sw ra, 28(sp)
sw s0, 24(sp)
addi s0, sp, 32
sw s1, 20(sp) # Preserve s1 used for current source text position.
mv s1, a0
sw a0, 12(sp) # Keeps a pointer to the beginning of a token.
# 4(sp) and 8(sp) are reserved for the kind and length of the token if needed.
# Together with 12(sp) they form the 12 bytes copied to the output for
# identifiers.
sw s2, 16(sp) # Preserve s2 containing the current state.
li s2, 0x00 # Initial, start state.
sw a1, 0(sp) # Keep the output pointer across calls.
sw zero, (a1) # Initialize the token kind with 0.
.Llex_next_loop:
# Look up the transition for the current state and the current character.
mv a0, s2
lbu a1, (s1)
call _next_state
li t0, 0xff
and s2, a0, t0 # Next state.
li t0, 0xff00
and t1, a0, t0 # Transition action.
srli t1, t1, 8
# Perform the provided action.
li t0, 0x01 # Accumulate action.
beq t1, t0, .Llex_next_accumulate
li t0, 0x02 # Print action.
beq t1, t0, .Llex_next_print
li t0, 0x03 # Skip action.
beq t1, t0, .Llex_next_skip
li t0, 0x04 # Delimited string action.
beq t1, t0, .Llex_next_comment
li t0, 0x05 # Finalize identifier.
beq t1, t0, .Llex_next_identifier
li t0, 0x06 # Single character symbol action.
beq t1, t0, .Llex_next_single
li t0, 0x07 # An action for symbols containing multiple characters.
beq t1, t0, .Llex_next_composite
li t0, 0x08 # Integer action.
beq t1, t0, .Llex_next_integer
# Any other action, 0x00 in the table, rejects the character.
j .Llex_next_reject
.Llex_next_reject:
# Consume the offending character and stop; the token kind stays 0.
addi s1, s1, 1
j .Llex_next_end
.Llex_next_accumulate:
# The character belongs to the current token, continue with the next one.
addi s1, s1, 1
j .Llex_next_loop
.Llex_next_skip:
# The character is not part of any token: move both the current position and
# the token start past it.
addi s1, s1, 1
lw t0, 12(sp)
addi t0, t0, 1
sw t0, 12(sp)
j .Llex_next_loop
.Llex_next_print:
# Ends the token without consuming the current character. Nothing is written
# to the output, so the token kind stays 0.
/* DEBUG
addi a0, a0, 21
sw a0, 0(sp)
addi a0, sp, 0
li a1, 1
call _write_error */
j .Llex_next_end
.Llex_next_comment:
# Consume the closing character and stop; the token kind stays 0.
addi s1, s1, 1
j .Llex_next_end
.Llex_next_identifier:
# An identifier can be a textual keyword.
# Check the kind of the token and write it into the output parameter.
lw a1, 12(sp)
sub a0, s1, a1 # Token length = current position - token start.
sw a0, 8(sp)
call classify_identifier
sw a0, 4(sp)
# Copy the kind, the length and the start pointer (12 bytes beginning at
# 4(sp)) to the output. Presumably _memcpy takes the destination in a0, the
# source in a1 and the byte count in a2 (verify against its definition).
lw a0, 0(sp)
addi a1, sp, 4
li a2, 12
call _memcpy
j .Llex_next_end
.Llex_next_single:
# The token is the first character after the token start only. The position
# is reset to right behind it, regardless of how far the scan has advanced.
lw a0, 12(sp)
addi s1, a0, 1
lbu a0, (a0)
call classify_single
lw a1, 0(sp)
sw a0, (a1) # Only the token kind is written.
j .Llex_next_end
.Llex_next_composite:
# Consume the last character of the symbol, then classify the whole symbol.
addi s1, s1, 1
lw a1, 12(sp)
sub a0, s1, a1 # Token length.
call classify_composite
lw a1, 0(sp)
sw a0, (a1) # Only the token kind is written.
j .Llex_next_end
.Llex_next_integer:
# Write the kind, the start pointer and the length directly to the output.
lw t0, 0(sp)
li t1, TOKEN_INTEGER
sw t1, 0(t0)
lw t1, 12(sp)
sw t1, 8(t0)
sub t1, s1, t1
sw t1, 4(t0)
j .Llex_next_end
.Llex_next_end:
mv a0, s1 # Return the advanced text pointer.
# Restore saved registers.
lw s1, 20(sp)
lw s2, 16(sp)
# Epilogue.
lw ra, 28(sp)
lw s0, 24(sp)
addi sp, sp, 32
ret

61
rakelib/stage.rake Normal file
View File

@ -0,0 +1,61 @@
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
# frozen_string_literal: true
CROSS_GCC = 'build/rootfs/bin/riscv32-unknown-linux-gnu-gcc'
SYSROOT = 'build/sysroot'
QEMU = 'qemu-riscv32'
# Runs a stage compiler under QEMU, feeding it the source on standard input
# and copying everything it prints on standard output into +output+.
#
# output   - IO object receiving the compiler output.
# compiler - Array with the compiler executable (and its arguments).
# source   - Array with the path of the source file to compile.
def assemble_stage(output, compiler, source)
  command_line = [QEMU, '-L', SYSROOT, *compiler]

  puts Term::ANSIColor.green(command_line.join(' '))
  puts

  Open3.popen2(*command_line) do |compiler_input, compiler_output|
    compiler_input.write File.read(*source)
    compiler_input.close

    IO.copy_stream compiler_output, output
    compiler_output.close
  end
end
assembly_sources = Dir.glob('boot/*.s')

# Every assembly file in boot/ gets a rule producing its object file.
assembly_sources.each do |assembly_source|
  object_name = File.basename(assembly_source, '.s') + '.o'

  file File.join('build/boot', object_name) => [assembly_source, 'build/boot'] do |t|
    sh CROSS_GCC, '-c', '-o', t.name, assembly_source
  end
end

# Assembly sources shared by all stages: everything except the stages themselves.
library = assembly_sources.reject do |assembly_source|
  File.basename(assembly_source).start_with? 'stage'
end

desc 'Initial stage'
file 'build/boot/stage1' => ['build/boot/stage1.o', *library] do |t|
  sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
end

# Each second stage variant is produced by compiling boot/stage2.elna with the
# compiler built in the step before it and linking the resulting assembly.
{
  'build/boot/stage2a' => 'build/boot/stage1',
  'build/boot/stage2b' => 'build/boot/stage2a'
}.each do |exe, compiler|
  file exe.ext('.s') => [compiler, 'boot/stage2.elna'] do |t|
    source, compiler_command = t.prerequisites.partition do |prerequisite|
      prerequisite.end_with? '.elna'
    end

    File.open(t.name, 'w') do |output|
      assemble_stage output, compiler_command, source
    end
  end

  file exe => [exe.ext('.s'), *library] do |t|
    sh CROSS_GCC, '-nostdlib', '-o', t.name, *t.prerequisites
  end
end

View File

@ -1,15 +0,0 @@
DEFINITION MODULE CommandLineInterface;
(* Parsing of the command line arguments of the compiler. *)
FROM Common IMPORT ShortString;
TYPE
(* Options collected from the command line. *)
CommandLine = RECORD
(* The only argument not starting with "-", the source file. *)
input: ShortString;
(* Set if "--lex" was given. *)
lex: BOOLEAN;
(* Set if "--parse" was given. *)
parse: BOOLEAN
END;
PCommandLine = POINTER TO CommandLine;
(* Parses the program arguments. Returns NIL if they are not valid. *)
PROCEDURE parse_command_line(): PCommandLine;
END CommandLineInterface.

View File

@ -1,75 +0,0 @@
module;
from SYSTEM import ADR, TSIZE;
from Args import GetArg, Narg;
from FIO import WriteString, WriteChar, WriteLine, StdErr;
from Storage import ALLOCATE;
from Strings import CompareStr, Length;
from MemUtils import MemZero;
from Common import ShortString;
(*
 * Parses the program arguments into a newly allocated CommandLine record.
 *
 * Recognized are "--lex", "--parse" and exactly one argument that does not
 * start with "-", which is taken as the input file. On any error a message is
 * written to the standard error and nil is returned. The allocated record is
 * not released in this case.
 *)
proc parse_command_line() -> PCommandLine;
var
parameter: ShortString;
i: CARDINAL;
result: PCommandLine;
parsed: BOOLEAN;
begin
i := 1;
ALLOCATE(result, TSIZE(CommandLine));
result^.lex := false;
result^.parse := false;
(* An empty input marks that no source file has been seen yet. *)
MemZero(ADR(result^.input), 256);
(* Stop at the first error, which sets result to nil. *)
while (i < Narg()) & (result <> nil) do
(* The return value of GetArg is not used; parsed is reset right away and
   then tracks whether the argument was recognized. *)
parsed := GetArg(parameter, i);
parsed := false;
if CompareStr(parameter, '--lex') = 0 then
parsed := true;
result^.lex := true
end;
if CompareStr(parameter, '--parse') = 0 then
parsed := true;
result^.parse := true
end;
(* Everything that is not an option is an input file. *)
if parameter[1] <> '-' then
parsed := true;
(* A second input file is an error. *)
if Length(result^.input) > 0 then
WriteString(StdErr, 'Fatal error: only one source file can be compiled at once. First given "');
WriteString(StdErr, result^.input);
WriteString(StdErr, '", then "');
WriteString(StdErr, parameter);
WriteString(StdErr, '".');
WriteLine(StdErr);
result := nil
end;
if result <> nil then
result^.input := parameter
end
end;
if parsed = false then
WriteString(StdErr, 'Fatal error: unknown command line options: ');
WriteString(StdErr, parameter);
WriteChar(StdErr, '.');
WriteLine(StdErr);
result := nil
end;
i := i + 1
end;
(* All arguments were valid, but none of them named a source file. *)
if (result <> nil) & (Length(result^.input) = 0) then
WriteString(StdErr, 'Fatal error: no input files.');
WriteLine(StdErr);
result := nil
end;
return result
end;
end.

View File

@ -1,8 +0,0 @@
DEFINITION MODULE Common;
(* Types shared between the compiler modules. *)
TYPE
(* Fixed-size character buffers of 256 characters, indexed from 1. *)
ShortString = ARRAY[1..256] OF CHAR;
Identifier = ARRAY[1..256] OF CHAR;
PIdentifier = POINTER TO Identifier;
END Common.

View File

@ -1,3 +0,0 @@
module;
end.

View File

@ -1,51 +0,0 @@
program;
from FIO import Close, IsNoError, File, OpenToRead, StdErr, StdOut, WriteLine, WriteString;
from SYSTEM import ADR;
from M2RTS import HALT, ExitOnHalt;
from Lexer import Lexer, lexer_destroy, lexer_initialize;
from Transpiler import transpile;
from CommandLineInterface import PCommandLine, parse_command_line;
var
(* Parsed program arguments, nil if they were not valid. *)
command_line: PCommandLine;
(*
 * Opens the input file named on the command line and transpiles it to the
 * standard output. If the file cannot be opened, an error is reported and
 * ExitOnHalt(2) is called, presumably selecting the exit code used by the
 * final HALT (verify against the M2RTS documentation).
 *)
proc compile_from_stream();
var
lexer: Lexer;
source_input: File;
begin
source_input := OpenToRead(command_line^.input);
if IsNoError(source_input) = false then
WriteString(StdErr, 'Fatal error: failed to read the input file "');
WriteString(StdErr, command_line^.input);
WriteString(StdErr, '".');
WriteLine(StdErr);
ExitOnHalt(2)
end;
if IsNoError(source_input) then
lexer_initialize(ADR(lexer), source_input);
transpile(ADR(lexer), StdOut, command_line^.input);
lexer_destroy(ADR(lexer));
Close(source_input)
end
end;
begin
(* ExitOnHalt is called with 0 first; the error paths call it again with 1
   (invalid command line) or 2 (unreadable input file). *)
ExitOnHalt(0);
command_line := parse_command_line();
if command_line <> nil then
compile_from_stream()
end;
if command_line = nil then
ExitOnHalt(1)
end;
HALT()
end.

View File

@ -1,99 +0,0 @@
DEFINITION MODULE Lexer;
(* Lexical analysis: turns the characters of an input file into tokens. *)
FROM FIO IMPORT File;
FROM Common IMPORT Identifier;
TYPE
PLexerBuffer = POINTER TO CHAR;
(*
 * Lexer state.
 *
 * NOTE(review): the field meanings below are inferred from the names only.
 * Presumably Buffer holds the text read from Input, Size and Length describe
 * its capacity and fill level, and Start and Current delimit the token being
 * read. Confirm against the implementation module.
 *)
Lexer = RECORD
Input: File;
Buffer: PLexerBuffer;
Size: CARDINAL;
Length: CARDINAL;
Start: PLexerBuffer;
Current: PLexerBuffer
END;
PLexer = POINTER TO Lexer;
(* All kinds of tokens; used as the tag of LexerToken. *)
LexerKind = (
lexerKindEof,
lexerKindIdentifier,
lexerKindIf,
lexerKindThen,
lexerKindElse,
lexerKindElsif,
lexerKindWhile,
lexerKindDo,
lexerKindProc,
lexerKindBegin,
lexerKindEnd,
lexerKindImplementation,
lexerKindConst,
lexerKindVar,
lexerKindCase,
lexerKindOf,
lexerKindType,
lexerKindRecord,
lexerKindUnion,
lexerKindPipe,
lexerKindTo,
lexerKindBoolean,
lexerKindNull,
lexerKindAnd,
lexerKindOr,
lexerKindNot,
lexerKindReturn,
lexerKindDefinition,
lexerKindRange,
lexerKindLeftParen,
lexerKindRightParen,
lexerKindLeftSquare,
lexerKindRightSquare,
lexerKindGreaterEqual,
lexerKindLessEqual,
lexerKindGreaterThan,
lexerKindLessThan,
lexerKindNotEqual,
lexerKindEqual,
lexerKindSemicolon,
lexerKindDot,
lexerKindComma,
lexerKindPlus,
lexerKindMinus,
lexerKindMultiplication,
lexerKindDivision,
lexerKindRemainder,
lexerKindAssignment,
lexerKindColon,
lexerKindHat,
lexerKindAt,
lexerKindComment,
lexerKindInteger,
lexerKindWord,
lexerKindCharacter,
lexerKindString,
lexerKindFrom,
lexerKindPointer,
lexerKindArray,
lexerKindArrow,
lexerKindProgram,
lexerKindModule,
lexerKindImport
);
(*
 * A token. Only boolean, identifier and integer tokens carry a value in
 * addition to their kind.
 *)
LexerToken = RECORD
CASE kind: LexerKind OF
lexerKindBoolean: booleanKind: BOOLEAN |
lexerKindIdentifier: identifierKind: Identifier |
lexerKindInteger: integerKind: INTEGER
END
END;
PLexerToken = POINTER TO LexerToken;
(* Prepares ALexer for reading tokens from Input. *)
PROCEDURE lexer_initialize(ALexer: PLexer; Input: File);
(* Counterpart of lexer_initialize; call it when the lexer is no longer needed. *)
PROCEDURE lexer_destroy(ALexer: PLexer);
(* Returns the last read token. *)
PROCEDURE lexer_current(ALexer: PLexer): LexerToken;
(* Read and return the next token. *)
PROCEDURE lexer_lex(ALexer: PLexer): LexerToken;
END Lexer.

View File

@ -1,828 +0,0 @@
module;
from FIO import ReadNBytes, StdErr;
from SYSTEM import ADR, TSIZE;
from DynamicStrings import String, InitStringCharStar, KillString;
from StringConvert import StringToInteger;
from Storage import DEALLOCATE, ALLOCATE;
from Strings import Length;
from MemUtils import MemCopy, MemZero;
from StrCase import Lower;
const
CHUNK_SIZE = 65536;
type
(*
* The classification table assigns each possible character to a group (class).
* All characters of the same group are handled equivalently.
*
* The ordinal of a class, plus one, is its column in a row of the transition
* table.
*
* Character classes:
*)
TransitionClass = (
transitionClassInvalid,
transitionClassDigit,
transitionClassAlpha,
transitionClassSpace,
transitionClassColon,
transitionClassEquals,
transitionClassLeftParen,
transitionClassRightParen,
transitionClassAsterisk,
transitionClassUnderscore,
transitionClassSingle,
transitionClassHex,
transitionClassZero,
transitionClassX,
transitionClassEof,
transitionClassDot,
transitionClassMinus,
transitionClassSingleQuote,
transitionClassDoubleQuote,
transitionClassGreater,
transitionClassLess,
transitionClassOther
);
(*
* States of the lexer state machine. The ordinal of a state, plus one, is its
* row in the transition table. transitionStateEnd is the target of every
* transition after which no further character is consumed for the current
* token.
*)
TransitionState = (
transitionStateStart,
transitionStateColon,
transitionStateIdentifier,
transitionStateDecimal,
transitionStateGreater,
transitionStateMinus,
transitionStateLeftParen,
transitionStateLess,
transitionStateDot,
transitionStateComment,
transitionStateClosingComment,
transitionStateCharacter,
transitionStateString,
transitionStateLeadingZero,
transitionStateDecimalSuffix,
transitionStateEnd
);
(* Procedure run when a transition is taken. It receives the lexer and the
* token being built. *)
TransitionAction = proc(PLexer, PLexerToken);
(* One cell of the transition table: the action to run and the state to enter
* afterwards. *)
Transition = record
Action: TransitionAction;
NextState: TransitionState
end;
(* One row of the transition table: a transition for each of the 22 character
* classes declared in TransitionClass. *)
TransitionClasses = [22]Transition;
var
  (*
   * Character class of every byte value. The entry for a character lives at
   * its code plus one. initialize_classification fills entries 1 to 256, so
   * the table needs 256 elements; with only 128 the entries for the codes
   * 128 to 255 would be written past the end of the array.
   *)
  classification: [256]TransitionClass;
  (* One row for each of the 16 states declared in TransitionState. *)
  transitions: [16]TransitionClasses;
(* Assigns kind to every classification entry from first to last, inclusive. *)
proc classify_range(first: CARDINAL, last: CARDINAL, kind: TransitionClass);
var
  position: CARDINAL;
begin
  position := first;
  while position <= last do
    classification[position] := kind;
    position := position + 1
  end
end;

(*
 * Fills the classification table. The entry for a character is stored at its
 * character code plus one, so entry 1 describes NUL and entry 128 describes
 * DEL.
 *
 * NOTE(review): entries 129 to 256 are written as well, so the classification
 * array has to provide at least 256 elements.
 *)
proc initialize_classification();
begin
  (* Control characters NUL to US: invalid, except for NUL itself and the
   * whitespace characters set right below. *)
  classify_range(1, 32, transitionClassInvalid);
  classification[1] := transitionClassEof; (* NUL *)
  classification[10] := transitionClassSpace; (* HT *)
  classification[11] := transitionClassSpace; (* LF *)
  classification[14] := transitionClassSpace; (* CR *)
  classification[33] := transitionClassSpace; (* Space *)

  (* Punctuation preceding the digits. *)
  classification[34] := transitionClassSingle; (* ! *)
  classification[35] := transitionClassDoubleQuote; (* " *)
  classify_range(36, 37, transitionClassOther); (* # $ *)
  classify_range(38, 39, transitionClassSingle); (* % & *)
  classification[40] := transitionClassSingleQuote; (* ' *)
  classification[41] := transitionClassLeftParen; (* ( *)
  classification[42] := transitionClassRightParen; (* ) *)
  classification[43] := transitionClassAsterisk; (* * *)
  classify_range(44, 45, transitionClassSingle); (* + , *)
  classification[46] := transitionClassMinus; (* - *)
  classification[47] := transitionClassDot; (* . *)
  classification[48] := transitionClassSingle; (* / *)

  (* Digits; zero has a class of its own. *)
  classification[49] := transitionClassZero; (* 0 *)
  classify_range(50, 58, transitionClassDigit); (* 1 to 9 *)

  (* Punctuation between the digits and the upper case letters. *)
  classification[59] := transitionClassColon; (* : *)
  classification[60] := transitionClassSingle; (* ; *)
  classification[61] := transitionClassLess; (* < *)
  classification[62] := transitionClassEquals; (* = *)
  classification[63] := transitionClassGreater; (* > *)
  classification[64] := transitionClassOther; (* ? *)
  classification[65] := transitionClassSingle; (* @ *)

  (* Upper case letters. *)
  classify_range(66, 91, transitionClassAlpha); (* A to Z *)

  (* Punctuation between the upper and the lower case letters. *)
  classification[92] := transitionClassSingle; (* [ *)
  classification[93] := transitionClassOther; (* \ *)
  classification[94] := transitionClassSingle; (* ] *)
  classification[95] := transitionClassSingle; (* ^ *)
  classification[96] := transitionClassUnderscore; (* _ *)
  classification[97] := transitionClassOther; (* ` *)

  (* Lower case letters: a to f are hexadecimal digits, x has its own class. *)
  classify_range(98, 103, transitionClassHex); (* a to f *)
  classify_range(104, 123, transitionClassAlpha); (* g to z *)
  classification[121] := transitionClassX; (* x *)

  (* The rest of the 7-bit range. *)
  classification[124] := transitionClassOther; (* { *)
  classification[125] := transitionClassSingle; (* | *)
  classification[126] := transitionClassOther; (* } *)
  classification[127] := transitionClassSingle; (* ~ *)
  classification[128] := transitionClassInvalid; (* DEL *)

  (* Everything above the 7-bit range. *)
  classify_range(129, 256, transitionClassOther)
end;
(*
 * Tells whether the characters between TokenStart (inclusive) and TokenEnd
 * (exclusive) spell Keyword. A token character matches when it is equal to
 * the keyword character as written or to its Lower() form. Token and keyword
 * must have the same length.
 *)
proc compare_keyword(Keyword: ARRAY OF CHAR, TokenStart: PLexerBuffer, TokenEnd: PLexerBuffer) -> BOOLEAN;
var
  matches: BOOLEAN;
  position: CARDINAL;
  keyword_length: CARDINAL;
begin
  keyword_length := Length(Keyword);
  position := 0;
  matches := true;

  (* Stop at the first mismatch or as soon as either side is exhausted. *)
  while matches & (position < keyword_length) & (TokenStart <> TokenEnd) do
    matches := (TokenStart^ = Keyword[position]) or (TokenStart^ = Lower(Keyword[position]));
    INC(TokenStart);
    INC(position)
  end;
  (* Both the keyword and the token have to be consumed completely. *)
  return matches & (position = keyword_length) & (TokenStart = TokenEnd)
end;
(* Reached the end of file: marks the token as lexerKindEof. The lexer
* position is not changed. *)
proc transition_action_eof(lexer: PLexer, token: PLexerToken);
begin
token^.kind := lexerKindEof
end;
(* Add the character to the token currently read and advance to the next
* character. Only lexer^.Current is moved; lexer^.Start and the token itself
* are left untouched. *)
proc transition_action_accumulate(lexer: PLexer, token: PLexerToken);
begin
INC(lexer^.Current)
end;
(*
 * The current character is not a part of the token. Finish the token already
 * read. Don't advance to the next character.
 *
 * The token kind is derived from the first character of the token, which is
 * the character lexer^.Start points to. This action ends one-character tokens
 * that could have been the beginning of a composite token (":" of ":=",
 * ">" of ">=", "<" of "<=" or "<>", "(" of a comment opening, "-" of "->",
 * "." of ".."). For any other first character the token is left unchanged.
 *)
proc transition_action_finalize(lexer: PLexer, token: PLexerToken);
begin
  if lexer^.Start^ = ':' then
    token^.kind := lexerKindColon
  end;
  if lexer^.Start^ = '>' then
    token^.kind := lexerKindGreaterThan
  end;
  if lexer^.Start^ = '<' then
    token^.kind := lexerKindLessThan
  end;
  if lexer^.Start^ = '(' then
    token^.kind := lexerKindLeftParen
  end;
  (* A minus that is not followed by ">" is the minus operator. *)
  if lexer^.Start^ = '-' then
    token^.kind := lexerKindMinus
  end;
  if lexer^.Start^ = '.' then
    token^.kind := lexerKindDot
  end
end;
(*
 * An action for tokens containing multiple characters.
 *
 * The first character of the token is read from lexer^.Start, the second one
 * from lexer^.Current. Recognized pairs are "<=", ">=", ":=", "<>", "->" and
 * "..". The second character is consumed in any case; for an unknown pair the
 * token kind is left unchanged.
 *)
proc transition_action_composite(lexer: PLexer, token: PLexerToken);
var
  first: CHAR;
  second: CHAR;
begin
  first := lexer^.Start^;
  second := lexer^.Current^;

  (* Pairs ending with the equals sign. *)
  if second = '=' then
    if first = '<' then
      token^.kind := lexerKindLessEqual
    end;
    if first = '>' then
      token^.kind := lexerKindGreaterEqual
    end;
    if first = ':' then
      token^.kind := lexerKindAssignment
    end
  end;
  (* Pairs ending with the greater-than sign. *)
  if second = '>' then
    if first = '<' then
      token^.kind := lexerKindNotEqual
    end;
    if first = '-' then
      token^.kind := lexerKindArrow
    end
  end;
  if (first = '.') & (second = '.') then
    token^.kind := lexerKindRange
  end;
  INC(lexer^.Current)
end;
(* Skip a space. The token start and the current position are both moved one
 * character forward, so the skipped character never becomes part of a
 * token. The token itself is not touched. *)
proc transition_action_skip(lexer: PLexer, token: PLexerToken);
begin
  INC(lexer^.Start);
  INC(lexer^.Current)
end;
(*
 * Delimited string action. Runs on the closing delimiter of a comment, a
 * character literal or a string literal and consumes that delimiter.
 *
 * The token kind follows from the opening delimiter lexer^.Start points to,
 * in agreement with the state machine: a single quote enters
 * transitionStateCharacter and a double quote enters transitionStateString.
 *)
proc transition_action_delimited(lexer: PLexer, token: PLexerToken);
begin
  if lexer^.Start^ = '(' then
    token^.kind := lexerKindComment
  end;
  if lexer^.Start^ = '"' then
    token^.kind := lexerKindString
  end;
  if lexer^.Start^ = "'" then
    token^.kind := lexerKindCharacter
  end;
  INC(lexer^.Current)
end;
(*
 * Finalize keyword or identifier.
 *
 * The token is first stored as an identifier: identifierKind[1] receives the
 * length of the text and the text itself is copied to identifierKind[2] and
 * the following elements. Afterwards the text is compared with every keyword
 * and the token kind is replaced if one of them matches. All keywords are
 * given in upper case, so compare_keyword accepts them in upper as well as in
 * lower case. The current character is not consumed.
 *
 * NOTE(review): the length is not checked against the size of Identifier, and
 * it is stored in a single character - confirm that longer identifiers are
 * rejected elsewhere.
 *)
proc transition_action_key_id(lexer: PLexer, token: PLexerToken);
begin
  token^.kind := lexerKindIdentifier;
  token^.identifierKind[1] := lexer^.Current - lexer^.Start;
  MemCopy(lexer^.Start, ORD(token^.identifierKind[1]), ADR(token^.identifierKind[2]));

  if compare_keyword('PROGRAM', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindProgram
  end;
  if compare_keyword('IMPORT', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindImport
  end;
  if compare_keyword('CONST', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindConst
  end;
  if compare_keyword('VAR', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindVar
  end;
  if compare_keyword('IF', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindIf
  end;
  if compare_keyword('THEN', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindThen
  end;
  if compare_keyword('ELSIF', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindElsif
  end;
  if compare_keyword('ELSE', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindElse
  end;
  if compare_keyword('WHILE', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindWhile
  end;
  if compare_keyword('DO', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindDo
  end;
  (* Upper case like every other keyword, so that "proc" and "PROC" are both
   * recognized. *)
  if compare_keyword('PROC', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindProc
  end;
  if compare_keyword('BEGIN', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindBegin
  end;
  if compare_keyword('END', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindEnd
  end;
  if compare_keyword('TYPE', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindType
  end;
  if compare_keyword('RECORD', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindRecord
  end;
  if compare_keyword('UNION', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindUnion
  end;
  if compare_keyword('NIL', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindNull
  end;
  if compare_keyword('AND', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindAnd
  end;
  if compare_keyword('OR', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindOr
  end;
  if compare_keyword('RETURN', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindReturn
  end;
  if compare_keyword('DEFINITION', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindDefinition
  end;
  if compare_keyword('TO', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindTo
  end;
  if compare_keyword('CASE', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindCase
  end;
  if compare_keyword('OF', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindOf
  end;
  if compare_keyword('FROM', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindFrom
  end;
  if compare_keyword('MODULE', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindModule
  end;
  if compare_keyword('IMPLEMENTATION', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindImplementation
  end;
  if compare_keyword('POINTER', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindPointer
  end;
  if compare_keyword('ARRAY', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindArray
  end;
  (* Boolean literals carry their value in booleanKind. *)
  if compare_keyword('TRUE', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindBoolean;
    token^.booleanKind := true
  end;
  if compare_keyword('FALSE', lexer^.Start, lexer^.Current) then
    token^.kind := lexerKindBoolean;
    token^.booleanKind := false
  end
end;
(*
 * Action for tokens containing only one character. The character cannot be
 * followed by other characters forming a composite token.
 *
 * The token kind is chosen by the character lexer^.Current points to, and the
 * character is consumed afterwards. The start state routes the equals sign,
 * the right parenthesis, the asterisk and all characters of the class
 * transitionClassSingle here.
 *
 * NOTE(review): "!" is classified as transitionClassSingle too, but there is
 * no token kind for it, so the token kind is left unchanged for "!".
 *)
proc transition_action_single(lexer: PLexer, token: PLexerToken);
begin
  if lexer^.Current^ = '&' then
    token^.kind := lexerKindAnd
  end;
  if lexer^.Current^ = ';' then
    token^.kind := lexerKindSemicolon
  end;
  if lexer^.Current^ = ',' then
    token^.kind := lexerKindComma
  end;
  if lexer^.Current^ = '~' then
    token^.kind := lexerKindNot
  end;
  if lexer^.Current^ = ')' then
    token^.kind := lexerKindRightParen
  end;
  if lexer^.Current^ = '[' then
    token^.kind := lexerKindLeftSquare
  end;
  if lexer^.Current^ = ']' then
    token^.kind := lexerKindRightSquare
  end;
  if lexer^.Current^ = '^' then
    token^.kind := lexerKindHat
  end;
  if lexer^.Current^ = '=' then
    token^.kind := lexerKindEqual
  end;
  if lexer^.Current^ = '+' then
    token^.kind := lexerKindPlus
  end;
  if lexer^.Current^ = '*' then
    token^.kind := lexerKindMultiplication
  end;
  if lexer^.Current^ = '/' then
    token^.kind := lexerKindDivision
  end;
  if lexer^.Current^ = '%' then
    token^.kind := lexerKindRemainder
  end;
  if lexer^.Current^ = '@' then
    token^.kind := lexerKindAt
  end;
  if lexer^.Current^ = '|' then
    token^.kind := lexerKindPipe
  end;
  INC(lexer^.Current)
end;
(*
 * Handle an integer literal.
 *
 * The text between lexer^.Start and lexer^.Current is copied into the zeroed
 * identifierKind buffer of the token, which serves as scratch space for a
 * zero-terminated string, and is then converted to a number in base 10. The
 * result is stored in integerKind. The current character is not consumed.
 *
 * NOTE(review): the flag returned by StringToInteger is ignored, so a literal
 * that cannot be converted is not reported here.
 *)
proc transition_action_integer(lexer: PLexer, token: PLexerToken);
var
  buffer: String;
  integer_length: CARDINAL;
  found: BOOLEAN;
begin
  token^.kind := lexerKindInteger;
  integer_length := lexer^.Current - lexer^.Start;
  (* Never copy more than the scratch buffer can hold, and keep its last byte
   * zero so that the text stays terminated. *)
  if integer_length >= TSIZE(Identifier) then
    integer_length := TSIZE(Identifier) - 1
  end;
  MemZero(ADR(token^.identifierKind), TSIZE(Identifier));
  MemCopy(lexer^.Start, integer_length, ADR(token^.identifierKind[1]));

  buffer := InitStringCharStar(ADR(token^.identifierKind[1]));
  token^.integerKind := StringToInteger(buffer, 10, found);
  buffer := KillString(buffer)
end;
(*
 * Fills the whole row of CurrentState in the transition table with the same
 * transition: DefaultAction as the action and NextState as the target state.
 * Callers overwrite individual cells afterwards where a character class needs
 * different handling.
 *)
proc set_default_transition(CurrentState: TransitionState, DefaultAction: TransitionAction, NextState: TransitionState);
var
  fallback: Transition;
  row: CARDINAL;
  column: CARDINAL;
begin
  fallback.Action := DefaultAction;
  fallback.NextState := NextState;

  row := ORD(CurrentState) + 1;
  (* Columns run from the first class, transitionClassInvalid, to the last
   * one, transitionClassOther. *)
  column := ORD(transitionClassInvalid) + 1;
  while column <= ORD(transitionClassOther) + 1 do
    transitions[row][column] := fallback;
    column := column + 1
  end
end;
(*
* The transition table describes transitions from one state to another, given
* a symbol (character class).
*
* The table has m rows and n columns, where m is the number of states and n is
* the number of classes. So given the current state and a classified character
* the table can be used to look up the next state.
*
* Each cell is a Transition record.
* - NextState specifies the target state. transitionStateEnd means that this
* is an end state and no further transition is possible.
* - Action is the procedure that should be performed when transitioning. nil
* is stored in the cells the table assigns no action to.
*)
proc initialize_transitions();
begin
(* Start state. *)
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].Action := transition_action_skip;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSpace) + 1].NextState := transitionStateStart;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassColon) + 1].NextState := transitionStateColon;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLeftParen) + 1].NextState := transitionStateLeftParen;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].Action := transition_action_single;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingle) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateLeadingZero;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].Action := transition_action_eof;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateDot;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassMinus) + 1].NextState := transitionStateMinus;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateCharacter;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateString;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateGreater;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassLess) + 1].NextState := transitionStateLess;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].Action := nil;
transitions[ORD(transitionStateStart) + 1][ORD(transitionClassOther) + 1].NextState := transitionStateEnd;
(* Colon state. *)
set_default_transition(transitionStateColon, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateColon) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
(* Identifier state. *)
set_default_transition(transitionStateIdentifier, transition_action_key_id, transitionStateEnd);
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateIdentifier;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateIdentifier) + 1][ORD(transitionClassX) + 1].NextState := transitionStateIdentifier;
(* Decimal state. *)
set_default_transition(transitionStateDecimal, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateDecimal;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateDecimalSuffix;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].Action := nil;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateDecimalSuffix;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateDecimal;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateDecimal) + 1][ORD(transitionClassX) + 1].NextState := transitionStateDecimalSuffix;
(* Greater state. *)
set_default_transition(transitionStateGreater, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateGreater) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
(* Minus state. *)
set_default_transition(transitionStateMinus, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateMinus) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd;
(* Left paren state. *)
set_default_transition(transitionStateLeftParen, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateLeftParen) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateComment;
(* Less state. *)
set_default_transition(transitionStateLess, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassEquals) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateLess) + 1][ORD(transitionClassGreater) + 1].NextState := transitionStateEnd;
(* Dot state. *)
set_default_transition(transitionStateDot, transition_action_finalize, transitionStateEnd);
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].Action := transition_action_composite;
transitions[ORD(transitionStateDot) + 1][ORD(transitionClassDot) + 1].NextState := transitionStateEnd;
(* Comment. *)
set_default_transition(transitionStateComment, transition_action_accumulate, transitionStateComment);
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].Action := nil;
transitions[ORD(transitionStateComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
(* Closing comment. *)
set_default_transition(transitionStateClosingComment, transition_action_accumulate, transitionStateComment);
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].Action := transition_action_delimited;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassRightParen) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].Action := transition_action_accumulate;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassAsterisk) + 1].NextState := transitionStateClosingComment;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].Action := nil;
transitions[ORD(transitionStateClosingComment) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
(* Character. *)
set_default_transition(transitionStateCharacter, transition_action_accumulate, transitionStateCharacter);
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].Action := nil;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].Action := transition_action_delimited;
transitions[ORD(transitionStateCharacter) + 1][ORD(transitionClassSingleQuote) + 1].NextState := transitionStateEnd;
(* String. *)
set_default_transition(transitionStateString, transition_action_accumulate, transitionStateString);
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].Action := nil;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassInvalid) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].Action := nil;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassEof) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].Action := transition_action_delimited;
transitions[ORD(transitionStateString) + 1][ORD(transitionClassDoubleQuote) + 1].NextState := transitionStateEnd;
(* Leading zero. *)
set_default_transition(transitionStateLeadingZero, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassUnderscore) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].Action := nil;
transitions[ORD(transitionStateLeadingZero) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd;
(* Digit with a character suffix. *)
set_default_transition(transitionStateDecimalSuffix, transition_action_integer, transitionStateEnd);
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].Action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassAlpha) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].Action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassDigit) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].Action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassHex) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].Action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassZero) + 1].NextState := transitionStateEnd;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].Action := nil;
transitions[ORD(transitionStateDecimalSuffix) + 1][ORD(transitionClassX) + 1].NextState := transitionStateEnd
end;
(* Prepares a lexer for reading from the given input file. *)
(* A zero-filled buffer of CHUNK_SIZE bytes is allocated and Length is reset, *)
(* so the first lexer_lex call reads the buffer content from the file. *)
proc lexer_initialize(lexer: PLexer, Input: File);
begin
  ALLOCATE(lexer^.Buffer, CHUNK_SIZE);
  MemZero(lexer^.Buffer, CHUNK_SIZE);
  lexer^.Size := CHUNK_SIZE;
  lexer^.Length := 0;
  lexer^.Input := Input
end;
(* Scans one token starting at lexer^.Start by walking the transition table. *)
(* The token is passed by address to every transition action and returned as is. *)
proc lexer_current(lexer: PLexer) -> LexerToken;
var
  char_class: TransitionClass;
  state: TransitionState;
  step: Transition;
  result: LexerToken;
begin
  state := transitionStateStart;
  (* Always restart from the beginning of the current token. *)
  lexer^.Current := lexer^.Start;
  while state <> transitionStateEnd do
    (* Both tables are 1-based, hence the + 1 on every ordinal. *)
    char_class := classification[ORD(lexer^.Current^) + 1];
    step := transitions[ORD(state) + 1][ORD(char_class) + 1];
    if step.Action <> nil then
      step.Action(lexer, ADR(result))
    end;
    state := step.NextState
  end;
  return result
end;
(* Moves on to the next token and returns it. *)
(* While Length is still 0 up to CHUNK_SIZE bytes are read from the input first. *)
proc lexer_lex(lexer: PLexer) -> LexerToken;
var
  token: LexerToken;
begin
  if lexer^.Length = 0 then
    lexer^.Length := ReadNBytes(lexer^.Input, CHUNK_SIZE, lexer^.Buffer);
    lexer^.Current := lexer^.Buffer
  end;
  (* The new token starts wherever the previous scan left Current. *)
  lexer^.Start := lexer^.Current;
  token := lexer_current(lexer);
  return token
end;
(* Frees the lexer buffer using the size recorded in lexer^.Size. *)
proc lexer_destroy(lexer: PLexer);
begin
  DEALLOCATE(lexer^.Buffer, lexer^.Size)
end;
(* Module body: runs both table initialisers once when the module is initialised. *)
(* initialize_transitions fills the transitions table used by lexer_current. *)
(* NOTE(review): initialize_classification presumably fills the classification table - confirm. *)
begin
initialize_classification();
initialize_transitions()
end.

View File

@ -1,78 +0,0 @@
(* Syntax tree node types and the parsing procedures that build them. *)
(* Sequences are stored as contiguous blocks ended by a sentinel entry: *)
(* NIL for sequences of pointers, a zero-filled entry for sequences of records. *)
DEFINITION MODULE Parser;
FROM Common IMPORT Identifier, PIdentifier;
FROM Lexer IMPORT PLexer;
TYPE
(* One import statement: the source module and the imported names. *)
(* symbols is a sequence of identifiers ended by a zero-filled identifier. *)
AstImportStatement = RECORD
package: Identifier;
symbols: PIdentifier
END;
PAstImportStatement = POINTER TO AstImportStatement;
(* Sequence of import statements ended by NIL. *)
PPAstImportStatement = POINTER TO PAstImportStatement;
(* A named integer constant. *)
AstConstantDeclaration = RECORD
constant_name: Identifier;
constant_value: INTEGER
END;
PAstConstantDeclaration = POINTER TO AstConstantDeclaration;
(* Sequence of constant declarations ended by NIL. *)
PPAstConstantDeclaration = POINTER TO PAstConstantDeclaration;
(* One record field: its name and its type. *)
AstFieldDeclaration = RECORD
field_name: Identifier;
field_type: PAstTypeExpression
END;
(* Sequence of field declarations ended by a zero-filled entry. *)
PAstFieldDeclaration = POINTER TO AstFieldDeclaration;
(* Discriminator for the variants of AstTypeExpression. *)
AstTypeExpressionKind = (
astTypeExpressionKindNamed,
astTypeExpressionKindRecord,
astTypeExpressionKindEnumeration,
astTypeExpressionKindArray,
astTypeExpressionKindPointer,
astTypeExpressionKindProcedure
);
(* A type expression; kind selects which of the variant fields is valid. *)
AstTypeExpression = RECORD
CASE kind: AstTypeExpressionKind OF
(* Reference to a type by name. *)
astTypeExpressionKindNamed: name: Identifier |
(* Enumeration members, ended by a zero-filled identifier. *)
astTypeExpressionKindEnumeration: cases: PIdentifier |
(* Type the pointer points to. *)
astTypeExpressionKindPointer: target: PAstTypeExpression |
(* Record fields, ended by a zero-filled entry. *)
astTypeExpressionKindRecord: fields: PAstFieldDeclaration |
(* Element type and element count; the parser leaves length at 0 when none was given. *)
astTypeExpressionKindArray:
base: PAstTypeExpression;
length: CARDINAL |
(* Parameter types of a procedure type, ended by NIL. *)
astTypeExpressionKindProcedure: parameters: PPAstTypeExpression
END
END;
PAstTypeExpression = POINTER TO AstTypeExpression;
PPAstTypeExpression = POINTER TO PAstTypeExpression;
(* A type declaration: the declared name and the type expression bound to it. *)
AstTypeDeclaration = RECORD
identifier: Identifier;
type_expression: PAstTypeExpression
END;
PAstTypeDeclaration = POINTER TO AstTypeDeclaration;
(* Sequence of type declarations ended by NIL. *)
PPAstTypeDeclaration = POINTER TO PAstTypeDeclaration;
(* A variable declaration: the variable name and its type. *)
AstVariableDeclaration = RECORD
variable_name: Identifier;
variable_type: PAstTypeExpression
END;
PAstVariableDeclaration = POINTER TO AstVariableDeclaration;
(* Sequence of variable declarations ended by NIL. *)
PPAstVariableDeclaration = POINTER TO PAstVariableDeclaration;
(* The declaration parts of one module. *)
AstModule = RECORD
imports: PPAstImportStatement;
constants: PPAstConstantDeclaration;
types: PPAstTypeDeclaration;
variables: PPAstVariableDeclaration
END;
PAstModule = POINTER TO AstModule;
(* Each procedure starts at the current token of the lexer. *)
(* parse_type_expression returns NIL when the current token starts no known type. *)
PROCEDURE parse_type_expression(lexer: PLexer): PAstTypeExpression;
(* The part procedures return a NIL-ended sequence, holding only the NIL entry when the part is absent. *)
PROCEDURE parse_type_part(lexer: PLexer): PPAstTypeDeclaration;
PROCEDURE parse_variable_part(lexer: PLexer): PPAstVariableDeclaration;
PROCEDURE parse_constant_part(lexer: PLexer): PPAstConstantDeclaration;
PROCEDURE parse_import_part(lexer: PLexer): PPAstImportStatement;
END Parser.

View File

@ -1,466 +0,0 @@
module;
from SYSTEM import TSIZE;
from MemUtils import MemZero;
from Storage import ALLOCATE, REALLOCATE;
from Lexer import LexerKind, LexerToken, lexer_current, lexer_lex;
(* Returns the next token from lexer_lex() that is not a comment. *)
proc transpiler_lex(lexer: PLexer) -> LexerToken;
var
  token: LexerToken;
begin
  token := lexer_lex(lexer);
  (* Keep reading for as long as comments come back. *)
  while token.kind = lexerKindComment do
    token := lexer_lex(lexer)
  end;
  return token
end;
(* Parses the fields of a record type up to the closing end token. *)
(* Returns a sequence of field declarations ended by a zero-filled entry. *)
(* A record without fields yields a sequence holding only that entry. *)
proc parse_type_fields(lexer: PLexer) -> PAstFieldDeclaration;
var
  token: LexerToken;
  field_declarations: PAstFieldDeclaration;
  field_count: CARDINAL;
  current_field: PAstFieldDeclaration;
begin
  ALLOCATE(field_declarations, TSIZE(AstFieldDeclaration));
  (* Move from the record keyword to the first field name or to end. *)
  token := transpiler_lex(lexer);
  field_count := 0;
  while token.kind <> lexerKindEnd do
    INC(field_count);
    (* Always keep one spare slot behind the fields for the terminator. *)
    REALLOCATE(field_declarations, TSIZE(AstFieldDeclaration) * (field_count + 1));
    current_field := field_declarations;
    INC(current_field, TSIZE(AstFieldDeclaration) * (field_count - 1));
    (* The loop is entered with the field name as the current token, so the *)
    (* name has to be copied before the following colon is fetched. *)
    current_field^.field_name := token.identifierKind;
    token := transpiler_lex(lexer);
    (* Step onto the first token of the field type. *)
    token := transpiler_lex(lexer);
    current_field^.field_type := parse_type_expression(lexer);
    token := transpiler_lex(lexer);
    if token.kind = lexerKindSemicolon then
      token := transpiler_lex(lexer)
    end
  end;
  (* Locate the terminator from the start of the sequence. *)
  (* current_field is never assigned inside the loop when there are no fields. *)
  current_field := field_declarations;
  INC(current_field, TSIZE(AstFieldDeclaration) * field_count);
  MemZero(current_field, TSIZE(AstFieldDeclaration));
  return field_declarations
end;
(* Allocates a record type node and fills its fields via parse_type_fields. *)
proc parse_record_type(lexer: PLexer) -> PAstTypeExpression;
var
  record_type: PAstTypeExpression;
begin
  ALLOCATE(record_type, TSIZE(AstTypeExpression));
  record_type^.kind := astTypeExpressionKindRecord;
  record_type^.fields := parse_type_fields(lexer);
  return record_type
end;
(* Parses a pointer type and returns a node whose target is the pointed-to type. *)
proc parse_pointer_type(lexer: PLexer) -> PAstTypeExpression;
var
  token: LexerToken;
  result: PAstTypeExpression;
begin
  ALLOCATE(result, TSIZE(AstTypeExpression));
  result^.kind := astTypeExpressionKindPointer;
  token := lexer_current(lexer);
  (* Step over the pointer marker so that the target type becomes the current token. *)
  (* parse_type_expression dispatches here on lexerKindHat; if the hat were not *)
  (* consumed the call below would see the same token again and recurse without end. *)
  if (token.kind = lexerKindPointer) or (token.kind = lexerKindHat) then
    token := transpiler_lex(lexer)
  end;
  result^.target := parse_type_expression(lexer);
  return result
end;
(* Parses an array type; length stays 0 when the source gives no length. *)
proc parse_array_type(lexer: PLexer) -> PAstTypeExpression;
var
  token: LexerToken;
  buffer: [20]CHAR;
  array_type: PAstTypeExpression;
begin
  ALLOCATE(array_type, TSIZE(AstTypeExpression));
  array_type^.length := 0;
  array_type^.kind := astTypeExpressionKindArray;

  token := lexer_current(lexer);
  (* Skip the array keyword when the type starts with it. *)
  if token.kind = lexerKindArray then
    token := transpiler_lex(lexer)
  end;
  (* Anything but the of keyword here means a length follows. *)
  if token.kind <> lexerKindOf then
    token := transpiler_lex(lexer);
    array_type^.length := token.integerKind;
    token := transpiler_lex(lexer)
  end;
  (* Step onto the first token of the element type. *)
  token := transpiler_lex(lexer);
  array_type^.base := parse_type_expression(lexer);
  return array_type
end;
(* Parses an enumeration type: a comma separated list of member names. *)
(* The members are stored in cases, ended by a zero-filled identifier. *)
proc parse_enumeration_type(lexer: PLexer) -> PAstTypeExpression;
var
  token: LexerToken;
  enumeration: PAstTypeExpression;
  member: PIdentifier;
  member_count: CARDINAL;
begin
  ALLOCATE(enumeration, TSIZE(AstTypeExpression));
  enumeration^.kind := astTypeExpressionKindEnumeration;
  (* Room for the first member and the terminator. *)
  ALLOCATE(enumeration^.cases, TSIZE(Identifier) * 2);
  member_count := 1;

  (* The first member follows the current token directly. *)
  token := transpiler_lex(lexer);
  member := enumeration^.cases;
  member^ := token.identifierKind;

  token := transpiler_lex(lexer);
  while token.kind = lexerKindComma do
    token := transpiler_lex(lexer);
    INC(member_count);
    REALLOCATE(enumeration^.cases, TSIZE(Identifier) * (member_count + 1));
    member := enumeration^.cases;
    INC(member, TSIZE(Identifier) * (member_count - 1));
    member^ := token.identifierKind;
    token := transpiler_lex(lexer)
  end;
  (* The terminator sits right behind the last member. *)
  member := enumeration^.cases;
  INC(member, TSIZE(Identifier) * member_count);
  MemZero(member, TSIZE(Identifier));
  return enumeration
end;
(* Builds a named type node from the identifier of the current token. *)
(* The lexer is not advanced. *)
proc parse_named_type(lexer: PLexer) -> PAstTypeExpression;
var
  token: LexerToken;
  named_type: PAstTypeExpression;
  written_bytes: CARDINAL;
begin
  ALLOCATE(named_type, TSIZE(AstTypeExpression));
  named_type^.kind := astTypeExpressionKindNamed;
  token := lexer_current(lexer);
  named_type^.name := token.identifierKind;
  return named_type
end;
(* Parses a procedure type: a parenthesised, comma separated list of parameter types. *)
(* The parameter types are stored in parameters, ended by nil. *)
proc parse_procedure_type(lexer: PLexer) -> PAstTypeExpression;
var
  token: LexerToken;
  result: PAstTypeExpression;
  current_parameter: PPAstTypeExpression;
  parameter_count: CARDINAL;
begin
  parameter_count := 0;
  ALLOCATE(result, TSIZE(AstTypeExpression));
  result^.kind := astTypeExpressionKindProcedure;
  (* Reserve a whole pointer slot: with no parameters the nil terminator below *)
  (* is written into this very allocation. *)
  ALLOCATE(result^.parameters, TSIZE(PAstTypeExpression));
  (* Two tokens are consumed to reach the first parameter type or the right parenthesis. *)
  token := transpiler_lex(lexer);
  token := transpiler_lex(lexer);
  while token.kind <> lexerKindRightParen do
    INC(parameter_count);
    (* Keep one spare slot behind the parameters for the terminator. *)
    REALLOCATE(result^.parameters, TSIZE(PAstTypeExpression) * (parameter_count + 1));
    current_parameter := result^.parameters;
    INC(current_parameter, TSIZE(PAstTypeExpression) * (parameter_count - 1));
    current_parameter^ := parse_type_expression(lexer);
    token := transpiler_lex(lexer);
    if token.kind = lexerKindComma then
      token := transpiler_lex(lexer)
    end
  end;
  current_parameter := result^.parameters;
  INC(current_parameter, TSIZE(PAstTypeExpression) * parameter_count);
  current_parameter^ := nil;
  return result
end;
(* Chooses the type parser that matches the current token. *)
(* Returns nil when the token does not start any of the known type forms. *)
proc parse_type_expression(lexer: PLexer) -> PAstTypeExpression;
var
  token: LexerToken;
  parsed: PAstTypeExpression;
begin
  token := lexer_current(lexer);
  parsed := nil;
  (* The token kinds tested below exclude each other, so at most one branch runs. *)
  if token.kind = lexerKindIdentifier then
    parsed := parse_named_type(lexer)
  end;
  if token.kind = lexerKindHat then
    parsed := parse_pointer_type(lexer)
  end;
  if (token.kind = lexerKindLeftSquare) or (token.kind = lexerKindArray) then
    parsed := parse_array_type(lexer)
  end;
  if token.kind = lexerKindRecord then
    parsed := parse_record_type(lexer)
  end;
  if token.kind = lexerKindLeftParen then
    parsed := parse_enumeration_type(lexer)
  end;
  if token.kind = lexerKindProc then
    parsed := parse_procedure_type(lexer)
  end;
  return parsed
end;
(* Parses one type declaration that starts at the current identifier token. *)
proc parse_type_declaration(lexer: PLexer) -> PAstTypeDeclaration;
var
  token: LexerToken;
  declaration: PAstTypeDeclaration;
begin
  ALLOCATE(declaration, TSIZE(AstTypeDeclaration));
  token := lexer_current(lexer);
  declaration^.identifier := token.identifierKind;

  (* Two tokens are consumed to reach the start of the type expression. *)
  token := transpiler_lex(lexer);
  token := transpiler_lex(lexer);
  declaration^.type_expression := parse_type_expression(lexer);

  (* Consume the token that follows the type expression. *)
  token := transpiler_lex(lexer);
  return declaration
end;
(* Parses the type part when the current token is the type keyword. *)
(* Returns a nil-ended sequence of type declarations, possibly holding only nil. *)
proc parse_type_part(lexer: PLexer) -> PPAstTypeDeclaration;
var
  token: LexerToken;
  declarations: PPAstTypeDeclaration;
  slot: PPAstTypeDeclaration;
  count: CARDINAL;
begin
  count := 0;
  ALLOCATE(declarations, TSIZE(PAstTypeDeclaration));
  slot := declarations;
  token := lexer_current(lexer);

  if token.kind = lexerKindType then
    token := transpiler_lex(lexer);
    while token.kind = lexerKindIdentifier do
      INC(count);
      (* One spare slot is kept behind the declarations for the terminator. *)
      REALLOCATE(declarations, TSIZE(PAstTypeDeclaration) * (count + 1));
      slot := declarations;
      INC(slot, TSIZE(PAstTypeDeclaration) * (count - 1));
      slot^ := parse_type_declaration(lexer);
      token := transpiler_lex(lexer)
    end
  end;
  (* The terminator goes right behind the last declaration, or first when there is none. *)
  slot := declarations;
  INC(slot, TSIZE(PAstTypeDeclaration) * count);
  slot^ := nil;
  return declarations
end;
(* Parses one variable declaration that starts at the current identifier token. *)
proc parse_variable_declaration(lexer: PLexer) -> PAstVariableDeclaration;
var
  token: LexerToken;
  declaration: PAstVariableDeclaration;
begin
  ALLOCATE(declaration, TSIZE(AstVariableDeclaration));
  token := lexer_current(lexer);
  declaration^.variable_name := token.identifierKind;

  (* Two tokens are consumed to reach the start of the variable type. *)
  token := transpiler_lex(lexer);
  token := transpiler_lex(lexer);
  declaration^.variable_type := parse_type_expression(lexer);

  (* Consume the token that follows the type expression. *)
  token := transpiler_lex(lexer);
  return declaration
end;
(* Parses the variable part when the current token is the var keyword. *)
(* Returns a nil-ended sequence of variable declarations, possibly holding only nil. *)
proc parse_variable_part(lexer: PLexer) -> PPAstVariableDeclaration;
var
  token: LexerToken;
  declarations: PPAstVariableDeclaration;
  slot: PPAstVariableDeclaration;
  count: CARDINAL;
begin
  count := 0;
  ALLOCATE(declarations, TSIZE(PAstVariableDeclaration));
  slot := declarations;
  token := lexer_current(lexer);

  if token.kind = lexerKindVar then
    token := transpiler_lex(lexer);
    while token.kind = lexerKindIdentifier do
      INC(count);
      (* One spare slot is kept behind the declarations for the terminator. *)
      REALLOCATE(declarations, TSIZE(PAstVariableDeclaration) * (count + 1));
      slot := declarations;
      INC(slot, TSIZE(PAstVariableDeclaration) * (count - 1));
      slot^ := parse_variable_declaration(lexer);
      token := transpiler_lex(lexer)
    end
  end;
  (* The terminator goes right behind the last declaration, or first when there is none. *)
  slot := declarations;
  INC(slot, TSIZE(PAstVariableDeclaration) * count);
  slot^ := nil;
  return declarations
end;
(* Parses one constant declaration that starts at the current identifier token. *)
(* The value is taken from the integer payload of the second token after the name. *)
proc parse_constant_declaration(lexer: PLexer) -> PAstConstantDeclaration;
var
  token: LexerToken;
  declaration: PAstConstantDeclaration;
begin
  ALLOCATE(declaration, TSIZE(AstConstantDeclaration));
  token := lexer_current(lexer);
  declaration^.constant_name := token.identifierKind;

  token := transpiler_lex(lexer);
  token := transpiler_lex(lexer);
  declaration^.constant_value := token.integerKind;

  (* Consume the token that follows the value. *)
  token := transpiler_lex(lexer);
  return declaration
end;
(* Parses the constant part when the current token is the const keyword. *)
(* Returns a nil-ended sequence of constant declarations, possibly holding only nil. *)
proc parse_constant_part(lexer: PLexer) -> PPAstConstantDeclaration;
var
  token: LexerToken;
  declarations: PPAstConstantDeclaration;
  slot: PPAstConstantDeclaration;
  count: CARDINAL;
begin
  count := 0;
  ALLOCATE(declarations, TSIZE(PAstConstantDeclaration));
  slot := declarations;
  token := lexer_current(lexer);

  if token.kind = lexerKindConst then
    token := transpiler_lex(lexer);
    while token.kind = lexerKindIdentifier do
      INC(count);
      (* One spare slot is kept behind the declarations for the terminator. *)
      REALLOCATE(declarations, TSIZE(PAstConstantDeclaration) * (count + 1));
      slot := declarations;
      INC(slot, TSIZE(PAstConstantDeclaration) * (count - 1));
      slot^ := parse_constant_declaration(lexer);
      token := transpiler_lex(lexer)
    end
  end;
  (* The terminator goes right behind the last declaration, or first when there is none. *)
  slot := declarations;
  INC(slot, TSIZE(PAstConstantDeclaration) * count);
  slot^ := nil;
  return declarations
end;
(* Parses one import statement: a package name followed by a list of symbols. *)
(* The symbols are stored as a sequence ended by a zero-filled identifier. *)
proc parse_import_statement(lexer: PLexer) -> PAstImportStatement;
var
  statement: PAstImportStatement;
  token: LexerToken;
  count: CARDINAL;
  symbol: PIdentifier;
begin
  ALLOCATE(statement, TSIZE(AstImportStatement));
  count := 1;

  (* The package name is the token right after the current one. *)
  token := transpiler_lex(lexer);
  statement^.package := token.identifierKind;

  (* One token separates the package name from the first symbol. *)
  token := transpiler_lex(lexer);
  ALLOCATE(statement^.symbols, TSIZE(Identifier) * 2);
  symbol := statement^.symbols;
  token := transpiler_lex(lexer);
  symbol^ := token.identifierKind;

  token := transpiler_lex(lexer);
  while token.kind <> lexerKindSemicolon do
    (* Step from the separator onto the next symbol. *)
    token := transpiler_lex(lexer);
    INC(count);
    REALLOCATE(statement^.symbols, TSIZE(Identifier) * (count + 1));
    symbol := statement^.symbols;
    INC(symbol, TSIZE(Identifier) * (count - 1));
    symbol^ := token.identifierKind;
    token := transpiler_lex(lexer)
  end;
  (* The terminator sits right behind the last symbol. *)
  symbol := statement^.symbols;
  INC(symbol, TSIZE(Identifier) * count);
  MemZero(symbol, TSIZE(Identifier));

  (* Leave the lexer on the token after the semicolon. *)
  token := transpiler_lex(lexer);
  return statement
end;
(* Parses consecutive import statements for as long as the current token is the from keyword. *)
(* Returns a nil-ended sequence of import statements, possibly holding only nil. *)
proc parse_import_part(lexer: PLexer) -> PPAstImportStatement;
var
  token: LexerToken;
  slot: PPAstImportStatement;
  statements: PPAstImportStatement;
  count: CARDINAL;
begin
  count := 0;
  ALLOCATE(statements, TSIZE(PAstImportStatement));
  slot := statements;
  token := lexer_current(lexer);

  while token.kind = lexerKindFrom do
    INC(count);
    (* One spare slot is kept behind the statements for the terminator. *)
    REALLOCATE(statements, TSIZE(PAstImportStatement) * (count + 1));
    slot := statements;
    INC(slot, TSIZE(PAstImportStatement) * (count - 1));
    slot^ := parse_import_statement(lexer);
    (* parse_import_statement has already advanced past the statement. *)
    token := lexer_current(lexer)
  end;
  (* The terminator goes right behind the last statement, or first when there is none. *)
  slot := statements;
  INC(slot, TSIZE(PAstImportStatement) * count);
  slot^ := nil;
  return statements
end;
end.

View File

@ -1,18 +0,0 @@
(* Public interface of the transpiler. *)
DEFINITION MODULE Transpiler;
FROM FIO IMPORT File;
FROM Common IMPORT ShortString;
FROM Lexer IMPORT PLexer, Lexer;
TYPE
(* State shared by the transpile procedures. *)
(* output is the file the generated text is written to and lexer supplies the tokens. *)
(* NOTE(review): input_name is presumably the name of the source being transpiled - confirm. *)
TranspilerContext = RECORD
input_name: ShortString;
output: File;
lexer: PLexer
END;
PTranspilerContext = POINTER TO TranspilerContext;
(* Entry point; takes the values that make up a TranspilerContext. *)
PROCEDURE transpile(lexer: PLexer; output: File; input_name: ShortString);
END Transpiler.

View File

@ -1,680 +0,0 @@
module;
from FIO import WriteNBytes, WriteLine, WriteChar, WriteString;
from SYSTEM import ADR, ADDRESS, TSIZE;
from NumberIO import IntToStr;
from Storage import ALLOCATE, REALLOCATE;
from MemUtils import MemCopy, MemZero;
from Common import Identifier, PIdentifier, ShortString;
from Lexer import Lexer, LexerToken, lexer_current, lexer_lex, LexerKind;
from Parser import AstModule, PAstModule, AstTypeExpressionKind,
PAstConstantDeclaration, PPAstConstantDeclaration,
AstTypeDeclaration, PAstTypeDeclaration, PPAstTypeDeclaration,
PAstVariableDeclaration, PPAstVariableDeclaration, PAstImportStatement, PPAstImportStatement,
PAstTypeExpression, PPAstTypeExpression, AstFieldDeclaration, PAstFieldDeclaration,
parse_type_expression, parse_variable_part, parse_type_part, parse_constant_part, parse_import_part;
(* Wraps lexer_lex() so that callers never see comment tokens. *)
proc transpiler_lex(lexer: PLexer) -> LexerToken;
var
  token: LexerToken;
begin
  token := lexer_lex(lexer);
  (* Discard comments until some other token arrives. *)
  while token.kind = lexerKindComment do
    token := lexer_lex(lexer)
  end;
  return token
end;
(* Ends the current output line with a semicolon and a line break. *)
proc write_semicolon(output: File);
begin
  WriteChar(output, ';');
  WriteLine(output)
end;
(* Copies the bytes between lexer^.Start and lexer^.Current to the output. *)
(* The byte count returned by WriteNBytes is not checked. *)
proc write_current(lexer: PLexer, output: File);
var
  ignored: CARDINAL;
begin
  ignored := WriteNBytes(output, ADDRESS(lexer^.Current - lexer^.Start), lexer^.Start)
end;
(* Writes one import statement as FROM package IMPORT a, b; *)
(* Identifiers keep their length in element 1 and their characters from element 2 on. *)
proc transpile_import_statement(context: PTranspilerContext, import_statement: PAstImportStatement);
var
  token: LexerToken;
  ignored: CARDINAL;
  symbol: PIdentifier;
begin
  WriteString(context^.output, 'FROM ');
  ignored := WriteNBytes(context^.output, ORD(import_statement^.package[1]), ADR(import_statement^.package[2]));
  WriteString(context^.output, ' IMPORT ');

  (* The first symbol is written without a leading separator. *)
  symbol := import_statement^.symbols;
  ignored := WriteNBytes(context^.output, ORD(symbol^[1]), ADR(symbol^[2]));
  INC(symbol, TSIZE(Identifier));

  (* A zero length marks the end of the symbol sequence. *)
  while ORD(symbol^[1]) <> 0 do
    WriteString(context^.output, ', ');
    ignored := WriteNBytes(context^.output, ORD(symbol^[1]), ADR(symbol^[2]));
    INC(symbol, TSIZE(Identifier))
  end;
  write_semicolon(context^.output)
end;
(* Writes every import statement of a nil-ended sequence, then one empty line. *)
proc transpile_import_part(context: PTranspilerContext, imports: PPAstImportStatement);
var
  import_statement: PAstImportStatement;
begin
  (* imports is a local copy of the pointer and serves as the cursor. *)
  while imports^ <> nil do
    transpile_import_statement(context, imports^);
    INC(imports, TSIZE(PAstImportStatement))
  end;
  WriteLine(context^.output)
end;
(* Writes one constant declaration as name = value; *)
proc transpile_constant_declaration(context: PTranspilerContext, declaration: PAstConstantDeclaration);
var
  value_text: [20]CHAR;
  ignored: CARDINAL;
begin
  WriteString(context^.output, ' ');
  ignored := WriteNBytes(context^.output, ORD(declaration^.constant_name[1]), ADR(declaration^.constant_name[2]));
  WriteString(context^.output, ' = ');

  (* The value is converted to text before it is written. *)
  IntToStr(declaration^.constant_value, 0, value_text);
  WriteString(context^.output, value_text);
  write_semicolon(context^.output)
end;
(* Writes the CONST section for a nil-ended sequence of constant declarations. *)
(* Nothing is written when the sequence is empty. *)
proc transpile_constant_part(context: PTranspilerContext, declarations: PPAstConstantDeclaration);
var
  cursor: PPAstConstantDeclaration;
begin
  cursor := declarations;
  if cursor^ <> nil then
    WriteString(context^.output, 'CONST');
    WriteLine(context^.output);
    while cursor^ <> nil do
      transpile_constant_declaration(context, cursor^);
      INC(cursor, TSIZE(PAstConstantDeclaration))
    end;
    (* An empty line separates the section from what follows. *)
    WriteLine(context^.output)
  end
end;
(* Transpiles a whole module and returns the parsed declaration parts. *)
(* Output order: module header, imports, constants, types, variables, *)
(* procedures, statements and the closing END line. *)
(* The lexer calls are interleaved with the writes, so the statement order matters. *)
proc transpile_module(context: PTranspilerContext) -> PAstModule;
var
token: LexerToken;
result: PAstModule;
begin
ALLOCATE(result, TSIZE(AstModule));
token := transpiler_lex(context^.lexer);
(* A source that starts with the module keyword is written as an implementation module. *)
if token.kind = lexerKindModule then
WriteString(context^.output, 'IMPLEMENTATION ')
end;
WriteString(context^.output, 'MODULE ');
(* Write the module name and end the line with a semicolon and newline. *)
transpile_module_name(context);
token := transpiler_lex(context^.lexer);
write_semicolon(context^.output);
WriteLine(context^.output);
(* Write the module body. *)
token := transpiler_lex(context^.lexer);
(* Each declaration part is parsed first and then written from the parsed form. *)
result^.imports := parse_import_part(context^.lexer);
transpile_import_part(context, result^.imports);
result^.constants := parse_constant_part(context^.lexer);
transpile_constant_part(context, result^.constants);
result^.types := parse_type_part(context^.lexer);
transpile_type_part(context, result^.types);
result^.variables := parse_variable_part(context^.lexer);
transpile_variable_part(context, result^.variables);
(* Procedures and statements are written straight from the token stream. *)
transpile_procedure_part(context);
transpile_statement_part(context);
(* Closing line: END, the module name and a dot. *)
WriteString(context^.output, 'END ');
transpile_module_name(context);
token := transpiler_lex(context^.lexer);
WriteChar(context^.output, '.');
token := transpiler_lex(context^.lexer);
WriteLine(context^.output);
return result
end;
(* Writes the fields of a record, one per line, as name: type. *)
(* Every field but the last one is followed by a semicolon. *)
proc transpile_type_fields(context: PTranspilerContext, fields: PAstFieldDeclaration);
var
  ignored: CARDINAL;
  field: PAstFieldDeclaration;
begin
  field := fields;
  (* A field with a zero length name ends the sequence. *)
  while ORD(field^.field_name[1]) <> 0 do
    WriteString(context^.output, ' ');
    ignored := WriteNBytes(context^.output, ORD(field^.field_name[1]), ADR(field^.field_name[2]));
    WriteString(context^.output, ': ');
    transpile_type_expression(context, field^.field_type);

    (* Look at the following entry to decide whether a separator is needed. *)
    INC(field, TSIZE(AstFieldDeclaration));
    if ORD(field^.field_name[1]) <> 0 then
      WriteChar(context^.output, ';')
    end;
    WriteLine(context^.output)
  end
end;
(* Writes a record type: RECORD, the fields and the closing END. *)
proc transpile_record_type(context: PTranspilerContext, type_expression: PAstTypeExpression);
begin
  WriteString(context^.output, 'RECORD');
  WriteLine(context^.output);
  transpile_type_fields(context, type_expression^.fields);
  WriteString(context^.output, ' END')
end;
(* Writes a pointer type as POINTER TO followed by the target type. *)
proc transpile_pointer_type(context: PTranspilerContext, type_expression: PAstTypeExpression);
var
  token: LexerToken;
begin
  WriteString(context^.output, 'POINTER TO ');
  transpile_type_expression(context, type_expression^.target)
end;
(* Writes an array type as ARRAY[1..length] OF base. *)
(* The bounds are left out when the length is 0. *)
proc transpile_array_type(context: PTranspilerContext, type_expression: PAstTypeExpression);
var
  length_text: [20]CHAR;
begin
  WriteString(context^.output, 'ARRAY');
  if type_expression^.length <> 0 then
    (* The generated index range always starts at 1. *)
    WriteString(context^.output, '[1..');
    IntToStr(type_expression^.length, 0, length_text);
    WriteString(context^.output, length_text);
    WriteChar(context^.output, ']')
  end;
  WriteString(context^.output, ' OF ');
  transpile_type_expression(context, type_expression^.base)
end;
(* Writes an enumeration type with one member per line inside parentheses. *)
proc transpile_enumeration_type(context: PTranspilerContext, type_expression: PAstTypeExpression);
var
  member: PIdentifier;
  ignored: CARDINAL;
begin
  member := type_expression^.cases;

  WriteString(context^.output, '(');
  WriteLine(context^.output);
  (* The first member is written without a leading comma. *)
  WriteString(context^.output, ' ');
  ignored := WriteNBytes(context^.output, ORD(member^[1]), ADR(member^[2]));
  INC(member, TSIZE(Identifier));

  (* A zero length identifier ends the member sequence. *)
  while ORD(member^[1]) <> 0 do
    WriteChar(context^.output, ',');
    WriteLine(context^.output);
    WriteString(context^.output, ' ');
    ignored := WriteNBytes(context^.output, ORD(member^[1]), ADR(member^[2]));
    INC(member, TSIZE(Identifier))
  end;
  WriteLine(context^.output);
  WriteString(context^.output, ' )')
end;
(* Writes the name of a named type. *)
(* Element 1 of the identifier is its length, the characters start at element 2. *)
proc transpile_named_type(context: PTranspilerContext, type_expression: PAstTypeExpression);
var
  ignored: CARDINAL;
begin
  ignored := WriteNBytes(context^.output, ORD(type_expression^.name[1]), ADR(type_expression^.name[2]))
end;
(* Writes a procedure type as PROCEDURE(type, type). *)
proc transpile_procedure_type(context: PTranspilerContext, type_expression: PAstTypeExpression);
var
  result: PAstTypeExpression;
  parameter: PPAstTypeExpression;
  parameter_count: CARDINAL;
begin
  WriteString(context^.output, 'PROCEDURE(');
  parameter := type_expression^.parameters;
  (* The parameter sequence is ended by nil. *)
  while parameter^ <> nil do
    transpile_type_expression(context, parameter^);
    INC(parameter, TSIZE(PAstTypeExpression));
    (* A separator is written only when another parameter follows. *)
    if parameter^ <> nil then
      WriteString(context^.output, ', ')
    end
  end;
  WriteChar(context^.output, ')')
end;
(* Writes a type expression by handing it to the writer that matches its kind. *)
proc transpile_type_expression(context: PTranspilerContext, type_expression: PAstTypeExpression);
begin
  (* The kinds exclude each other, so exactly one of the branches runs. *)
  if type_expression^.kind = astTypeExpressionKindNamed then
    transpile_named_type(context, type_expression)
  end;
  if type_expression^.kind = astTypeExpressionKindPointer then
    transpile_pointer_type(context, type_expression)
  end;
  if type_expression^.kind = astTypeExpressionKindArray then
    transpile_array_type(context, type_expression)
  end;
  if type_expression^.kind = astTypeExpressionKindRecord then
    transpile_record_type(context, type_expression)
  end;
  if type_expression^.kind = astTypeExpressionKindEnumeration then
    transpile_enumeration_type(context, type_expression)
  end;
  if type_expression^.kind = astTypeExpressionKindProcedure then
    transpile_procedure_type(context, type_expression)
  end
end;
(* Writes one type declaration as name = type; *)
proc transpile_type_declaration(context: PTranspilerContext, declaration: PAstTypeDeclaration);
var
  ignored: CARDINAL;
begin
  WriteString(context^.output, ' ');
  ignored := WriteNBytes(context^.output, ORD(declaration^.identifier[1]), ADR(declaration^.identifier[2]));
  WriteString(context^.output, ' = ');
  transpile_type_expression(context, declaration^.type_expression);
  write_semicolon(context^.output)
end;
(* Writes the TYPE section for a nil-ended sequence of type declarations. *)
(* Nothing is written when the sequence is empty. *)
proc transpile_type_part(context: PTranspilerContext, declarations: PPAstTypeDeclaration);
var
  cursor: PPAstTypeDeclaration;
begin
  cursor := declarations;
  if cursor^ <> nil then
    WriteString(context^.output, 'TYPE');
    WriteLine(context^.output);
    while cursor^ <> nil do
      transpile_type_declaration(context, cursor^);
      INC(cursor, TSIZE(PAstTypeDeclaration))
    end;
    (* An empty line separates the section from what follows. *)
    WriteLine(context^.output)
  end
end;
(* Writes one variable declaration as name: type; *)
proc transpile_variable_declaration(context: PTranspilerContext, declaration: PAstVariableDeclaration);
var
  ignored: CARDINAL;
begin
  WriteString(context^.output, ' ');
  ignored := WriteNBytes(context^.output, ORD(declaration^.variable_name[1]), ADR(declaration^.variable_name[2]));
  WriteString(context^.output, ': ');
  transpile_type_expression(context, declaration^.variable_type);
  write_semicolon(context^.output)
end;
(* Writes the VAR section for a nil-ended sequence of variable declarations. *)
(* Nothing is written when the sequence is empty. *)
proc transpile_variable_part(context: PTranspilerContext, declarations: PPAstVariableDeclaration);
var
  cursor: PPAstVariableDeclaration;
begin
  cursor := declarations;
  if cursor^ <> nil then
    WriteString(context^.output, 'VAR');
    WriteLine(context^.output);
    while cursor^ <> nil do
      transpile_variable_declaration(context, cursor^);
      INC(cursor, TSIZE(PAstVariableDeclaration))
    end;
    (* An empty line separates the section from what follows. *)
    WriteLine(context^.output)
  end
end;
(* Writes a procedure heading as PROCEDURE name(parameter: type; ...): result; *)
(* Returns the token that holds the procedure name. *)
(* The lexer calls are interleaved with the writes, so the statement order matters. *)
proc transpile_procedure_heading(context: PTranspilerContext) -> LexerToken;
var
token: LexerToken;
result: LexerToken;
type_expression: PAstTypeExpression;
begin
WriteString(context^.output, 'PROCEDURE ');
(* The token after the current one is the procedure name; it is echoed as is. *)
result := transpiler_lex(context^.lexer);
write_current(context^.lexer, context^.output);
token := transpiler_lex(context^.lexer);
WriteChar(context^.output, '(');
token := transpiler_lex(context^.lexer);
(* Each round handles one parameter: name, a separator token, then the type. *)
while token.kind <> lexerKindRightParen do
write_current(context^.lexer, context^.output);
token := transpiler_lex(context^.lexer);
WriteString(context^.output, ': ');
token := transpiler_lex(context^.lexer);
(* The parameter type is parsed into a tree and written from it. *)
type_expression := parse_type_expression(context^.lexer);
transpile_type_expression(context, type_expression);
token := transpiler_lex(context^.lexer);
(* Both a semicolon and a comma between parameters are written as a semicolon. *)
if (token.kind = lexerKindSemicolon) or (token.kind = lexerKindComma) then
WriteString(context^.output, '; ');
token := transpiler_lex(context^.lexer)
end
end;
WriteString(context^.output, ')');
token := transpiler_lex(context^.lexer);
(* Check for the return type and write it. *)
(* The return type is echoed as a single token, not parsed as a type expression. *)
if token.kind = lexerKindArrow then
WriteString(context^.output, ': ');
token := transpiler_lex(context^.lexer);
write_current(context^.lexer, context^.output);
token := transpiler_lex(context^.lexer)
end;
token := transpiler_lex(context^.lexer);
write_semicolon(context^.output);
return result
end;
(* Writes the tokens of an expression until trailing_token or the end keyword shows up. *)
(* Literals and operators with a different spelling in the output are replaced, *)
(* every other token is echoed followed by a space. *)
proc transpile_expression(context: PTranspilerContext, trailing_token: LexerKind);
var
  token: LexerToken;
  handled: CARDINAL;
begin
  token := transpiler_lex(context^.lexer);
  while (token.kind <> trailing_token) & (token.kind <> lexerKindEnd) do
    (* handled stays 0 until one of the replacements below applies. *)
    handled := 0;
    if token.kind = lexerKindAnd then
      WriteString(context^.output, 'AND ');
      handled := 1
    end;
    if token.kind = lexerKindOr then
      WriteString(context^.output, 'OR ');
      handled := 1
    end;
    if token.kind = lexerKindNot then
      WriteString(context^.output, 'NOT ');
      handled := 1
    end;
    if token.kind = lexerKindNull then
      WriteString(context^.output, 'NIL ');
      handled := 1
    end;
    if (token.kind = lexerKindBoolean) & token.booleanKind then
      WriteString(context^.output, 'TRUE ');
      handled := 1
    end;
    if (token.kind = lexerKindBoolean) & (~token.booleanKind) then
      WriteString(context^.output, 'FALSE ');
      handled := 1
    end;
    (* Everything else is copied from the source text. *)
    if handled = 0 then
      write_current(context^.lexer, context^.output);
      WriteChar(context^.output, ' ')
    end;
    token := transpiler_lex(context^.lexer)
  end
end;
(* Writes an if statement: IF condition THEN, the nested statements and END. *)
proc transpile_if_statement(context: PTranspilerContext);
var
  next_token: LexerToken;
begin
  WriteString(context^.output, ' IF ');
  (* The condition runs up to the then keyword. *)
  transpile_expression(context, lexerKindThen);
  WriteString(context^.output, 'THEN');
  WriteLine(context^.output);

  transpile_statements(context);
  WriteString(context^.output, ' END');
  (* Advance by one token after the statement has been written. *)
  next_token := transpiler_lex(context^.lexer)
end;
(* Writes a while statement: WHILE condition DO, the nested statements and END. *)
proc transpile_while_statement(context: PTranspilerContext);
var
  next_token: LexerToken;
begin
  WriteString(context^.output, ' WHILE ');
  (* The condition runs up to the do keyword. *)
  transpile_expression(context, lexerKindDo);
  WriteString(context^.output, 'DO');
  WriteLine(context^.output);

  transpile_statements(context);
  WriteString(context^.output, ' END');
  (* Advance by one token after the statement has been written. *)
  next_token := transpiler_lex(context^.lexer)
end;
(* Writes the assignment operator and the expression up to the next semicolon. *)
(* Nothing is written here for the assignment target. *)
proc transpile_assignment_statement(context: PTranspilerContext);
begin
  WriteString(context^.output, ' := ');
  transpile_expression(context, lexerKindSemicolon)
end;
(* Writes the argument list of a call: an opening parenthesis and then every *)
(* token, unchanged and unseparated, up to a semicolon or the end keyword. *)
proc transpile_call_statement(context: PTranspilerContext);
var
  argument_token: LexerToken;
begin
  WriteString(context^.output, '(');
  argument_token := transpiler_lex(context^.lexer);
  while (argument_token.kind <> lexerKindSemicolon) & (argument_token.kind <> lexerKindEnd) do
    write_current(context^.lexer, context^.output);
    argument_token := transpiler_lex(context^.lexer)
  end
end;
(*
  Transpiles a designator: the identifier that is the current token,
  followed by any number of selectors in any order.

  Supported selectors are an index in square brackets, a dereference (hat)
  and a field access (dot followed by a name). Earlier only the fixed shape
  indexes, hat, one field, hat, indexes was accepted, so a designator with
  two field accesses or two dereferences with a field in between was cut
  short. Designators of the old shape produce exactly the same output.

  On return the current lexer token is the first token that is not part of
  the designator, for example the assignment operator or an opening
  parenthesis.
*)
proc transpile_designator_expression(context: PTranspilerContext);
var
  token: LexerToken;
  selector_found: CARDINAL;
begin
  WriteString(context^.output, ' ');
  write_current(context^.lexer, context^.output);
  token := transpiler_lex(context^.lexer);

  (* selector_found is a flag: the loop repeats for as long as the last
     pass consumed at least one selector. *)
  selector_found := 1;
  while selector_found = 1 do
    selector_found := 0;

    if token.kind = lexerKindLeftSquare then
      WriteChar(context^.output, '[');
      token := transpiler_lex(context^.lexer);
      (* The index expression is copied verbatim up to the closing bracket. *)
      while token.kind <> lexerKindRightSquare do
        write_current(context^.lexer, context^.output);
        token := transpiler_lex(context^.lexer)
      end;
      WriteChar(context^.output, ']');
      token := transpiler_lex(context^.lexer);
      selector_found := 1
    end;
    if token.kind = lexerKindHat then
      WriteChar(context^.output, '^');
      token := transpiler_lex(context^.lexer);
      selector_found := 1
    end;
    if token.kind = lexerKindDot then
      WriteChar(context^.output, '.');
      (* The token after the dot is the field name. *)
      token := transpiler_lex(context^.lexer);
      write_current(context^.lexer, context^.output);
      token := transpiler_lex(context^.lexer);
      selector_found := 1
    end
  end
end;
(*
  Transpiles a return statement whose leading return token has already
  been read by the caller.

  RETURN is written and the returned expression is copied up to the
  terminating semicolon (transpile_expression also stops at an end token).
  The previously declared local token was never used and has been removed.
*)
proc transpile_return_statement(context: PTranspilerContext);
begin
  WriteString(context^.output, ' RETURN ');
  transpile_expression(context, lexerKindSemicolon)
end;
(*
  Transpiles a single statement.

  The next token is read and decides the statement kind: if, while, return,
  or an identifier. An identifier starts a designator; the token following
  the designator selects between an assignment and a procedure call. Any
  other leading token produces no output.
*)
proc transpile_statement(context: PTranspilerContext);
var
  leading: LexerToken;
  following: LexerToken;
begin
  leading := transpiler_lex(context^.lexer);

  if leading.kind = lexerKindIdentifier then
    transpile_designator_expression(context);
    (* The designator leaves the first token after itself as current. *)
    following := lexer_current(context^.lexer);

    if following.kind = lexerKindLeftParen then
      transpile_call_statement(context)
    end;
    if following.kind = lexerKindAssignment then
      transpile_assignment_statement(context)
    end
  end;
  if leading.kind = lexerKindReturn then
    transpile_return_statement(context)
  end;
  if leading.kind = lexerKindWhile then
    transpile_while_statement(context)
  end;
  if leading.kind = lexerKindIf then
    transpile_if_statement(context)
  end
end;
(*
  Transpiles a statement sequence up to the closing end token.

  Statements are transpiled one after another for as long as the current
  lexer token is not lexerKindEnd. If a statement stopped on a semicolon,
  the semicolon is written too. Every statement is followed by a line
  break. The end token itself is left for the caller.
*)
proc transpile_statements(context: PTranspilerContext);
var
  cursor: LexerToken;
begin
  cursor := lexer_current(context^.lexer);

  while cursor.kind <> lexerKindEnd do
    transpile_statement(context);

    (* Look at the token the statement stopped on. *)
    cursor := lexer_current(context^.lexer);
    if cursor.kind = lexerKindSemicolon then
      WriteChar(context^.output, ';')
    end;
    WriteLine(context^.output)
  end
end;
(*
  Transpiles the optional statement part of a block.

  If the current lexer token is begin, BEGIN and a line break are written
  and the statements that follow are transpiled. Otherwise nothing is
  written.
*)
proc transpile_statement_part(context: PTranspilerContext);
var
  opening: LexerToken;
begin
  opening := lexer_current(context^.lexer);

  if opening.kind = lexerKindBegin then
    WriteString(context^.output, 'BEGIN');
    WriteLine(context^.output);
    transpile_statements(context)
  end
end;
(*
  Transpiles a complete procedure declaration: heading, constant part,
  variable part, statement part and the closing END followed by the
  procedure name.

  The token returned by transpile_procedure_heading carries the procedure
  name in identifierKind. Its element 1 is used as the byte count and the
  bytes are taken starting at element 2 when the name is repeated after
  END. Afterwards two tokens are read: NOTE(review) presumably the closing
  end is current at that point, so the first read yields the semicolon and
  the second one moves on to whatever follows the declaration.
*)
proc transpile_procedure_declaration(context: PTranspilerContext);
var
  heading: LexerToken;
  skipped: LexerToken;
  constants: PPAstConstantDeclaration;
  variables: PPAstVariableDeclaration;
  name_bytes: CARDINAL;
begin
  heading := transpile_procedure_heading(context);

  (* Declarations are parsed first and written afterwards. *)
  constants := parse_constant_part(context^.lexer);
  transpile_constant_part(context, constants);
  variables := parse_variable_part(context^.lexer);
  transpile_variable_part(context, variables);

  transpile_statement_part(context);

  WriteString(context^.output, 'END ');
  name_bytes := WriteNBytes(context^.output, ORD(heading.identifierKind[1]), ADR(heading.identifierKind[2]));

  skipped := transpiler_lex(context^.lexer);
  write_semicolon(context^.output);
  skipped := transpiler_lex(context^.lexer)
end;
(*
  Transpiles a sequence of procedure declarations.

  For as long as the current lexer token is proc, one declaration is
  transpiled and followed by a line break.
*)
proc transpile_procedure_part(context: PTranspilerContext);
var
  lookahead: LexerToken;
begin
  lookahead := lexer_current(context^.lexer);

  while lookahead.kind = lexerKindProc do
    transpile_procedure_declaration(context);
    WriteLine(context^.output);
    lookahead := lexer_current(context^.lexer)
  end
end;
(*
  Writes the module name derived from the input file name: the part of
  context^.input_name after the last slash and before the next dot (or the
  end of the string if there is no dot).

  The search for the last slash used to stop at the first dot anywhere in
  the path. A path with a dot in a directory component, such as a leading
  ./ or ../, therefore produced an empty or a wrong name. The whole path is
  scanned now.

  The input name is read from index 1 up to a character with ordinal 0.
  NOTE(review): like before, this relies on the name being terminated that
  way; confirm against the code that fills input_name.
*)
proc transpile_module_name(context: PTranspilerContext);
var
  counter: CARDINAL;
  last_slash: CARDINAL;
begin
  counter := 1;
  last_slash := 0;

  (* Find the last slash in the whole path; 0 means there is none. *)
  while ORD(context^.input_name[counter]) <> 0 do
    if context^.input_name[counter] = '/' then
      last_slash := counter
    end;
    INC(counter)
  end;

  (* Without a slash this starts at index 1, the beginning of the name. *)
  counter := last_slash + 1;
  while (context^.input_name[counter] <> '.') & (ORD(context^.input_name[counter]) <> 0) do
    WriteChar(context^.output, context^.input_name[counter]);
    INC(counter)
  end
end;
(*
  Entry point of the transpiler.

  Collects the lexer, the output file and the input file name in a
  TranspilerContext and passes its address to transpile_module. The module
  returned by transpile_module is stored but not used any further here.
  The previously declared local token was never used and has been removed.
*)
proc transpile(lexer: PLexer, output: File, input_name: ShortString);
var
  context: TranspilerContext;
  ast_module: PAstModule;
begin
  context.input_name := input_name;
  context.output := output;
  context.lexer := lexer;

  ast_module := transpile_module(ADR(context))
end;
end.