aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md (renamed from gcc/README.md)4
-rw-r--r--Rakefile1
-rw-r--r--doc/appendix.tex10
-rw-r--r--doc/language.tex193
-rw-r--r--frontend/driver.cc6
-rw-r--r--frontend/lexer.ll4
-rw-r--r--rakelib/doc.rake5
-rw-r--r--rakelib/gcc.rake3
-rw-r--r--source/main.elna9
9 files changed, 204 insertions, 31 deletions
diff --git a/gcc/README.md b/README.md
index 14219d8..039639a 100644
--- a/gcc/README.md
+++ b/README.md
@@ -28,14 +28,14 @@ and Mac OS. In the latter case GCC is patched with the patches used by Homebrew
(official GCC doesn't support Apple silicon targets). Invoke with
```sh
-rake boot
+rake gcc
```
`gcc` binary is used by default, but a different gcc version can be specified
by passing `CC` and `CXX` environment variables to rake, e.g.:
```sh
-rake CC=gcc-15 CXX=g++-15 boot
+rake CC=gcc-15 CXX=g++-15 gcc
```
See `rake -T` for more tasks. The GCC source is under `build/tools`. The
diff --git a/Rakefile b/Rakefile
index 4b2ac58..16fc6a6 100644
--- a/Rakefile
+++ b/Rakefile
@@ -7,6 +7,7 @@ require 'pathname'
require 'rake/clean'
CLEAN.include 'build/gcc'
+CLOBBER.include 'build'
task default: :source
diff --git a/doc/appendix.tex b/doc/appendix.tex
index 0777d55..09bdbde 100644
--- a/doc/appendix.tex
+++ b/doc/appendix.tex
@@ -14,14 +14,14 @@
<binary-digit> = `0' | `1'.
-<hex-character> = `\\x' <hex-digit> <hex-digit>.
+<hex-character> = `\\x' <hex-digit> \{<hex-digit>\}.
<escaped-character> = `\\' \\
- (`n' | `a' | `b' | `t' | `f' | `r' | `v' | `\\' | `\textquotesingle' | `\textquotedbl' | `?\@' | `0').
+ (`n' | `t' | `f' | `r' | `v' | `\\' | `\textquotesingle' | `\textquotedbl' | `0').
<printable-character> = \enspace? a printable ASCII character\space?.
-<character> = <printable-character> | <escaped-character> | <hex-digit>.
+<character> = <printable-character> | <escaped-character> | <hex-character>.
<identifier> = <letter> \{<letter> | <decimal-digit>\}.
@@ -38,10 +38,10 @@
<real-literal> = <integer-literal> `.\@' <decimal-digit> \{<decimal-digit>\}
\alt{} <integer-literal>\} `e' [`+' | `-'] <decimal-digit> \{<decimal-digit>\}.
-<string-literal> = `\textquotedbl' \{<character>\} `\textquotedbl'.
-
<character-literal> = `\textquotesingle' <character> `\textquotesingle'.
+<string-literal> = `\textquotedbl' \{<character>\} `\textquotedbl'.
+
<literal> = <integer-literal> | <word-literal> | <real-literal>
\alt{} <string-literal> | <character-literal>
\alt{} `true' | `false' | `nil'.
diff --git a/doc/language.tex b/doc/language.tex
index 9112127..36ee877 100644
--- a/doc/language.tex
+++ b/doc/language.tex
@@ -10,10 +10,179 @@ Each procedure can get some input and produce an output as a result of
executing a \textbf{statement block}, a list, where each \textbf{statement}
is executed in the order it appears in the block.
-Statement components are other statement blocks and \textbf{expressions},
-where a statement has control over the evaluation of its components.
-Statements can also modify the state of the procedure or the program by
-mutating variables.
+\chapter{Vocabulary}
+
+A language is an infinite set of sentences, namely the sentences well formed
+according to its syntax. In Elna, these sentences are called compilation units.
+Each unit is a finite sequence of \textit{tokens} from a finite vocabulary.
+The vocabulary of Elna consists of identifiers, reserved words, numbers, characters,
+strings, operators, delimiters, and comments. They are called \textit{tokens}
+and are composed of sequences of characters.
+
+The following lexical rules must be observed when composing tokens. Blanks and
+line breaks must not occur within tokens (except in comments and strings). They
+are ignored unless they are essential to separate two consecutive tokens.
+Capital and lower-case letters are considered as being distinct.
+
+\section{Identifiers}
+
+\textit{Identifiers} are sequences of letters, digits and underscores. The first
+character must be a letter or an underscore.
+
+\begin{grammar}
+<identifier> = <letter> \{<letter> | <decimal-digit>\}.
+\end{grammar}
+
+Examples:
+
+\begin{itemize}
+ \item \verb|x|
+ \item \verb|TypeName|
+ \item \verb|procedure_name|
+\end{itemize}
+
+\section{Numbers}
+
+Numbers are signed or unsigned integers, or real numbers. Integers may be
+preceded by a prefix and followed by a suffix. The prefixes \verb|0x| and
+\verb|0X| indicate hexadecimal representation, \verb|0b| and \verb|0B|
+indicate binary representation. Unsigned integers have the suffix \verb|u|,
+signed integers have no suffix.
+
+A \textit{real number} always contains a decimal point. Optionally it may
+also contain a decimal scale factor. The letter \verb|e| or \verb|E| is
+pronounced as `times ten to the power of'.
+
+\begin{grammar}
+<integer-literal> = `0' | <counting-digit> \{<decimal-digit>\}.
+
+<word-literal> = <integer-literal> `u'
+ \alt{} `0' (`X' | `x') <hex-digit> \{<hex-digit>\}
+ \alt{} `0' (`B' | `b') <binary-digit> \{<binary-digit>\}.
+
+<real-literal> = <integer-literal> `.\@' <decimal-digit> \{<decimal-digit>\}
+ \alt{} <integer-literal> `e' [`+' | `-'] <decimal-digit> \{<decimal-digit>\}.
+\end{grammar}
+
+Examples:
+
+\begin{itemize}
+ \item 2016
+ \item 1987u
+ \item 0xff
+ \item 0b101
+ \item 0.5
+ \item 4.567e8
+\end{itemize}
+
+\section{Strings and characters}
+
+Single \textit{characters} are enclosed in single quotation marks
+(\textquotesingle).\@ \textit{Strings} are sequences of characters enclosed in
+double quotation marks (\textquotedbl). The number of characters in a string is
+called the \textit{length} of the string.
+
+\begin{grammar}
+<escaped-character> = `\\' \\
+ (`n' | `t' | `f' | `r' | `v' | `\\' | `\textquotesingle' | `\textquotedbl' | `0').
+
+<hex-character> = `\\x' <hex-digit> \{<hex-digit>\}.
+
+<character> = <printable-character> | <escaped-character> | <hex-character>.
+
+<character-literal> = `\textquotesingle' <character> `\textquotesingle'.
+
+<string-literal> = `\textquotedbl' \{<character>\} `\textquotedbl'.
+\end{grammar}
+
+Alternatively, a single character may be represented by an
+\textit{escape sequence} (see table~\ref{table:escape}), a character combination
+beginning with a backslash (\textbackslash).
+
+\begin{table}[ht]
+\centering
+\begin{tabular}{r l}
+ \textbf{Sequence} & \textbf{Meaning} \\
+ \toprule
+ \verb|\n| & Newline \\
+ \midrule
+ \verb|\t| & Horizontal tab \\
+ \midrule
+ \verb|\f| & Form feed \\
+ \midrule
+ \verb|\r| & Carriage return \\
+ \midrule
+ \verb|\v| & Vertical tab \\
+ \midrule
+ \verb|\\| & Backslash \\
+ \midrule
+ \verb|\'| & Single quote \\
+ \midrule
+ \verb|\"| & Double quote \\
+ \midrule
+ \verb|\0| & Null character \\
+ \midrule
+ \verb|\xh…| & Arbitrary hexadecimal value, where \verb|h| is a hexadecimal digit \\
+ \bottomrule
+\end{tabular}
+\caption{Escape sequences}\label{table:escape}
+\end{table}
+
+Examples:
+
+\begin{itemize}
+ \item \verb|"String"|
+ \item \verb|'c'|
+ \item \verb|'\''|
+ \item \verb|"\"multi\nline\nquoted\nstring\""|
+\end{itemize}
+
+\section{Operators and delimiters}
+
+\textit{Operators} and \textit{delimiters} are the special characters, character
+pairs, or reserved words listed below. These reserved words consist exclusively
+of letters and cannot be used in the role of identifiers.
+
+\begin{itemize}
+ \item{}:=
+ \item $@\quad\hat{}\quad\sim$
+ \item $.\quad,\quad;\quad:\quad|$
+ \item $<\quad>\quad>=\quad<=\quad<>\quad=$
+ \item $+\quad-\quad*\quad/$
+ \item $or\quad{}xor\quad\&$
+ \item (\ and\ )
+ \item \lbrack{} and \rbrack{}
+ \item \{ and \}
+ \item Pointer
+ \item module
+ \item import
+ \item type
+ \item const
+ \item var
+ \item begin
+ \item end
+ \item proc
+ \item record
+ \item while
+ \item do
+ \item case
+ \item of
+ \item if
+ \item then
+ \item elsif
+ \item else
+ \item cast
+ \item return
+ \item true
+ \item false
+ \item nil
+\end{itemize}
+
+\section{Comments}
+
+\textit{Comments} may be inserted between any two tokens in a program. They
+are arbitrary character sequences opened by the bracket \verb|(*| and closed
+by \verb|*)|. Comments do not affect the meaning of a program.
\chapter{Expressions}
@@ -335,5 +504,21 @@ relations $=$ and $<>$ apply to all types.
\chapter{Statements}
+\begin{grammar}
+<statement> = <assignment> | <procedure-call> | <defer-statement>
+ | <label-declaration> | <goto-statement>
+ | <while-statement> | <if-statement> | <case-statement>.
+\end{grammar}
+
+Statements denote actions. There are elementary and structured statements.
+Elementary statements are not composed of any parts that are themselves
+statements. They are the assignment and the procedure call. Structured
+statements are composed of parts that are themselves statements. They
+are used to express sequencing and conditional, selective, and
+repetitive execution. A statement may also be empty, in which case it
+denotes no action. The empty statement is included in order to relax
+punctuation rules in statement sequences.
+
+\section{Elementary statements}
\section{Conditional statements}
\section{Loop statements}
diff --git a/frontend/driver.cc b/frontend/driver.cc
index 1c20d09..a266937 100644
--- a/frontend/driver.cc
+++ b/frontend/driver.cc
@@ -50,10 +50,6 @@ namespace elna::frontend
{
case 'n':
return '\n';
- case 'a':
- return '\a';
- case 'b':
- return '\b';
case 't':
return '\t';
case 'f':
@@ -68,8 +64,6 @@ namespace elna::frontend
return '\'';
case '"':
return '"';
- case '?':
- return '\?';
case '0':
return '\0';
default:
diff --git a/frontend/lexer.ll b/frontend/lexer.ll
index 7c18b32..b48b4a6 100644
--- a/frontend/lexer.ll
+++ b/frontend/lexer.ll
@@ -175,7 +175,7 @@ of {
return yy::parser::make_INTEGER(result, this->location);
}
}
-0x{HIGIT}+ {
+0[x|X]{HIGIT}+ {
unsigned long result = strtoul(yytext, NULL, 16);
if (errno == ERANGE)
@@ -187,7 +187,7 @@ of {
return yy::parser::make_WORD(result, this->location);
}
}
-0b{BIGIT}+ {
+0[b|B]{BIGIT}+ {
unsigned long result = strtoul(yytext, NULL, 2);
if (errno == ERANGE)
diff --git a/rakelib/doc.rake b/rakelib/doc.rake
index 54f5d35..7838871 100644
--- a/rakelib/doc.rake
+++ b/rakelib/doc.rake
@@ -19,8 +19,9 @@ rule '.bbl' => '.bcf' do |t|
end
namespace :doc do
- task :tex do
- sh 'pdflatex', '-output-directory', '../build/doc', 'report', chdir: 'doc'
+ task tex: 'build/doc' do |t|
+ sh 'pdflatex', '-output-directory', "../#{t.prerequisites.first}", 'report',
+ chdir: 'doc'
end
end
diff --git a/rakelib/gcc.rake b/rakelib/gcc.rake
index 2b8bbfb..3f36ce9 100644
--- a/rakelib/gcc.rake
+++ b/rakelib/gcc.rake
@@ -84,9 +84,10 @@ namespace :gcc do
configure_options = [
"--prefix=#{File.realpath HOST_INSTALL}",
- '--enable-languages=c,c++,elna',
+ '--enable-languages=c,c++,jit,elna',
'--disable-bootstrap',
'--disable-multilib',
+ '--enable-host-shared',
'--with-system-zlib',
"--target=#{build_target}",
"--build=#{build_target}",
diff --git a/source/main.elna b/source/main.elna
index e60f9ae..db5e76f 100644
--- a/source/main.elna
+++ b/source/main.elna
@@ -186,12 +186,6 @@ begin
'n':
result^ := '\n';
successful := true
- | 'a':
- result^ := '\a';
- successful := true
- | 'b':
- result^ := '\b';
- successful := true
| 't':
result^ := '\t';
successful := true
@@ -213,9 +207,6 @@ begin
| '"':
result^ := '"';
successful := true
- | '?':
- result^ := '\?';
- successful := true
| '0':
result^ := '\0';
successful := true