aboutsummaryrefslogtreecommitdiff
path: root/doc/language.tex
diff options
context:
space:
mode:
Diffstat (limited to 'doc/language.tex')
-rw-r--r--doc/language.tex193
1 files changed, 189 insertions, 4 deletions
diff --git a/doc/language.tex b/doc/language.tex
index 9112127..36ee877 100644
--- a/doc/language.tex
+++ b/doc/language.tex
@@ -10,10 +10,179 @@ Each procedure can get some input and produce an output as a result of
executing a \textbf{statement block}, a list, where each \textbf{statement}
is executed in the order it appears in the block.
-Statement components are other statement blocks and \textbf{expressions},
-where a statement has control over the evaluation of its components.
-Statements can also modify the state of the procedure or the program by
-mutating variables.
+\chapter{Vocabulary}
+
+A language is an infinite set of sentences, namely the sentences well formed
+according to its syntax. In Elna, these sentences are called compilation units.
+Each unit is a finite sequence of \textit{tokens} from a finite vocabulary.
+The vocabulary of Elna consists of identifiers, reserved words, numbers, characters,
+strings, operators, delimiters, and comments. These vocabulary items are the
+tokens; each of them is composed of a sequence of characters.
+
+The following lexical rules must be observed when composing tokens. Blanks and
+line breaks must not occur within tokens (except in comments and strings). They
+are ignored unless they are essential to separate two consecutive tokens.
+Capital and lower-case letters are considered as being distinct.
+
+\section{Identifiers}
+
+\textit{Identifiers} are sequences of letters, digits and underscores. The first
+character must be a letter or an underscore.
+
+\begin{grammar}
+<identifier> = (<letter> | `\_') \{<letter> | <decimal-digit> | `\_'\}.
+\end{grammar}
+
+Examples:
+
+\begin{itemize}
+ \item \verb|x|
+ \item \verb|TypeName|
+ \item \verb|procedure_name|
+\end{itemize}
+
+\section{Numbers}
+
+Numbers are signed or unsigned integers, or real numbers. Integers may be
+preceded by a prefix and followed by a suffix. The prefixes \verb|0x| and
+\verb|0X| indicate hexadecimal representation, \verb|0b| and \verb|0B|
+indicate binary representation. Unsigned integers have the suffix \verb|u|,
+signed integers have no suffix.
+
+A \textit{real number} always contains a decimal point. Optionally it may
+also contain a decimal scale factor. The letter \verb|e| or \verb|E| is
+pronounced as `times ten to the power of'.
+
+\begin{grammar}
+<integer-literal> = `0' | <counting-digit> \{<decimal-digit>\}.
+
+<word-literal> = <integer-literal> `u'
+ \alt{} `0' (`X' | `x') <hex-digit> \{<hex-digit>\}
+ \alt{} `0' (`B' | `b') <binary-digit> \{<binary-digit>\}.
+
+<real-literal> = <integer-literal> `.\@' <decimal-digit> \{<decimal-digit>\}
+ \alt{} <integer-literal> `e' [`+' | `-'] <decimal-digit> \{<decimal-digit>\}.
+\end{grammar}
+
+Examples:
+
+\begin{itemize}
+ \item 2016
+ \item 1987u
+ \item 0xff
+ \item 0b101
+ \item 0.5
+ \item 4.567e8
+\end{itemize}
+
+\section{Strings and characters}
+
+Single \textit{characters} are enclosed in single quotation marks
+(\textquotesingle).\@ \textit{Strings} are sequences of characters enclosed in
+double quotation marks (\textquotedbl). The number of characters in a string is
+called the \textit{length} of the string.
+
+\begin{grammar}
+<escaped-character> = `\\' \\
+ (`n' | `t' | `f' | `r' | `v' | `\\' | `\textquotesingle' | `\textquotedbl' | `0').
+
+<hex-character> = `\\x' <hex-digit> \{<hex-digit>\}.
+
+<character> = <printable-character> | <escaped-character> | <hex-character>.
+
+<character-literal> = `\textquotesingle' <character> `\textquotesingle'.
+
+<string-literal> = `\textquotedbl' \{<character>\} `\textquotedbl'.
+\end{grammar}
+
+Alternatively, a single character may be represented by an
+\textit{escape sequence} (see Table~\ref{table:escape}), a character combination
+beginning with a backslash (\textbackslash).
+
+\begin{table}[ht]
+\centering
+\begin{tabular}{r l}
+ \textbf{Sequence} & \textbf{Meaning} \\
+ \toprule
+ \verb|\n| & Newline \\
+ \midrule
+ \verb|\t| & Horizontal tab \\
+ \midrule
+ \verb|\f| & Form feed \\
+ \midrule
+ \verb|\r| & Carriage return \\
+ \midrule
+ \verb|\v| & Vertical tab \\
+ \midrule
+ \verb|\\| & Backslash \\
+ \midrule
+ \verb|\'| & Single quote \\
+ \midrule
+ \verb|\"| & Double quote \\
+ \midrule
+ \verb|\0| & Null character \\
+ \midrule
+ \verb|\xh…| & Arbitrary hexadecimal value, where \verb|h| is a hexadecimal digit \\
+ \bottomrule
+\end{tabular}
+\caption{Escape sequences}\label{table:escape}
+\end{table}
+
+Examples:
+
+\begin{itemize}
+ \item \verb|"String"|
+ \item \verb|'c'|
+ \item \verb|'\''|
+ \item \verb|"\"multi\nline\nquoted\nstring\""|
+\end{itemize}
+
+\section{Operators and delimiters}
+
+\textit{Operators} and \textit{delimiters} are the special characters, character
+pairs, or reserved words listed below. These reserved words consist exclusively
+of letters and cannot be used in the role of identifiers.
+
+\begin{itemize}
+ \item{}:=
+ \item $@\quad\hat{}\quad\sim$
+ \item $.\quad,\quad;\quad:\quad|$
+ \item $<\quad>\quad>=\quad<=\quad<>\quad=$
+ \item $+\quad-\quad*\quad/$
+ \item $or\quad{}xor\quad\&$
+ \item (\ and\ )
+ \item \lbrack{} and \rbrack{}
+ \item \{ and \}
+ \item Pointer
+ \item module
+ \item import
+ \item type
+ \item const
+ \item var
+ \item begin
+ \item end
+ \item proc
+ \item record
+ \item while
+ \item do
+ \item case
+ \item of
+ \item if
+ \item then
+ \item elsif
+ \item else
+ \item cast
+ \item return
+ \item true
+ \item false
+ \item nil
+\end{itemize}
+
+\section{Comments}
+
+\textit{Comments} may be inserted between any two tokens in a program. They
+are arbitrary character sequences opened by the bracket \verb|(*| and closed
+by \verb|*)|. Comments do not affect the meaning of a program.
\chapter{Expressions}
@@ -335,5 +504,21 @@ relations $=$ and $<>$ apply to all types.
\chapter{Statements}
+\begin{grammar}
+<statement> = <assignment> | <procedure-call> | <defer-statement>
+ | <label-declaration> | <goto-statement>
+ | <while-statement> | <if-statement> | <case-statement>.
+\end{grammar}
+
+Statements denote actions. There are elementary and structured statements.
+Elementary statements are not composed of any parts that are themselves
+statements. They are the assignment and the procedure call. Structured
+statements are composed of parts that are themselves statements. They
+are used to express sequencing and conditional, selective, and
+repetitive execution. A statement may also be empty, in which case it
+denotes no action. The empty statement is included in order to relax
+punctuation rules in statement sequences.
+
+\section{Elementary statements}
\section{Conditional statements}
\section{Loop statements}