1 files changed, 189 insertions, 4 deletions
diff --git a/doc/language.tex b/doc/language.tex
index 9112127..36ee877 100644
--- a/doc/language.tex
+++ b/doc/language.tex
@@ -10,10 +10,179 @@ Each procedure can get some input and produce an output as a result of
 executing a \textbf{statement block}, a list, where each \textbf{statement}
 is executed in the order it appears in the block.
 
-Statement components are other statement blocks and \textbf{expressions},
-where a statement has control over the evaluation of its components.
-Statements can also modify the state of the procedure or the program by
-mutating variables.
+\chapter{Vocabulary}
+
+A language is an infinite set of sentences, namely the sentences well formed
+according to its syntax. In Elna, these sentences are called compilation units.
+Each unit is a finite sequence of \textit{tokens} from a finite vocabulary.
+The vocabulary of Elna consists of identifiers, reserved words, numbers, characters,
+strings, operators, delimiters, and comments. They are called \textit{tokens}
+and are composed of sequences of characters.
+
+The following lexical rules must be observed when composing tokens. Blanks and
+line breaks must not occur within tokens (except in comments and strings). They
+are ignored unless they are essential to separate two consecutive tokens.
+Capital and lower-case letters are considered as being distinct.
+
+\section{Identifiers}
+
+\textit{Identifiers} are sequences of letters, digits and underscores. The first
+character must be a letter or an underscore.
+
+\begin{grammar}
+<identifier> = <letter> \{<letter> | <decimal-digit>\}.
+\end{grammar}
+
+Examples:
+
+\begin{itemize}
+		\item \verb|x|
+		\item \verb|TypeName|
+		\item \verb|procedure_name|
+\end{itemize}
+
+\section{Numbers}
+
+Numbers are signed or unsigned integers, or real numbers. Integers may be
+preceded by a prefix and followed by a suffix. The prefixes \verb|0x| and
+\verb|0X| indicate hexadecimal representation, \verb|0b| and \verb|0B|
+indicate binary representation. Unsigned integers have the suffix \verb|u|,
+signed integers have no suffix.
+
+A \textit{real number} always contains a decimal point. Optionally it may
+also contain a decimal scale factor. The letter \verb|e| (or \verb|E|) is
+pronounced as `times ten to the power of'.
+
+\begin{grammar}
+<integer-literal> = `0' | <counting-digit> \{<decimal-digit>\}.
+
+<word-literal> = <integer-literal> `u'
+	\alt{} `0' (`X' | `x') <hex-digit> \{<hex-digit>\}
+	\alt{} `0' (`B' | `b') <binary-digit> \{<binary-digit>\}.
+
+<real-literal> = <integer-literal> `.\@' <decimal-digit> \{<decimal-digit>\}
+	\alt{} <integer-literal> `e' [`+' | `-'] <decimal-digit> \{<decimal-digit>\}.
+\end{grammar}
+
+Examples:
+
+\begin{itemize}
+		\item 2016
+		\item 1987u
+		\item 0xff
+		\item 0b101
+		\item 0.5
+		\item 4.567e8
+\end{itemize}
+
+\section{Strings and characters}
+
+Single \textit{characters} are enclosed in single quotation marks
+(\textquotesingle).\@ \textit{Strings} are sequences of characters enclosed in
+double quotation marks (\textquotedbl). The number of characters in a string is
+called the \textit{length} of the string.
+
+\begin{grammar}
+<escaped-character> = `\\' \\
+	(`n' | `t' | `f' | `r' | `v' | `\\' | `\textquotesingle' | `\textquotedbl' | `0').
+
+<hex-character> = `\\x' <hex-digit> \{<hex-digit>\}.
+
+<character> = <printable-character> | <escaped-character> | <hex-character>.
+
+<character-literal> = `\textquotesingle' <character> `\textquotesingle'.
+
+<string-literal> = `\textquotedbl' \{<character>\} `\textquotedbl'.
+\end{grammar}
+
+Alternatively, a single character may be represented by an
+\textit{escape sequence} (see Table~\ref{table:escape}), a character combination
+beginning with a backslash (\textbackslash).
+
+\begin{table}[ht]
+\centering
+\begin{tabular}{r l}
+	\textbf{Sequence} & \textbf{Meaning} \\
+	\toprule
+	\verb|\n| & Newline \\
+	\midrule
+	\verb|\t| & Horizontal tab \\
+	\midrule
+	\verb|\f| & Form feed \\
+	\midrule
+	\verb|\r| & Carriage return \\
+	\midrule
+	\verb|\v| & Vertical tab \\
+	\midrule
+	\verb|\\| & Backslash \\
+	\midrule
+	\verb|\'| & Single quote \\
+	\midrule
+	\verb|\"| & Double quote \\
+	\midrule
+	\verb|\0| & Null character \\
+	\midrule
+	\verb|\xh…| & Arbitrary hexadecimal value, where \verb|h| is a hexadecimal digit \\
+	\bottomrule
+\end{tabular}
+\caption{Escape sequences}\label{table:escape}
+\end{table}
+
+Examples:
+
+\begin{itemize}
+		\item \verb|"String"|
+		\item \verb|'c'|
+		\item \verb|'\''|
+		\item \verb|"\"multi\nline\nquoted\nstring\""|
+\end{itemize}
+
+\section{Operators and delimiters}
+
+\textit{Operators} and \textit{delimiters} are the special characters, character
+pairs, or reserved words listed below. These reserved words consist exclusively
+of letters and cannot be used in the role of identifiers.
+
+\begin{itemize}
+	\item{}:=
+	\item $@\quad\hat{}\quad\sim$
+	\item $.\quad,\quad;\quad:\quad|$
+	\item $<\quad>\quad>=\quad<=\quad<>\quad=$
+	\item $+\quad-\quad*\quad/$
+	\item $or\quad{}xor\quad\&$
+	\item (\ and\ )
+	\item \lbrack{} and \rbrack{}
+	\item \{ and \}
+	\item Pointer
+	\item module
+	\item import
+	\item type
+	\item const
+	\item var
+	\item begin
+	\item end
+	\item proc
+	\item record
+	\item while
+	\item do
+	\item case
+	\item of
+	\item if
+	\item then
+	\item elsif
+	\item else
+	\item cast
+	\item return
+	\item true
+	\item false
+	\item nil
+\end{itemize}
+
+\section{Comments}
+
+\textit{Comments} may be inserted between any two tokens in a program. They
+are arbitrary character sequences opened by the bracket \verb|(*| and closed
+by \verb|*)|. Comments do not affect the meaning of a program.
 
 \chapter{Expressions}
 
@@ -335,5 +504,21 @@ relations $=$ and $<>$ apply to all types.
 
 \chapter{Statements}
 
+\begin{grammar}
+<statement> = <assignment> | <procedure-call> | <defer-statement>
+    | <label-declaration> | <goto-statement>
+    | <while-statement> | <if-statement> | <case-statement>.
+\end{grammar}
+
+Statements denote actions. There are elementary and structured statements.
+Elementary statements are not composed of any parts that are themselves
+statements. They are the assignment and the procedure call. Structured
+statements are composed of parts that are themselves statements. They
+are used to express sequencing and conditional, selective, and
+repetitive execution. A statement may also be empty, in which case it
+denotes no action. The empty statement is included in order to relax
+punctuation rules in statement sequences.
+
+\section{Elementary statements}
 \section{Conditional statements}
 \section{Loop statements}