1 files changed, 74 insertions, 4 deletions
diff --git a/doc/language.tex b/doc/language.tex
index 08d2420..f79e663 100644
--- a/doc/language.tex
+++ b/doc/language.tex
@@ -10,10 +10,80 @@ Each procedure can get some input and produce an output as a result of
 executing a \textbf{statement block}, a list, where each \textbf{statement}
 is executed in the order it appears in the block.
 
-Statement components are other statement blocks and \textbf{expressions},
-where a statement has control over the evaluation of its components.
-Statements can also modify the state of the procedure or the program by
-mutating variables.
+\chapter{Vocabulary}
+
+A language is an infinite set of sentences, namely the sentences well formed
+according to its syntax. In Elna, these sentences are called compilation units.
+Each unit is a finite sequence of \textit{tokens} from a finite vocabulary.
+The vocabulary of Elna consists of identifiers, reserved words, numbers, characters,
+strings, operators, delimiters, and comments. They are called \textit{tokens}
+and are composed of sequences of characters.
+
+The following lexical rules must be observed when composing tokens. Blanks and
+line breaks must not occur within tokens (except in comments and strings). They
+are ignored unless they are essential to separate two consecutive tokens.
+Capital and lower-case letters are considered as being distinct.
+
+\section{Identifiers}
+
+\textit{Identifiers} are sequences of letters, digits and underscores. The first
+character must be a letter or an underscore.
+
+\begin{grammar}
+<identifier> = <letter> \{<letter> | <decimal-digit>\}.
+\end{grammar}
+
+Examples:
+
+\begin{itemize}
+		\item \verb|x|
+		\item \verb|TypeName|
+		\item \verb|procedure_name|
+\end{itemize}
+
+\section{Numbers}
+
+Numbers are signed or unsigned integers, or real numbers. Integers may be
+preceded by a prefix and followed by a suffix. The prefixes \verb|0x| and
+\verb|0X| indicate hexadecimal representation, \verb|0b| and \verb|0B|
+indicate binary representation. Unsigned integers have the suffix \verb|u|,
+signed integers have no suffix.
+
+A \textit{real number} always contains a decimal point. Optionally it may
+also contain a decimal scale factor. The letters \verb|e| or \verb|E| is
+pronounced as `times ten to the power of'.
+
+\begin{grammar}
+<integer-literal> = `0' | <counting-digit> \{<decimal-digit>\}.
+
+<word-literal> = <integer-literal> `u'
+	\alt{} `0' (`X' | `x') <hex-digit> \{<hex-digit>\}
+	\alt{} `0' (`B' | `b') <binary-digit> \{<binary-digit>\}.
+
+<real-literal> = <integer-literal> `.\@' <decimal-digit> \{<decimal-digit>\}
+	\alt{} <integer-literal>\} `e' [`+' | `-'] <decimal-digit> \{<decimal-digit>\}.
+\end{grammar}
+
+Examples:
+
+\begin{itemize}
+		\item 2016
+		\item 1987u
+		\item 0xff
+		\item 0b101
+		\item 0.5
+		\item 4.567e8
+\end{itemize}
+
+\section{Strings and characters}
+
+\section{Operators and delimiters}
+
+\section{Comments}
+
+\textit{Comments} may be inserted between any two tokens in a program. They
+are arbitrary character sequences opened by the bracket \verb|(*| and closed
+by \verb|*)|. Comments do not affect the meaning of a program.
 
 \chapter{Expressions}