From d9cd03ffeb13a5c30d8337ba27f15e1ba2959e4d Mon Sep 17 00:00:00 2001 From: Eugen Wissner Date: Fri, 3 Jul 2026 10:23:50 +0200 Subject: Move the documentation from the old branch --- doc/language.tex | 193 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 189 insertions(+), 4 deletions(-) (limited to 'doc/language.tex') diff --git a/doc/language.tex b/doc/language.tex index 9112127..36ee877 100644 --- a/doc/language.tex +++ b/doc/language.tex @@ -10,10 +10,179 @@ Each procedure can get some input and produce an output as a result of executing a \textbf{statement block}, a list, where each \textbf{statement} is executed in the order it appears in the block. -Statement components are other statement blocks and \textbf{expressions}, -where a statement has control over the evaluation of its components. -Statements can also modify the state of the procedure or the program by -mutating variables. +\chapter{Vocabulary} + +A language is an infinite set of sentences, namely the sentences well formed +according to its syntax. In Elna, these sentences are called compilation units. +Each unit is a finite sequence of \textit{tokens} from a finite vocabulary. +The vocabulary of Elna consists of identifiers, reserved words, numbers, characters, +strings, operators, delimiters, and comments. They are called \textit{tokens} +and are composed of sequences of characters. + +The following lexical rules must be observed when composing tokens. Blanks and +line breaks must not occur within tokens (except in comments and strings). They +are ignored unless they are essential to separate two consecutive tokens. +Capital and lower-case letters are considered as being distinct. + +\section{Identifiers} + +\textit{Identifiers} are sequences of letters, digits and underscores. The first +character must be a letter or an underscore. + +\begin{grammar} + = \{ | \}. 
+\end{grammar} + +Examples: + +\begin{itemize} + \item \verb|x| + \item \verb|TypeName| + \item \verb|procedure_name| +\end{itemize} + +\section{Numbers} + +Numbers are signed or unsigned integers, or real numbers. Integers may be +preceded by a prefix and followed by a suffix. The prefixes \verb|0x| and +\verb|0X| indicate hexadecimal representation, \verb|0b| and \verb|0B| +indicate binary representation. Unsigned integers have the suffix \verb|u|, +signed integers have no suffix. + +A \textit{real number} always contains a decimal point. Optionally it may +also contain a decimal scale factor. The letter \verb|e| or \verb|E| is +pronounced as `times ten to the power of'. + +\begin{grammar} + = `0' | \{\}. + + = `u' + \alt{} `0' (`X' | `x') \{\} + \alt{} `0' (`B' | `b') \{\}. + + = `.\@' \{\} + \alt{} \} `e' [`+' | `-'] \{\}. +\end{grammar} + +Examples: + +\begin{itemize} + \item 2016 + \item 1987u + \item 0xff + \item 0b101 + \item 0.5 + \item 4.567e8 +\end{itemize} + +\section{Strings and characters} + +Single \textit{characters} are enclosed in single quotation marks +(\textquotesingle).\@ \textit{Strings} are sequences of characters enclosed in +double quotation marks (\textquotedbl). The number of characters in a string is +called the \textit{length} of the string. + +\begin{grammar} + = `\\' \\ + (`n' | `t' | `f' | `r' | `v' | `\\' | `\textquotesingle' | `\textquotedbl' | `0'). + + = `\\x' \{\}. + + = | | . + + = `\textquotesingle' `\textquotesingle'. + + = `\textquotedbl' \{\} `\textquotedbl'. +\end{grammar} + +Alternatively, a single character may be represented by an +\textit{escape sequence} (see Table~\ref{table:escape}), a character combination +beginning with a backslash (\textbackslash). 
+ +\begin{table}[ht] +\centering +\begin{tabular}{r l} + \textbf{Sequence} & \textbf{Meaning} \\ + \toprule + \verb|\n| & Newline \\ + \midrule + \verb|\t| & Horizontal tab \\ + \midrule + \verb|\f| & Form feed \\ + \midrule + \verb|\r| & Carriage return \\ + \midrule + \verb|\v| & Vertical tab \\ + \midrule + \verb|\\| & Backslash \\ + \midrule + \verb|\'| & Single quote \\ + \midrule + \verb|\"| & Double quote \\ + \midrule + \verb|\0| & Null character \\ + \midrule + \verb|\xh…| & Arbitrary hexadecimal value, where \verb|h| is a hexadecimal digit \\ + \bottomrule +\end{tabular} +\caption{Escape sequences}\label{table:escape} +\end{table} + +Examples: + +\begin{itemize} + \item \verb|"String"| + \item \verb|'c'| + \item \verb|'\''| + \item \verb|"\"multi\nline\nquoted\nstring\""| +\end{itemize} + +\section{Operators and delimiters} + +\textit{Operators} and \textit{delimiters} are the special characters, character +pairs, or reserved words listed below. These reserved words consist exclusively +of letters and cannot be used in the role of identifiers. + +\begin{itemize} + \item{}:= + \item $@\quad\hat{}\quad\sim$ + \item $.\quad,\quad;\quad:\quad|$ + \item $<\quad>\quad>=\quad<=\quad<>\quad=$ + \item $+\quad-\quad*\quad/$ + \item $or\quad{}xor\quad\&$ + \item (\ and\ ) + \item \lbrack{} and \rbrack{} + \item \{ and \} + \item Pointer + \item module + \item import + \item type + \item const + \item var + \item begin + \item end + \item proc + \item record + \item while + \item do + \item case + \item of + \item if + \item then + \item elsif + \item else + \item cast + \item return + \item true + \item false + \item nil +\end{itemize} + +\section{Comments} + +\textit{Comments} may be inserted between any two tokens in a program. They +are arbitrary character sequences opened by the bracket \verb|(*| and closed +by \verb|*)|. Comments do not affect the meaning of a program. \chapter{Expressions} @@ -335,5 +504,21 @@ relations $=$ and $<>$ apply to all types. 
\chapter{Statements} +\begin{grammar} + = | | + | | | + | | | . +\end{grammar} + +Statements denote actions. There are elementary and structured statements. +Elementary statements are not composed of any parts that are themselves +statements. They are the assignment and the procedure call. Structured +statements are composed of parts that are themselves statements. They +are used to express sequencing and conditional, selective, and +repetitive execution. A statement may also be empty, in which case it +denotes no action. The empty statement is included in order to relax +punctuation rules in statement sequences. + +\section{Elementary statements} \section{Conditional statements} \section{Loop statements} -- cgit v1.2.3