elna/source.elna

827 lines
23 KiB
Plaintext
Raw Normal View History

(*
  Whence values passed to fseek, followed by the numeric codes stored in
  Token.kind by the lexer.
*)
const
  SEEK_SET = 0,
  SEEK_CUR = 1,
  SEEK_END = 2,
  TOKEN_IDENTIFIER = 1,
  TOKEN_IF = 2,
  TOKEN_THEN = 3,
  TOKEN_ELSE = 4,
  TOKEN_ELSIF = 5,
  TOKEN_WHILE = 6,
  TOKEN_DO = 7,
  TOKEN_PROC = 8,
  TOKEN_BEGIN = 9,
  TOKEN_END = 10,
  TOKEN_EXTERN = 11,
  TOKEN_CONST = 12,
  TOKEN_VAR = 13,
  TOKEN_ARRAY = 14,
  TOKEN_OF = 15,
  TOKEN_TYPE = 16,
  TOKEN_RECORD = 17,
  TOKEN_UNION = 18,
  TOKEN_POINTER = 19,
  TOKEN_TO = 20,
  TOKEN_BOOLEAN = 21,
  TOKEN_NIL = 22,
  TOKEN_AND = 23,
  TOKEN_OR = 24,
  TOKEN_NOT = 25,
  TOKEN_RETURN = 26,
  TOKEN_CAST = 27,
  TOKEN_AS = 28,
  TOKEN_SIZEOF = 29,
  TOKEN_LEFT_PAREN = 30,
  TOKEN_RIGHT_PAREN = 31,
  TOKEN_LEFT_SQUARE = 32,
  TOKEN_RIGHT_SQUARE = 33,
  TOKEN_GREATER_EQUAL = 34,
  TOKEN_LESS_EQUAL = 35,
  TOKEN_GREATER_THAN = 36,
  TOKEN_LESS_THAN = 37,
  TOKEN_NOT_EQUAL = 38,
  TOKEN_EQUAL = 39,
  TOKEN_SEMICOLON = 40,
  TOKEN_DOT = 41,
  TOKEN_COMMA = 42,
  TOKEN_PLUS = 43,
  TOKEN_MINUS = 44,
  TOKEN_MULTIPLICATION = 45,
  TOKEN_DIVISION = 46,
  TOKEN_REMAINDER = 47,
  TOKEN_ASSIGNMENT = 48,
  TOKEN_COLON = 49,
  TOKEN_HAT = 50,
  TOKEN_AT = 51,
  TOKEN_COMMENT = 52,
  TOKEN_INTEGER = 53,
  TOKEN_WORD = 54,
  TOKEN_CHARACTER = 55,
  TOKEN_STRING = 56;
2025-01-11 13:32:37 +01:00
(*
  Data types of the lexer and of the parser skeleton.

  Position and Location describe a place in the source text. TokenValue
  holds the payload of a token and Token.kind is one of the TOKEN_
  constants. FILE is a placeholder record that is only handled through
  pointers returned by fopen. CommandLine carries the parsed program
  arguments. Literal, ConstantDefinition, ConstantPart and Program are the
  syntax tree nodes built by the parse_ procedures.
*)
type
  Position = record
    line: Word;
    column: Word
  end,
  Location = record
    first: Position;
    last: Position
  end,
  TokenValue = union
    int_value: Int;
    string_value: pointer to Char;
    boolean_value: Bool;
    char_value: Char
  end,
  Token = record
    kind: Int;
    value: TokenValue;
    location: Location
  end,
  FILE = record
    dummy: Int
  end,
  CommandLine = record
    input: pointer to Char;
    tokenize: Bool
  end,
  Literal = record
    value: Int
  end,
  ConstantDefinition = record
    name: pointer to Char;
    body: pointer to Literal
  end,
  ConstantPart = record
    elements: pointer to pointer to ConstantDefinition;
    count: Word
  end,
  Program = record
    constants: ConstantPart
  end;
2025-01-30 01:03:16 +01:00
(*
External procedures.
*)
2025-01-25 19:50:36 +01:00
(* File streams. *)
proc fopen(pathname: String, mode: String): pointer to FILE; extern;
proc fclose(stream: pointer to FILE): Int; extern;
proc fseek(stream: pointer to FILE, off: Int, whence: Int): Int; extern;
proc rewind(stream: pointer to FILE); extern;
proc ftell(stream: pointer to FILE): Int; extern;
proc fread(ptr: pointer to Byte, size: Word, nmemb: Word, stream: pointer to FILE): Word; extern;

(*
  Raw descriptor output. The third parameter used to be named "Word",
  which shadowed the type name; it is the number of bytes to write.
*)
proc write(fd: Int, buf: pointer to Byte, count: Int): Int; extern;

(* Dynamic memory. *)
proc malloc(size: Word): pointer to Byte; extern;
proc free(ptr: pointer to Byte); extern;
proc calloc(nmemb: Word, size: Word): pointer to Byte; extern;
proc realloc(ptr: pointer to Byte, size: Word): pointer to Byte; extern;
proc reallocarray(ptr: pointer to Byte, n: Word, size: Word): pointer to Byte; extern;

(* Memory and string helpers. *)
proc memset(ptr: pointer to Char, c: Int, n: Int): pointer to Char; extern;
proc strcmp(s1: pointer to Char, s2: pointer to Char): Int; extern;
proc strncmp(s1: pointer to Char, s2: pointer to Char, n: Word): Int; extern;
proc strncpy(dst: pointer to Char, src: pointer to Char, dsize: Word): pointer to Char; extern;
proc strcpy(dst: pointer to Char, src: pointer to Char): pointer to Char; extern;
proc strlen(ptr: pointer to Char): Word; extern;
proc strtol(nptr: pointer to Char, endptr: pointer to pointer to Char, base: Int): Int; extern;

(* Diagnostics and process termination. *)
proc perror(s: pointer to Char); extern;
proc exit(code: Int); extern;
2025-01-30 01:03:16 +01:00
(*
Standard procedures.
*)
2025-01-25 19:50:36 +01:00
(*
  Writes the NUL-terminated string value to standard output.

  Descriptor 1 is used; the previous descriptor 0 is standard input, so the
  text was lost whenever input was redirected from a file or a pipe.
*)
proc write_s(value: String);
begin
  write(1, value, strlen(value))
end;
(* Prints the boolean value as the word "true" or "false". *)
proc write_b(value: Bool);
begin
  if not value then
    write_s("false")
  else
    write_s("true")
  end
end;
(*
  Writes the single character value to standard output.

  Descriptor 1 is used; the previous descriptor 0 is standard input, so the
  character was lost whenever input was redirected from a file or a pipe.
*)
proc write_c(value: Char);
begin
  write(1, @value, 1)
end;
2025-01-27 01:16:27 +01:00
(*
  Prints value in decimal, with a leading minus sign for negative numbers.

  Digits are produced from the least significant one and stored from the
  end of the used buffer area towards its beginning, then printed in
  reading order. Only the elements 1 to 20 of the buffer are touched, so
  the code does not depend on whether array indices start at 0 or at 1.

  The previous version printed one element past the last digit on every
  call and produced garbage digits for negative values.
*)
proc write_i(value: Int);
var
  digit: Int, n: Int,
  buffer: array 24 of Char;
begin
  if value = 0 then
    write_c('0')
  end;
  if value < 0 then
    write_c('-')
  end;
  n := 20;

  while value <> 0 do
    digit := value % 10;
    (* Presumably the remainder of a negative value is negative; take its magnitude. *)
    if digit < 0 then
      digit := 0 - digit
    end;
    value := value / 10;

    buffer[n] := cast(cast('0' as Int) + digit as Char);
    n := n - 1
  end;
  (* n is the element just before the most significant digit. *)
  while n < 20 do
    n := n + 1;
    write_c(buffer[n])
  end
end;
2025-01-27 01:16:27 +01:00
(* Prints an unsigned value by handing it to write_i as a signed integer. *)
proc write_u(value: Word);
begin
  write_i(cast(value as Int))
end;
2025-01-28 11:21:02 +01:00
(* Tells whether c is one of the decimal digits '0' to '9'. *)
proc is_digit(c: Char): Bool;
var
  code: Int;
begin
  code := cast(c as Int);

  if code < cast('0' as Int) then
    return false
  end;
  return code <= cast('9' as Int)
end;
(*
  Tells whether c is a letter, 'A' to 'Z' or 'a' to 'z'.

  The two ranges are tested separately. A single range from 'A' to 'z' also
  covers the six characters between 'Z' and 'a' (square brackets,
  backslash, hat, underscore and backtick), which made the lexer treat
  brackets and the hat as the start of an identifier.
*)
proc is_alpha(c: Char): Bool;
var
  code: Int;
begin
  code := cast(c as Int);

  if code >= cast('A' as Int) and code <= cast('Z' as Int) then
    return true
  end;
  return code >= cast('a' as Int) and code <= cast('z' as Int)
end;
(* Tells whether c is accepted by is_digit or by is_alpha. *)
proc is_alnum(c: Char): Bool;
begin
  if is_digit(c) then
    return true
  end;
  return is_alpha(c)
end;
(* Tells whether c is a blank, a line feed or a horizontal tab. *)
proc is_space(c: Char): Bool;
begin
  if c = ' ' then
    return true
  end;
  if c = '\n' then
    return true
  end;
  return c = '\t'
end;
2025-01-30 01:03:16 +01:00
(*
End of standard procedures.
*)
2025-01-27 01:16:27 +01:00
(*
  Reads the whole file into a freshly allocated, NUL-terminated buffer.

  filename - path of the file to read.

  Returns the buffer, or nil if the file cannot be opened, measured, read
  or if the buffer cannot be allocated. The caller owns the buffer.

  Compared with the previous version the buffer is released when reading
  fails, a failed allocation is detected, and an empty file yields an empty
  string instead of an error.
*)
proc read_source(filename: String): pointer to Char;
var
  input_file: pointer to FILE,
  source_size: Int,
  input: pointer to Byte;
begin
  input_file := fopen(filename, "rb");
  if input_file = nil then
    return nil
  end;
  if fseek(input_file, 0, SEEK_END) <> 0 then
    fclose(input_file);
    return nil
  end;
  source_size := ftell(input_file);
  if source_size < 0 then
    fclose(input_file);
    return nil
  end;
  rewind(input_file);

  (* One extra zeroed byte terminates the text. *)
  input := calloc(source_size + 1, 1);
  if input = nil then
    fclose(input_file);
    return nil
  end;
  (* Reading one item of size 0 reports 0 items, so skip fread for empty files. *)
  if source_size > 0 then
    if fread(input, source_size, 1, input_file) <> 1u then
      free(input);
      input := nil
    end
  end;
  fclose(input_file);

  return input
end;
2025-01-31 09:46:17 +01:00
(*
  Translates the character following a backslash into the character the
  escape sequence stands for.

  escape - character that follows the backslash.
  result - receives the translated character when the escape is known.

  Returns true for a known escape, false otherwise; result is left
  untouched in the latter case.
*)
proc escape_char(escape: Char, result: pointer to Char): Bool;
var
  recognized: Bool;
begin
  recognized := true;

  if escape = 'n' then
    result^ := '\n'
  elsif escape = 'a' then
    result^ := '\a'
  elsif escape = 'b' then
    result^ := '\b'
  elsif escape = 't' then
    result^ := '\t'
  elsif escape = 'f' then
    result^ := '\f'
  elsif escape = 'r' then
    result^ := '\r'
  elsif escape = 'v' then
    result^ := '\v'
  elsif escape = '\\' then
    result^ := '\\'
  elsif escape = '\'' then
    result^ := '\''
  elsif escape = '"' then
    result^ := '"'
  elsif escape = '?' then
    result^ := '\?'
  elsif escape = '0' then
    result^ := '\0'
  else
    recognized := false
  end;
  return recognized
end;
2025-01-28 11:21:02 +01:00
(* Returns the address of the first character at or after input that is not white space. *)
proc skip_spaces(input: pointer to Char): pointer to Char;
var
  cursor: pointer to Char;
begin
  cursor := input;

  while is_space(cursor^) do
    cursor := cursor + 1
  end;
  return cursor
end;
(*
  Steps over letters, digits and underscores starting at input and returns
  the address of the first character that is none of them.
*)
proc lex_identifier(input: pointer to Char): pointer to Char;
var
  cursor: pointer to Char;
begin
  cursor := input;

  while is_alnum(cursor^) or cursor^ = '_' do
    cursor := cursor + 1
  end;
  return cursor
end;
2025-01-30 23:09:51 +01:00
(*
  Looks for the end of a comment, that is an asterisk immediately followed
  by a closing parenthesis.

  input - first character of the comment body.

  Returns the address just past the terminator, or nil if the text ends
  before a terminator is found. The unused local "current" was removed.
*)
proc lex_comment(input: pointer to Char): pointer to Char;
var
  next: pointer to Char;
begin
  while input^ <> '\0' do
    (* input^ is not the terminating NUL here, so next still lies inside the text. *)
    next := input + 1;

    if input^ = '*' and next^ = ')' then
      return next + 1
    end;
    input := input + 1
  end;
  return nil
end;
2025-02-01 09:21:29 +01:00
(*
  Lexes the body of a character literal.

  input         - first character after the opening quote.
  current_token - token whose char_value receives the character.

  Returns the address after the consumed text. An unknown escape sequence
  leaves the result pointing at the character after the backslash, and an
  input at the end of the text is returned unchanged.
*)
proc lex_character(input: pointer to Char, current_token: pointer to Token): pointer to Char;
var
  cursor: pointer to Char;
begin
  cursor := input;

  if cursor^ = '\0' then
    return cursor
  end;
  if cursor^ <> '\\' then
    current_token^.value.char_value := cursor^;
    return cursor + 1
  end;
  (* Escape sequence: translate the character after the backslash. *)
  cursor := cursor + 1;
  if escape_char(cursor^, @current_token^.value.char_value) then
    cursor := cursor + 1
  end;
  return cursor
end;
2025-02-01 11:47:23 +01:00
(*
  Lexes the body of a string literal and stores its unescaped text in
  current_token^.value.string_value.

  input         - first character after the opening double quote.
  current_token - token receiving the allocated string.

  Returns the address of the closing quote, or input itself when the text
  ends before a closing quote is found (nothing is allocated then).

  Changes compared with the previous version: the buffer has room for the
  terminating NUL, and the search for the closing quote steps over each
  escape pair, so an escaped backslash right before the closing quote no
  longer hides that quote.

  NOTE(review): an unknown escape sequence stops the copy, but the token is
  still reported as lexed, as before. Confirm whether it should be
  rejected.
*)
proc lex_string(input: pointer to Char, current_token: pointer to Token): pointer to Char;
var
  token_end: pointer to Char,
  constructed_string: pointer to Char,
  token_length: Word,
  is_valid: Bool;
begin
  token_end := input;

  while token_end^ <> '\0' and token_end^ <> '"' do
    (* A backslash and the character after it belong together. *)
    if token_end^ = '\\' then
      token_end := token_end + 1
    end;
    if token_end^ <> '\0' then
      token_end := token_end + 1
    end
  end;
  if token_end^ <> '"' then
    return input
  end;
  token_length := cast(token_end - input as Word);

  (* The unescaped text is never longer than the source text; one more byte holds the NUL. *)
  current_token^.value.string_value := cast(calloc(token_length + 1u, 1) as pointer to Char);
  is_valid := true;
  constructed_string := current_token^.value.string_value;

  while input < token_end and is_valid do
    if input^ = '\\' then
      input := input + 1;
      if escape_char(input^, constructed_string) then
        input := input + 1
      else
        is_valid := false
      end
    elsif input^ <> '\0' then
      constructed_string^ := input^;
      input := input + 1
    end;
    constructed_string := constructed_string + 1
  end;

  return token_end
end;
2025-01-31 09:46:17 +01:00
(*
  Prints the textual form of a single token, without a trailing separator.
  Tokens carrying a value are printed with the value in angle brackets; a
  kind that is not known is printed as UNKNOWN with its number.
*)
proc print_token(current_token: pointer to Token);
var
  kind: Int;
begin
  kind := current_token^.kind;

  if kind = TOKEN_IDENTIFIER then
    write_c('<');
    write_s(current_token^.value.string_value);
    write_c('>')
  elsif kind = TOKEN_BOOLEAN then
    write_s("BOOLEAN<");
    write_b(current_token^.value.boolean_value);
    write_c('>')
  elsif kind = TOKEN_INTEGER then
    write_c('<');
    write_i(current_token^.value.int_value);
    write_c('>')
  elsif kind = TOKEN_WORD then
    write_c('<');
    write_i(current_token^.value.int_value);
    write_s("u>")
  elsif kind = TOKEN_CHARACTER then
    write_c('<');
    write_i(current_token^.value.char_value);
    write_s("c>")
  elsif kind = TOKEN_STRING then
    write_s("\"...\"")
  elsif kind = TOKEN_COMMENT then
    write_s("COMMENT")
  elsif kind = TOKEN_IF then
    write_s("IF")
  elsif kind = TOKEN_THEN then
    write_s("THEN")
  elsif kind = TOKEN_ELSE then
    write_s("ELSE")
  elsif kind = TOKEN_ELSIF then
    write_s("ELSIF")
  elsif kind = TOKEN_WHILE then
    write_s("WHILE")
  elsif kind = TOKEN_DO then
    write_s("DO")
  elsif kind = TOKEN_PROC then
    write_s("PROC")
  elsif kind = TOKEN_BEGIN then
    write_s("BEGIN")
  elsif kind = TOKEN_END then
    write_s("END")
  elsif kind = TOKEN_EXTERN then
    write_s("EXTERN")
  elsif kind = TOKEN_CONST then
    write_s("CONST")
  elsif kind = TOKEN_VAR then
    write_s("VAR")
  elsif kind = TOKEN_ARRAY then
    write_s("ARRAY")
  elsif kind = TOKEN_OF then
    write_s("OF")
  elsif kind = TOKEN_TYPE then
    write_s("TYPE")
  elsif kind = TOKEN_RECORD then
    write_s("RECORD")
  elsif kind = TOKEN_UNION then
    write_s("UNION")
  elsif kind = TOKEN_POINTER then
    write_s("POINTER")
  elsif kind = TOKEN_TO then
    write_s("TO")
  elsif kind = TOKEN_NIL then
    write_s("NIL")
  elsif kind = TOKEN_AND then
    write_s("AND")
  elsif kind = TOKEN_OR then
    write_s("OR")
  elsif kind = TOKEN_NOT then
    write_s("NOT")
  elsif kind = TOKEN_RETURN then
    write_s("RETURN")
  elsif kind = TOKEN_CAST then
    write_s("CAST")
  elsif kind = TOKEN_AS then
    write_s("AS")
  elsif kind = TOKEN_SIZEOF then
    write_s("SIZEOF")
  elsif kind = TOKEN_LEFT_PAREN then
    write_s("(")
  elsif kind = TOKEN_RIGHT_PAREN then
    write_s(")")
  elsif kind = TOKEN_LEFT_SQUARE then
    write_s("[")
  elsif kind = TOKEN_RIGHT_SQUARE then
    write_s("]")
  elsif kind = TOKEN_GREATER_EQUAL then
    write_s(">=")
  elsif kind = TOKEN_LESS_EQUAL then
    write_s("<=")
  elsif kind = TOKEN_GREATER_THAN then
    write_s(">")
  elsif kind = TOKEN_LESS_THAN then
    write_s("<")
  elsif kind = TOKEN_EQUAL then
    write_s("=")
  elsif kind = TOKEN_NOT_EQUAL then
    write_s("<>")
  elsif kind = TOKEN_ASSIGNMENT then
    write_s(":=")
  elsif kind = TOKEN_SEMICOLON then
    write_c(';')
  elsif kind = TOKEN_DOT then
    write_c('.')
  elsif kind = TOKEN_COMMA then
    write_c(',')
  elsif kind = TOKEN_PLUS then
    write_c('+')
  elsif kind = TOKEN_MINUS then
    write_c('-')
  elsif kind = TOKEN_MULTIPLICATION then
    write_c('*')
  elsif kind = TOKEN_DIVISION then
    write_c('/')
  elsif kind = TOKEN_REMAINDER then
    write_c('%')
  elsif kind = TOKEN_COLON then
    write_c(':')
  elsif kind = TOKEN_HAT then
    write_c('^')
  elsif kind = TOKEN_AT then
    write_c('@')
  else
    write_s("UNKNOWN<");
    write_i(kind);
    write_c('>')
  end
end;

(*
  Prints tokens_size tokens starting at tokens on one line. Every token is
  followed by a blank and the line is finished with a line feed.
*)
proc print_tokens(tokens: pointer to Token, tokens_size: Word);
var
  index: Word;
begin
  index := 0u;

  while index < tokens_size do
    print_token(tokens + index);
    write_c(' ');
    index := index + 1u
  end;
  write_c('\n')
end;
(*
  Tells whether the token_length characters at input_pointer spell exactly
  the NUL-terminated keyword.

  The lengths are compared first: strncmp limited to the token length alone
  would also accept every prefix of the keyword.
*)
proc is_keyword(keyword: pointer to Char, input_pointer: pointer to Char, token_length: Int): Bool;
begin
  if strlen(keyword) <> cast(token_length as Word) then
    return false
  end;
  return strncmp(keyword, input_pointer, token_length) = 0
end;

(*
  Builds the token for a word of token_length characters at input_pointer.

  Reserved words get their own token kind; "true" and "false" become
  TOKEN_BOOLEAN with the matching boolean_value. Any other word becomes
  TOKEN_IDENTIFIER with a freshly allocated, NUL-terminated copy of the
  word in string_value.

  Previously a word that was a prefix of a keyword was taken for that
  keyword, for example the identifier "i" became TOKEN_IF.
*)
proc categorize_identifier(input_pointer: pointer to Char, token_length: Int): Token;
var
  current_token: Token;
begin
  if is_keyword("if", input_pointer, token_length) then
    current_token.kind := TOKEN_IF
  elsif is_keyword("then", input_pointer, token_length) then
    current_token.kind := TOKEN_THEN
  elsif is_keyword("else", input_pointer, token_length) then
    current_token.kind := TOKEN_ELSE
  elsif is_keyword("elsif", input_pointer, token_length) then
    current_token.kind := TOKEN_ELSIF
  elsif is_keyword("while", input_pointer, token_length) then
    current_token.kind := TOKEN_WHILE
  elsif is_keyword("do", input_pointer, token_length) then
    current_token.kind := TOKEN_DO
  elsif is_keyword("proc", input_pointer, token_length) then
    current_token.kind := TOKEN_PROC
  elsif is_keyword("begin", input_pointer, token_length) then
    current_token.kind := TOKEN_BEGIN
  elsif is_keyword("end", input_pointer, token_length) then
    current_token.kind := TOKEN_END
  elsif is_keyword("extern", input_pointer, token_length) then
    current_token.kind := TOKEN_EXTERN
  elsif is_keyword("const", input_pointer, token_length) then
    current_token.kind := TOKEN_CONST
  elsif is_keyword("var", input_pointer, token_length) then
    current_token.kind := TOKEN_VAR
  elsif is_keyword("array", input_pointer, token_length) then
    current_token.kind := TOKEN_ARRAY
  elsif is_keyword("of", input_pointer, token_length) then
    current_token.kind := TOKEN_OF
  elsif is_keyword("type", input_pointer, token_length) then
    current_token.kind := TOKEN_TYPE
  elsif is_keyword("record", input_pointer, token_length) then
    current_token.kind := TOKEN_RECORD
  elsif is_keyword("union", input_pointer, token_length) then
    current_token.kind := TOKEN_UNION
  elsif is_keyword("pointer", input_pointer, token_length) then
    current_token.kind := TOKEN_POINTER
  elsif is_keyword("to", input_pointer, token_length) then
    current_token.kind := TOKEN_TO
  elsif is_keyword("true", input_pointer, token_length) then
    current_token.kind := TOKEN_BOOLEAN;
    current_token.value.boolean_value := true
  elsif is_keyword("false", input_pointer, token_length) then
    current_token.kind := TOKEN_BOOLEAN;
    current_token.value.boolean_value := false
  elsif is_keyword("nil", input_pointer, token_length) then
    current_token.kind := TOKEN_NIL
  elsif is_keyword("and", input_pointer, token_length) then
    current_token.kind := TOKEN_AND
  elsif is_keyword("or", input_pointer, token_length) then
    current_token.kind := TOKEN_OR
  elsif is_keyword("not", input_pointer, token_length) then
    current_token.kind := TOKEN_NOT
  elsif is_keyword("return", input_pointer, token_length) then
    current_token.kind := TOKEN_RETURN
  elsif is_keyword("cast", input_pointer, token_length) then
    current_token.kind := TOKEN_CAST
  elsif is_keyword("as", input_pointer, token_length) then
    current_token.kind := TOKEN_AS
  elsif is_keyword("sizeof", input_pointer, token_length) then
    current_token.kind := TOKEN_SIZEOF
  else
    current_token.kind := TOKEN_IDENTIFIER;
    (* calloc zeroes the buffer, so the copied word stays NUL-terminated. *)
    current_token.value.string_value := cast(calloc(token_length + 1, 1) as pointer to Char);
    strncpy(current_token.value.string_value, input_pointer, token_length)
  end;

  return current_token
end;
2025-02-01 11:47:23 +01:00
(*
  Splits the NUL-terminated source text into tokens.

  input_pointer - beginning of the source text.
  tokens_size   - receives the number of tokens produced.

  Returns the token array, grown by one element per token with
  reallocarray, or nil if no token slot was ever allocated.

  Text that cannot be lexed is reported with a message and skipped.

  Changes compared with the previous version:
  - The kind of every new slot starts as 0. reallocarray does not clear the
    memory, so a failed character or string literal used to leave a random
    kind behind.
  - A string literal that cannot be lexed now advances the input; before,
    the loop could stay on the opening quote forever.
  - The error message shows the character the failed token started with
    rather than the one after it.
  - White space is skipped after an error too, so it is no longer reported
    as further errors.
*)
proc tokenize(input_pointer: pointer to Char, tokens_size: pointer to Word): pointer to Token;
var
  token_end: pointer to Char,
  token_start: pointer to Char,
  tokens: pointer to Token,
  current_token: pointer to Token,
  token_length: Word;
begin
  tokens_size^ := 0u;
  tokens := nil;

  input_pointer := skip_spaces(input_pointer);

  while input_pointer^ <> '\0' do
    tokens := cast(reallocarray(tokens, tokens_size^ + 1u, sizeof(Token)) as pointer to Token);
    current_token := tokens + tokens_size^;
    (* Kind 0 marks a token that could not be lexed. *)
    current_token^.kind := 0;
    token_start := input_pointer;

    if is_alpha(input_pointer^) or input_pointer^ = '_' then
      token_end := lex_identifier(input_pointer + 1);
      token_length := cast(token_end as Word) - cast(input_pointer as Word);

      current_token^ := categorize_identifier(input_pointer, token_length);
      input_pointer := token_end
    elsif is_digit(input_pointer^) then
      token_end := nil;
      current_token^.value.int_value := strtol(input_pointer, @token_end, 10);

      (* A trailing "u" marks an unsigned literal. *)
      if token_end^ = 'u' then
        current_token^.kind := TOKEN_WORD;
        input_pointer := token_end + 1
      else
        current_token^.kind := TOKEN_INTEGER;
        input_pointer := token_end
      end
    elsif input_pointer^ = '(' then
      input_pointer := input_pointer + 1;

      if input_pointer^ = '*' then
        token_end := lex_comment(input_pointer + 1);

        if token_end <> nil then
          (* The stored text runs from the asterisk to the end of the terminator. *)
          token_length := cast(token_end as Word) - cast(input_pointer as Word);
          current_token^.value.string_value := cast(calloc(token_length + 1u, 1) as pointer to Char);
          strncpy(current_token^.value.string_value, input_pointer, token_length);
          current_token^.kind := TOKEN_COMMENT;
          input_pointer := token_end
        else
          (* Unterminated comment. *)
          current_token^.kind := 0
        end
      else
        current_token^.kind := TOKEN_LEFT_PAREN
      end
    elsif input_pointer^ = ')' then
      current_token^.kind := TOKEN_RIGHT_PAREN;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = '\'' then
      token_end := lex_character(input_pointer + 1, current_token);

      if token_end^ = '\'' then
        current_token^.kind := TOKEN_CHARACTER;
        input_pointer := token_end + 1
      else
        input_pointer := input_pointer + 1
      end
    elsif input_pointer^ = '"' then
      token_end := lex_string(input_pointer + 1, current_token);

      if token_end^ = '"' then
        current_token^.kind := TOKEN_STRING;
        input_pointer := token_end + 1
      else
        (* Step over the opening quote so that the loop makes progress. *)
        input_pointer := input_pointer + 1
      end
    elsif input_pointer^ = '[' then
      current_token^.kind := TOKEN_LEFT_SQUARE;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = ']' then
      current_token^.kind := TOKEN_RIGHT_SQUARE;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = '>' then
      input_pointer := input_pointer + 1;

      if input_pointer^ = '=' then
        current_token^.kind := TOKEN_GREATER_EQUAL;
        input_pointer := input_pointer + 1
      else
        current_token^.kind := TOKEN_GREATER_THAN
      end
    elsif input_pointer^ = '<' then
      input_pointer := input_pointer + 1;

      if input_pointer^ = '=' then
        current_token^.kind := TOKEN_LESS_EQUAL;
        input_pointer := input_pointer + 1
      elsif input_pointer^ = '>' then
        current_token^.kind := TOKEN_NOT_EQUAL;
        input_pointer := input_pointer + 1
      else
        current_token^.kind := TOKEN_LESS_THAN
      end
    elsif input_pointer^ = '=' then
      current_token^.kind := TOKEN_EQUAL;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = ';' then
      current_token^.kind := TOKEN_SEMICOLON;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = '.' then
      current_token^.kind := TOKEN_DOT;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = ',' then
      current_token^.kind := TOKEN_COMMA;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = '+' then
      current_token^.kind := TOKEN_PLUS;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = '-' then
      current_token^.kind := TOKEN_MINUS;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = '*' then
      current_token^.kind := TOKEN_MULTIPLICATION;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = '/' then
      current_token^.kind := TOKEN_DIVISION;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = '%' then
      current_token^.kind := TOKEN_REMAINDER;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = ':' then
      input_pointer := input_pointer + 1;

      if input_pointer^ = '=' then
        current_token^.kind := TOKEN_ASSIGNMENT;
        input_pointer := input_pointer + 1
      else
        current_token^.kind := TOKEN_COLON
      end
    elsif input_pointer^ = '^' then
      current_token^.kind := TOKEN_HAT;
      input_pointer := input_pointer + 1
    elsif input_pointer^ = '@' then
      current_token^.kind := TOKEN_AT;
      input_pointer := input_pointer + 1
    else
      current_token^.kind := 0;
      input_pointer := input_pointer + 1
    end;

    if current_token^.kind <> 0 then
      tokens_size^ := tokens_size^ + 1u
    else
      write_s("Lexical analysis error on \"");
      write_c(token_start^);
      write_s("\".\n")
    end;
    input_pointer := skip_spaces(input_pointer)
  end;

  return tokens
end;
(*
  Allocates a zero-initialized Literal node and returns it. The token
  arguments are not inspected and no token is consumed.
*)
proc parse_literal(tokens: pointer to pointer to Token, tokens_size: pointer to Word): pointer to Literal;
var
  literal: pointer to Literal;
begin
  literal := cast(calloc(1, sizeof(Literal)) as pointer to Literal);

  return literal
end;
(*
  Parses one constant definition starting at the identifier token.

  tokens      - address of the current token pointer, advanced past the
                definition.
  tokens_size - address of the number of remaining tokens, reduced
                accordingly.

  Four tokens are consumed in total: two before the literal is parsed and
  two after it. The name of the constant is printed followed by a line
  feed.

  Returns the new definition, or nil if fewer than four tokens remain.

  Changes compared with the previous version:
  - The name buffer has room for the terminating NUL that strcpy writes.
  - The remaining token count is decremented through the pointer. Before,
    the pointer itself was moved and the count never changed.
  - The token count is checked before any token is consumed.
*)
proc parse_constant_definition(tokens: pointer to pointer to Token,
  tokens_size: pointer to Word): pointer to ConstantDefinition;
var
  result: pointer to ConstantDefinition;
begin
  (* Checked before allocating so that nothing leaks on failure. *)
  if tokens_size^ < 4u then
    return nil
  end;
  result := cast(calloc(1, sizeof(ConstantDefinition)) as pointer to ConstantDefinition);

  result^.name := cast(malloc(strlen(tokens^^.value.string_value) + 1u) as pointer to Char);
  strcpy(result^.name, tokens^^.value.string_value);

  tokens^ := tokens^ + 2u;
  tokens_size^ := tokens_size^ - 2u;

  write_s(result^.name);
  write_c('\n');

  result^.body := parse_literal(tokens, tokens_size);

  tokens^ := tokens^ + 2u;
  tokens_size^ := tokens_size^ - 2u;

  return result
end;
(*
  Parses the program, which currently means its optional constant section.

  tokens      - address of the current token pointer, advanced while
                parsing.
  tokens_size - address of the number of remaining tokens.

  Returns the program node, or nil if a constant definition cannot be
  parsed.

  Changes compared with the previous version: the program node is actually
  returned (the procedure used to end without a return statement), and an
  empty token list yields an empty program instead of reading the first
  token through a pointer that may be nil.
*)
proc parse_program(tokens: pointer to pointer to Token, tokens_size: pointer to Word): pointer to Program;
var
  result: pointer to Program,
  current_constant: pointer to pointer to ConstantDefinition;
begin
  result := cast(calloc(1, sizeof(Program)) as pointer to Program);

  result^.constants.elements := nil;
  result^.constants.count := 0u;

  if tokens_size^ = 0u then
    return result
  end;
  if tokens^^.kind = TOKEN_CONST then
    tokens^ := tokens^ + 1;
    tokens_size^ := tokens_size^ - 1u;

    while tokens_size^ > 0u and tokens^^.kind = TOKEN_IDENTIFIER do
      (* Grow the array by one slot and parse straight into that slot. *)
      result^.constants.elements := cast(
        reallocarray(result^.constants.elements, result^.constants.count + 1u, sizeof(pointer to ConstantDefinition))
          as pointer to pointer to ConstantDefinition);
      current_constant := result^.constants.elements + result^.constants.count;

      result^.constants.count := result^.constants.count + 1u;

      current_constant^ := parse_constant_definition(tokens, tokens_size);
      if current_constant^ = nil then
        return nil
      end
    end
  end;

  return result
end;
(*
  Interprets the program arguments.

  argc - number of entries in argv.
  argv - argument strings; the entry at index 0 is skipped.

  "--tokenize" switches the tokenize flag on, an argument that does not
  start with a dash is taken as the input file (the last one wins), and any
  other argument is an error.

  Returns the parsed command line, or nil after printing a message when an
  unknown option is given or no input file is named. The record is now
  released on both error paths instead of being leaked.
*)
proc parse_command_line(argc: Int, argv: pointer to pointer to Char): pointer to CommandLine;
var
  parameter: pointer to pointer to Char,
  i: Int,
  result: pointer to CommandLine;
begin
  i := 1;
  result := cast(malloc(sizeof(CommandLine)) as pointer to CommandLine);
  result^.tokenize := false;
  result^.input := nil;

  while i < argc do
    parameter := argv + i;

    if strcmp(parameter^, "--tokenize") = 0 then
      result^.tokenize := true
    elsif parameter^^ <> '-' then
      result^.input := parameter^
    else
      write_s("Fatal error: Unknown command line options:");
      write_c(' ');
      write_s(parameter^);
      write_s(".\n");

      free(cast(result as pointer to Byte));
      return nil
    end;

    i := i + 1
  end;
  if result^.input = nil then
    write_s("Fatal error: no input files.\n");

    free(cast(result as pointer to Byte));
    return nil
  end;

  return result
end;
(*
  Runs the compiler front end for the given program arguments: parses the
  command line, reads the input file, tokenizes it, prints the tokens when
  requested and finally calls the parser.

  Returns 0 on success, 2 if the command line is rejected and 3 if the
  input file cannot be read; perror is called in the latter case.
*)
proc process(argc: Int, argv: pointer to pointer to Char): Int;
var
  input: pointer to Char,
  tokens: pointer to Token,
  tokens_size: Word,
  command_line: pointer to CommandLine,
  exit_code: Int;
begin
  exit_code := 0;
  command_line := parse_command_line(argc, argv);

  if cast(command_line as Word) = 0u then
    exit_code := 2
  else
    input := read_source(command_line^.input);

    if input = nil then
      perror(command_line^.input);
      exit_code := 3
    else
      tokens := tokenize(input, @tokens_size);

      if command_line^.tokenize then
        print_tokens(tokens, tokens_size)
      end;
      parse_program(@tokens, @tokens_size)
    end
  end;

  return exit_code
end;
(* Program entry: the result of process becomes the exit status. *)
begin
  exit(process(count, parameters))
end.