2025-02-03 23:04:00 +01:00
|
|
|
const
  (* Values for the whence argument of fseek. *)
  SEEK_SET* = 0;
  SEEK_CUR* = 1;
  SEEK_END* = 2;

  (* Token kinds: identifiers and keywords. *)
  TOKEN_IDENTIFIER* = 1;
  TOKEN_IF* = 2;
  TOKEN_THEN* = 3;
  TOKEN_ELSE* = 4;
  TOKEN_ELSIF* = 5;
  TOKEN_WHILE* = 6;
  TOKEN_DO* = 7;
  TOKEN_PROC* = 8;
  TOKEN_BEGIN* = 9;
  TOKEN_END* = 10;
  TOKEN_EXTERN* = 11;
  TOKEN_CONST* = 12;
  TOKEN_VAR* = 13;
  TOKEN_ARRAY* = 14;
  TOKEN_OF* = 15;
  TOKEN_TYPE* = 16;
  TOKEN_RECORD* = 17;
  TOKEN_UNION* = 18;
  TOKEN_POINTER* = 19;
  TOKEN_TO* = 20;
  TOKEN_BOOLEAN* = 21;
  TOKEN_NIL* = 22;
  TOKEN_AND* = 23;
  TOKEN_OR* = 24;
  TOKEN_NOT* = 25;
  TOKEN_RETURN* = 26;
  TOKEN_CAST* = 27;
  TOKEN_AS* = 28;
  TOKEN_SIZEOF* = 29;

  (* Token kinds: brackets, comparison operators and punctuation. *)
  TOKEN_LEFT_PAREN* = 30;
  TOKEN_RIGHT_PAREN* = 31;
  TOKEN_LEFT_SQUARE* = 32;
  TOKEN_RIGHT_SQUARE* = 33;
  TOKEN_GREATER_EQUAL* = 34;
  TOKEN_LESS_EQUAL* = 35;
  TOKEN_GREATER_THAN* = 36;
  TOKEN_LESS_THAN* = 37;
  TOKEN_NOT_EQUAL* = 38;
  TOKEN_EQUAL* = 39;
  TOKEN_SEMICOLON* = 40;
  TOKEN_DOT* = 41;
  TOKEN_COMMA* = 42;

  (* Token kinds: arithmetic, assignment and pointer operators. *)
  TOKEN_PLUS* = 43;
  TOKEN_MINUS* = 44;
  TOKEN_MULTIPLICATION* = 45;
  TOKEN_DIVISION* = 46;
  TOKEN_REMAINDER* = 47;
  TOKEN_ASSIGNMENT* = 48;
  TOKEN_COLON* = 49;
  TOKEN_HAT* = 50;
  TOKEN_AT* = 51;

  (* Token kinds: comments, literals and the defer keyword. *)
  TOKEN_COMMENT* = 52;
  TOKEN_INTEGER* = 53;
  TOKEN_WORD* = 54;
  TOKEN_CHARACTER* = 55;
  TOKEN_STRING* = 56;
  TOKEN_DEFER* = 57;
|
2025-02-03 23:04:00 +01:00
|
|
|
|
2025-01-11 13:32:37 +01:00
|
|
|
type
  (* A line and column pair; make_position starts both at 1. *)
  Position* = record
    line: Word;
    column: Word
  end,

  (* A span in the source delimited by two positions. *)
  Location* = record
    first: Position;
    last: Position
  end,

  (* Lexer state: the text not yet consumed and the position of its start. *)
  SourceCode = record
    position: Position;
    text: String
  end,

  (*
    Payload of a token. Which member is meaningful depends on Token.kind:
    int_value for integer and word literals, string_value for identifiers
    and string literals, string for comments, boolean_value for booleans,
    char_value for character literals.
  *)
  TokenValue* = union
    int_value: Int;
    string_value: pointer to Char;
    string: String;
    boolean_value: Bool;
    char_value: Char
  end,

  (* A lexed token; kind holds one of the TOKEN_ constants. *)
  Token* = record
    kind: Int;
    value: TokenValue;
    location: Location
  end,

  (* Placeholder for the C stream type; only used through pointers. *)
  FILE* = record
    dummy: Int
  end,

  (* NOTE(review): presumably the parsed command line options; confirm against the option parser. *)
  CommandLine* = record
    input: pointer to Char;
    tokenize: Bool;
    syntax_tree: Bool
  end,

  (* NOTE(review): presumably an integer literal node of the syntax tree; confirm against the parser. *)
  Literal* = record
    value: Int
  end,

  (* A named constant whose body points to a Literal. *)
  ConstantDefinition* = record
    name: pointer to Char;
    body: pointer to Literal
  end,

  (* A counted array of pointers to constant definitions. *)
  ConstantPart* = record
    elements: pointer to pointer to ConstantDefinition;
    count: Word
  end,

  (* Root record; currently holds only the constant section. *)
  Program* = record
    constants: ConstantPart
  end;
|
|
|
|
|
2025-01-30 01:03:16 +01:00
|
|
|
(*
|
|
|
|
External procedures.
|
|
|
|
*)
|
2025-02-07 22:12:59 +01:00
|
|
|
(* Stream input and output. *)
proc fopen(pathname: pointer to Char, mode: pointer to Char): pointer to FILE; extern
proc fclose(stream: pointer to FILE): Int; extern
proc fseek(stream: pointer to FILE, off: Int, whence: Int): Int; extern
proc rewind(stream: pointer to FILE); extern
proc ftell(stream: pointer to FILE): Int; extern
proc fread(ptr: pointer to Byte, size: Word, nmemb: Word, stream: pointer to FILE): Word; extern

(*
  Raw descriptor output. The third parameter used to be named like the
  built-in Word type; it is now called count. Its declared type is left as
  it was so that existing calls keep compiling.
  NOTE(review): the C prototype takes an unsigned size here; confirm
  whether the type should be Word.
*)
proc write(fd: Int, buf: pointer to Byte, count: Int): Int; extern

(* Dynamic memory. *)
proc malloc(size: Word): pointer to Byte; extern
proc free(ptr: pointer to Byte); extern
proc calloc(nmemb: Word, size: Word): pointer to Byte; extern
proc realloc(ptr: pointer to Byte, size: Word): pointer to Byte; extern

proc memset(ptr: pointer to Char, c: Int, n: Int): pointer to Char; extern

(* Zero-terminated string helpers. *)
proc strcmp(s1: pointer to Char, s2: pointer to Char): Int; extern
proc strncmp(s1: pointer to Char, s2: pointer to Char, n: Word): Int; extern
proc strncpy(dst: pointer to Char, src: pointer to Char, dsize: Word): pointer to Char; extern
proc strcpy(dst: pointer to Char, src: pointer to Char): pointer to Char; extern
proc strlen(ptr: pointer to Char): Word; extern

proc strtol(nptr: pointer to Char, endptr: pointer to pointer to Char, base: Int): Int; extern

(* Diagnostics and process termination. *)
proc perror(s: pointer to Char); extern
proc exit(code: Int); extern
|
2025-01-25 19:50:36 +01:00
|
|
|
|
2025-01-30 01:03:16 +01:00
|
|
|
(*
|
|
|
|
Standard procedures.
|
|
|
|
*)
|
2025-02-07 00:56:54 +01:00
|
|
|
(*
  Resizes ptr so that it can hold n elements of size bytes each.

  Returns nil without touching ptr when n * size does not fit into a Word;
  otherwise returns whatever realloc returns.
*)
proc reallocarray(ptr: pointer to Byte, n: Word, size: Word): pointer to Byte;
var
  total: Word;
begin
  total := n * size;

  (* Nested on purpose so that the division never runs with size = 0. *)
  if size <> 0u then
    if total / size <> n then
      return nil
    end
  end;
  return realloc(ptr, total)
end
|
2025-02-07 00:56:54 +01:00
|
|
|
|
2025-01-25 19:50:36 +01:00
|
|
|
(*
  Output primitives. All of them write to descriptor 1 (standard output).
  They used to write to descriptor 0, which is standard input and only
  works while it happens to be a terminal opened for writing.
*)

(* Writes a String using its stored length. *)
proc write_s(value: String);
begin
  write(1, value.ptr, value.length)
end

(* Writes a zero-terminated character sequence. *)
proc write_z(value: pointer to Char);
begin
  write(1, value, strlen(value))
end

(* Writes "true" or "false". *)
proc write_b(value: Bool);
begin
  if value then
    write_s("true")
  else
    write_s("false")
  end
end

(* Writes a single character. *)
proc write_c(value: Char);
begin
  write(1, @value, 1)
end
|
2025-01-25 19:50:36 +01:00
|
|
|
|
2025-01-27 01:16:27 +01:00
|
|
|
(*
  Writes value in decimal, with a leading minus sign for negative numbers.

  Digits are produced from the least significant one into the end of a
  ten character buffer and then written front to back. The sign is never
  flipped on the whole value, so the smallest integer does not overflow.
  NOTE(review): assumes / and % truncate toward zero as in C; confirm.
*)
proc write_i(value: Int);
var
  digit: Int, n: Int,
  buffer: array 10 of Char;
begin
  n := 9;

  if value = 0 then
    write_c('0')
  elsif value < 0 then
    write_c('-')
  end;
  while value <> 0 do
    digit := value % 10;
    if digit < 0 then
      digit := 0 - digit
    end;
    value := value / 10;

    buffer[n] := cast(cast('0' as Int) + digit as Char);
    n := n - 1
  end;
  while n < 9 do
    n := n + 1;
    write_c(buffer[n])
  end
end
|
2025-01-11 13:32:37 +01:00
|
|
|
|
2025-01-27 01:16:27 +01:00
|
|
|
(*
  Writes an unsigned value in decimal by handing it to write_i.
  NOTE(review): values that do not fit into Int are presumably printed
  incorrectly; confirm the width of Word and Int.
*)
proc write_u(value: Word);
begin
  write_i(value)
end
|
2025-01-27 01:16:27 +01:00
|
|
|
|
2025-01-28 11:21:02 +01:00
|
|
|
(* Tells whether c is one of the characters '0' to '9'. *)
proc is_digit(c: Char): Bool;
var
  code: Int;
begin
  code := cast(c as Int);

  return code >= cast('0' as Int) and code <= cast('9' as Int)
end
|
2025-01-28 11:21:02 +01:00
|
|
|
|
|
|
|
(*
  Tells whether c is a letter 'A' to 'Z' or 'a' to 'z'.

  The two ranges are tested separately: a single 'A' to 'z' comparison also
  accepts the punctuation between them, including the square brackets and
  the hat, which would then be lexed as identifier characters. The
  underscore is not a letter here; callers check it on their own.
*)
proc is_alpha(c: Char): Bool;
var
  code: Int;
begin
  code := cast(c as Int);

  return (code >= cast('A' as Int) and code <= cast('Z' as Int))
    or (code >= cast('a' as Int) and code <= cast('z' as Int))
end
|
2025-01-28 11:21:02 +01:00
|
|
|
|
|
|
|
(* Tells whether c is accepted by is_digit or by is_alpha. *)
proc is_alnum(c: Char): Bool;
begin
  if is_digit(c) then
    return true
  end;
  return is_alpha(c)
end
|
2025-01-28 11:21:02 +01:00
|
|
|
|
|
|
|
(*
  Tells whether c is a blank: space, tab, line feed or carriage return.
  The carriage return is included so that sources with CRLF line endings
  do not leave a stray character behind every line.
*)
proc is_space(c: Char): Bool;
begin
  return c = ' ' or c = '\n' or c = '\t' or c = '\r'
end
|
|
|
|
|
|
|
|
(*
  Compares a String with a character sequence of the given length.

  Returns false when the lengths differ; otherwise compares the first
  length characters with strncmp and returns whether they are equal.
*)
proc string_equals_chars(this: String, that: pointer to Char, length: Word): Bool;
begin
  if this.length <> length then
    return false
  end;
  return strncmp(this.ptr, that, length) = 0
end
|
2025-01-28 11:21:02 +01:00
|
|
|
|
2025-02-08 23:02:27 +01:00
|
|
|
(*
  Returns the part of string that begins start characters in and runs to
  its end. No characters are copied; the result shares the same memory.

  A start beyond the end is clamped to the length, giving an empty string.
  Without the clamp the unsigned length would wrap around to a huge value.
*)
proc open_substring(string: String, start: Word): String;
begin
  if start > string.length then
    start := string.length
  end;
  string.ptr := string.ptr + start;
  string.length := string.length - start;

  return string
end
|
|
|
|
|
|
|
|
(*
  Returns the character position places after the start of string.
  The position is not checked against the length of the string.
*)
proc char_at(string: String, position: Word): Char;
var
  cursor: pointer to Char;
begin
  cursor := string.ptr + position;

  return cursor^
end
|
|
|
|
|
2025-01-30 01:03:16 +01:00
|
|
|
(*
|
|
|
|
End of standard procedures.
|
|
|
|
*)
|
2025-01-27 01:16:27 +01:00
|
|
|
|
2025-02-08 23:02:27 +01:00
|
|
|
(* Returns the position of the first character: line 1, column 1. *)
proc make_position(): Position;
var
  result: Position;
begin
  result.column := 1u;
  result.line := 1u;

  return result
end
|
|
|
|
|
|
|
|
(*
  Reads the whole file called filename into a freshly allocated buffer.

  On success stores the buffer and the file size in result and returns
  true. Returns false when the file cannot be opened, measured, allocated
  or read; nothing is stored in result then and no buffer is leaked.

  The buffer is one byte larger than the file and zero filled, so the text
  is always followed by a zero character. The lexing procedures that scan
  through raw pointers stop at that character. The extra byte is not
  counted in the stored length. An empty file is read successfully.
*)
proc read_source(filename: pointer to Char, result: pointer to String): Bool;
var
  input_file: pointer to FILE,
  source_size: Int,
  input: pointer to Byte;
begin
  input_file := fopen(filename, "rb\0".ptr);

  if input_file = nil then
    return false
  end;
  defer
    fclose(input_file)
  end;
  if fseek(input_file, 0, SEEK_END) <> 0 then
    return false
  end;
  source_size := ftell(input_file);
  if source_size < 0 then
    return false
  end;
  rewind(input_file);

  input := calloc(source_size + 1, 1);
  if input = nil then
    return false
  end;
  (* Reading zero bytes as one element always reports zero elements. *)
  if source_size > 0 and fread(input, source_size, 1, input_file) <> 1u then
    free(input);
    return false
  end;
  result^.length := cast(source_size as Word);
  result^.ptr := cast(input as pointer to Char);

  return true
end
|
2025-01-21 20:18:27 +01:00
|
|
|
|
2025-01-31 09:46:17 +01:00
|
|
|
(*
  Translates the character that follows a backslash into the character the
  escape sequence stands for.

  For a known escape the translated character is stored through result and
  true is returned. For anything else result is left untouched and false
  is returned.
*)
proc escape_char(escape: Char, result: pointer to Char): Bool;
var
  recognized: Bool;
begin
  recognized := true;

  if escape = 'n' then
    result^ := '\n'
  elsif escape = 'a' then
    result^ := '\a'
  elsif escape = 'b' then
    result^ := '\b'
  elsif escape = 't' then
    result^ := '\t'
  elsif escape = 'f' then
    result^ := '\f'
  elsif escape = 'r' then
    result^ := '\r'
  elsif escape = 'v' then
    result^ := '\v'
  elsif escape = '\\' then
    result^ := '\\'
  elsif escape = '\'' then
    result^ := '\''
  elsif escape = '"' then
    result^ := '"'
  elsif escape = '?' then
    result^ := '\?'
  elsif escape = '0' then
    result^ := '\0'
  else
    recognized := false
  end;
  return recognized
end
|
2025-01-31 09:46:17 +01:00
|
|
|
|
2025-02-08 23:02:27 +01:00
|
|
|
(*
  Returns source_code moved forward by length characters: the column grows
  by length and the text loses its first length characters. The line
  number is not changed.
*)
proc advance_source(source_code: SourceCode, length: Word): SourceCode;
begin
  source_code.position.column := source_code.position.column + length;
  source_code.text := open_substring(source_code.text, length);

  return source_code
end
|
|
|
|
|
|
|
|
(*
  Returns source_code with all leading blanks removed. Every line feed
  moves the position to column 1 of the next line; every other blank moves
  it one column to the right.
*)
proc skip_spaces(source_code: SourceCode): SourceCode;
begin
  while source_code.text.length > 0u and is_space(char_at(source_code.text, 0)) do
    if char_at(source_code.text, 0) <> '\n' then
      source_code.position.column := source_code.position.column + 1u
    else
      source_code.position.line := source_code.position.line + 1u;
      source_code.position.column := 1u
    end;
    source_code.text := open_substring(source_code.text, 1u)
  end;
  return source_code
end
|
2025-01-28 11:21:02 +01:00
|
|
|
|
|
|
|
(*
  Skips letters, digits and underscores starting at input and returns a
  pointer to the first character that is none of them.
*)
proc lex_identifier(input: pointer to Char): pointer to Char;
var
  cursor: pointer to Char;
begin
  cursor := input;

  while cursor^ = '_' or is_alnum(cursor^) do
    cursor := cursor + 1
  end;
  return cursor
end
|
2025-01-28 11:21:02 +01:00
|
|
|
|
2025-02-08 23:02:27 +01:00
|
|
|
(*
  Reads a comment body. source_code must point just behind the opening
  delimiter.

  When the closing delimiter is found, it is consumed, token_content
  receives a newly allocated copy of the text between the delimiters, and
  true is returned. When the text runs out first, token_content is set to
  an empty string with a nil pointer and false is returned.

  Line feeds inside the comment advance the line counter and reset the
  column in the same way skip_spaces does, so positions after a comment
  that spans several lines stay correct.
  NOTE(review): the result of malloc is not checked here.
*)
proc lex_comment(source_code: pointer to SourceCode, token_content: pointer to String): Bool;
var
  result: pointer to Char;
begin
  token_content^.ptr := source_code^.text.ptr;
  token_content^.length := 0u;

  while source_code^.text.length > 1u do
    if char_at(source_code^.text, 0) = '*' and char_at(source_code^.text, 1) = ')' then
      source_code^ := advance_source(source_code^, 2u);

      result := cast(malloc(token_content^.length) as pointer to Char);
      strncpy(result, token_content^.ptr, token_content^.length);
      token_content^.ptr := result;

      return true
    end;
    token_content^.length := token_content^.length + 1u;

    if char_at(source_code^.text, 0) = '\n' then
      source_code^.text := open_substring(source_code^.text, 1u);
      source_code^.position.line := source_code^.position.line + 1u;
      source_code^.position.column := 1u
    else
      source_code^ := advance_source(source_code^, 1u)
    end
  end;
  token_content^.ptr := nil;
  token_content^.length := 0u;

  return false
end
|
2025-01-30 23:09:51 +01:00
|
|
|
|
2025-02-01 09:21:29 +01:00
|
|
|
(*
  Reads the content of a character literal. input must point just behind
  the opening quote.

  A plain character or a known escape sequence is stored in the char_value
  of current_token and the returned pointer is behind it. At a zero
  character the input is returned unchanged. After a backslash with an
  unknown escape the returned pointer is at the escape character and no
  value is stored.
*)
proc lex_character(input: pointer to Char, current_token: pointer to Token): pointer to Char;
begin
  if input^ = '\0' then
    return input
  end;
  if input^ <> '\\' then
    current_token^.value.char_value := input^;
    return input + 1
  end;
  input := input + 1;

  if escape_char(input^, @current_token^.value.char_value) then
    return input + 1
  end;
  return input
end
|
2025-02-01 09:21:29 +01:00
|
|
|
|
2025-02-01 11:47:23 +01:00
|
|
|
(*
  Reads the content of a string literal. input must point just behind the
  opening quote and the text must end with a zero character.

  When the closing quote is found, the decoded text is stored in the
  string_value of current_token and a pointer to the closing quote is
  returned. When the text ends first, input is returned unchanged and
  nothing is allocated.

  The scan for the closing quote steps over each backslash together with
  the character after it. Looking only at the character before a quote
  gets an escaped backslash directly before the closing quote wrong.

  The buffer has one extra zero byte so that the decoded text is zero
  terminated. Decoding stops at the first unknown escape sequence; the
  text decoded so far is kept and the token is still accepted.
*)
proc lex_string(input: pointer to Char, current_token: pointer to Token): pointer to Char;
var
  token_end: pointer to Char,
  constructed_string: pointer to Char,
  token_length: Word,
  is_valid: Bool;
begin
  token_end := input;

  while token_end^ <> '\0' and token_end^ <> '"' do
    if token_end^ = '\\' and (token_end + 1)^ <> '\0' then
      token_end := token_end + 2
    else
      token_end := token_end + 1
    end
  end;
  if token_end^ <> '"' then
    return input
  end;
  token_length := cast(token_end - input as Word);
  current_token^.value.string_value := cast(calloc(token_length + 1u, 1) as pointer to Char);

  is_valid := true;
  constructed_string := current_token^.value.string_value;

  while input < token_end and is_valid do
    if input^ = '\\' then
      input := input + 1;

      if escape_char(input^, constructed_string) then
        input := input + 1
      else
        is_valid := false
      end
    elsif input^ <> '\0' then
      constructed_string^ := input^;
      input := input + 1
    end;
    constructed_string := constructed_string + 1
  end;
  return token_end
end
|
2025-02-01 11:47:23 +01:00
|
|
|
|
2025-01-31 09:46:17 +01:00
|
|
|
(*
  Writes the textual form of one token without any separator. Keywords are
  written in upper case, operators as their symbols, and tokens with a
  payload between angle brackets. A kind that is not known is written as
  UNKNOWN followed by its number.
*)
proc print_single_token(current_token: pointer to Token);
var
  kind: Int;
begin
  kind := current_token^.kind;

  if kind = TOKEN_IF then
    write_s("IF")
  elsif kind = TOKEN_THEN then
    write_s("THEN")
  elsif kind = TOKEN_ELSE then
    write_s("ELSE")
  elsif kind = TOKEN_ELSIF then
    write_s("ELSIF")
  elsif kind = TOKEN_WHILE then
    write_s("WHILE")
  elsif kind = TOKEN_DO then
    write_s("DO")
  elsif kind = TOKEN_PROC then
    write_s("PROC")
  elsif kind = TOKEN_BEGIN then
    write_s("BEGIN")
  elsif kind = TOKEN_END then
    write_s("END")
  elsif kind = TOKEN_EXTERN then
    write_s("EXTERN")
  elsif kind = TOKEN_CONST then
    write_s("CONST")
  elsif kind = TOKEN_VAR then
    write_s("VAR")
  elsif kind = TOKEN_ARRAY then
    write_s("ARRAY")
  elsif kind = TOKEN_OF then
    write_s("OF")
  elsif kind = TOKEN_TYPE then
    write_s("TYPE")
  elsif kind = TOKEN_RECORD then
    write_s("RECORD")
  elsif kind = TOKEN_UNION then
    write_s("UNION")
  elsif kind = TOKEN_POINTER then
    write_s("POINTER")
  elsif kind = TOKEN_TO then
    write_s("TO")
  elsif kind = TOKEN_BOOLEAN then
    write_s("BOOLEAN<");
    write_b(current_token^.value.boolean_value);
    write_c('>')
  elsif kind = TOKEN_NIL then
    write_s("NIL")
  elsif kind = TOKEN_AND then
    write_s("AND")
  elsif kind = TOKEN_OR then
    write_s("OR")
  elsif kind = TOKEN_NOT then
    write_s("NOT")
  elsif kind = TOKEN_RETURN then
    write_s("RETURN")
  elsif kind = TOKEN_CAST then
    write_s("CAST")
  elsif kind = TOKEN_AS then
    write_s("AS")
  elsif kind = TOKEN_SIZEOF then
    write_s("SIZEOF")
  elsif kind = TOKEN_IDENTIFIER then
    write_c('<');
    write_z(current_token^.value.string_value);
    write_c('>')
  elsif kind = TOKEN_LEFT_PAREN then
    write_s("(")
  elsif kind = TOKEN_RIGHT_PAREN then
    write_s(")")
  elsif kind = TOKEN_LEFT_SQUARE then
    write_s("[")
  elsif kind = TOKEN_RIGHT_SQUARE then
    write_s("]")
  elsif kind = TOKEN_GREATER_EQUAL then
    write_s(">=")
  elsif kind = TOKEN_LESS_EQUAL then
    write_s("<=")
  elsif kind = TOKEN_GREATER_THAN then
    write_s(">")
  elsif kind = TOKEN_LESS_THAN then
    write_s("<")
  elsif kind = TOKEN_EQUAL then
    write_s("=")
  elsif kind = TOKEN_NOT_EQUAL then
    write_s("<>")
  elsif kind = TOKEN_SEMICOLON then
    write_c(';')
  elsif kind = TOKEN_DOT then
    write_c('.')
  elsif kind = TOKEN_COMMA then
    write_c(',')
  elsif kind = TOKEN_PLUS then
    write_c('+')
  elsif kind = TOKEN_MINUS then
    write_c('-')
  elsif kind = TOKEN_MULTIPLICATION then
    write_c('*')
  elsif kind = TOKEN_DIVISION then
    write_c('/')
  elsif kind = TOKEN_REMAINDER then
    write_c('%')
  elsif kind = TOKEN_ASSIGNMENT then
    write_s(":=")
  elsif kind = TOKEN_COLON then
    write_c(':')
  elsif kind = TOKEN_HAT then
    write_c('^')
  elsif kind = TOKEN_AT then
    write_c('@')
  elsif kind = TOKEN_COMMENT then
    write_s("(* COMMENT *)")
  elsif kind = TOKEN_INTEGER then
    write_c('<');
    write_i(current_token^.value.int_value);
    write_c('>')
  elsif kind = TOKEN_WORD then
    write_c('<');
    write_i(current_token^.value.int_value);
    write_s("u>")
  elsif kind = TOKEN_CHARACTER then
    write_c('<');
    write_i(current_token^.value.char_value);
    write_s("c>")
  elsif kind = TOKEN_STRING then
    write_s("\"...\"")
  elsif kind = TOKEN_DEFER then
    write_s("DEFER")
  else
    write_s("UNKNOWN<");
    write_i(kind);
    write_c('>')
  end
end

(*
  Writes tokens_size tokens starting at tokens on one line. Every token is
  followed by a space and the line is closed with a line feed.
*)
proc print_tokens(tokens: pointer to Token, tokens_size: Word);
var
  i: Word;
begin
  i := 0u;

  while i < tokens_size do
    print_single_token(tokens + i);
    write_c(' ');

    i := i + 1u
  end;
  write_c('\n')
end
|
2025-01-30 01:03:16 +01:00
|
|
|
|
|
|
|
(*
  Returns the token kind of the keyword spelled by the token_length
  characters at input_pointer, or TOKEN_IDENTIFIER when they spell no
  keyword. The boolean literals are not handled here.
*)
proc keyword_kind_of(input_pointer: pointer to Char, token_length: Int): Int;
begin
  if string_equals_chars("if", input_pointer, token_length) then
    return TOKEN_IF
  end;
  if string_equals_chars("then", input_pointer, token_length) then
    return TOKEN_THEN
  end;
  if string_equals_chars("else", input_pointer, token_length) then
    return TOKEN_ELSE
  end;
  if string_equals_chars("elsif", input_pointer, token_length) then
    return TOKEN_ELSIF
  end;
  if string_equals_chars("while", input_pointer, token_length) then
    return TOKEN_WHILE
  end;
  if string_equals_chars("do", input_pointer, token_length) then
    return TOKEN_DO
  end;
  if string_equals_chars("proc", input_pointer, token_length) then
    return TOKEN_PROC
  end;
  if string_equals_chars("begin", input_pointer, token_length) then
    return TOKEN_BEGIN
  end;
  if string_equals_chars("end", input_pointer, token_length) then
    return TOKEN_END
  end;
  if string_equals_chars("extern", input_pointer, token_length) then
    return TOKEN_EXTERN
  end;
  if string_equals_chars("const", input_pointer, token_length) then
    return TOKEN_CONST
  end;
  if string_equals_chars("var", input_pointer, token_length) then
    return TOKEN_VAR
  end;
  if string_equals_chars("array", input_pointer, token_length) then
    return TOKEN_ARRAY
  end;
  if string_equals_chars("of", input_pointer, token_length) then
    return TOKEN_OF
  end;
  if string_equals_chars("type", input_pointer, token_length) then
    return TOKEN_TYPE
  end;
  if string_equals_chars("record", input_pointer, token_length) then
    return TOKEN_RECORD
  end;
  if string_equals_chars("union", input_pointer, token_length) then
    return TOKEN_UNION
  end;
  if string_equals_chars("pointer", input_pointer, token_length) then
    return TOKEN_POINTER
  end;
  if string_equals_chars("to", input_pointer, token_length) then
    return TOKEN_TO
  end;
  if string_equals_chars("nil", input_pointer, token_length) then
    return TOKEN_NIL
  end;
  if string_equals_chars("and", input_pointer, token_length) then
    return TOKEN_AND
  end;
  if string_equals_chars("or", input_pointer, token_length) then
    return TOKEN_OR
  end;
  if string_equals_chars("not", input_pointer, token_length) then
    return TOKEN_NOT
  end;
  if string_equals_chars("return", input_pointer, token_length) then
    return TOKEN_RETURN
  end;
  if string_equals_chars("cast", input_pointer, token_length) then
    return TOKEN_CAST
  end;
  if string_equals_chars("as", input_pointer, token_length) then
    return TOKEN_AS
  end;
  if string_equals_chars("sizeof", input_pointer, token_length) then
    return TOKEN_SIZEOF
  end;
  if string_equals_chars("defer", input_pointer, token_length) then
    return TOKEN_DEFER
  end;
  return TOKEN_IDENTIFIER
end

(*
  Builds the token for a word of token_length characters at input_pointer.

  The words true and false give a TOKEN_BOOLEAN with the matching value.
  Keywords give their own kind and carry no value. Any other word gives a
  TOKEN_IDENTIFIER whose string_value is a newly allocated, zero
  terminated copy of the word.
*)
proc categorize_identifier(input_pointer: pointer to Char, token_length: Int): Token;
var
  current_token: Token;
begin
  if string_equals_chars("true", input_pointer, token_length) then
    current_token.kind := TOKEN_BOOLEAN;
    current_token.value.boolean_value := true
  elsif string_equals_chars("false", input_pointer, token_length) then
    current_token.kind := TOKEN_BOOLEAN;
    current_token.value.boolean_value := false
  else
    current_token.kind := keyword_kind_of(input_pointer, token_length);

    if current_token.kind = TOKEN_IDENTIFIER then
      current_token.value.string_value := cast(calloc(token_length + 1, 1) as pointer to Char);
      strncpy(current_token.value.string_value, input_pointer, token_length)
    end
  end;
  return current_token
end
|
2025-01-30 01:03:16 +01:00
|
|
|
|
2025-02-08 23:02:27 +01:00
|
|
|
proc tokenize(source_code: SourceCode, tokens_size: pointer to Word): pointer to Token;
|
2025-01-30 01:03:16 +01:00
|
|
|
var
|
|
|
|
token_end: pointer to Char,
|
|
|
|
tokens: pointer to Token,
|
|
|
|
current_token: pointer to Token,
|
2025-02-08 23:02:27 +01:00
|
|
|
token_length: Word,
|
|
|
|
first_char: Char,
|
|
|
|
token_content: String;
|
2025-01-30 01:03:16 +01:00
|
|
|
begin
|
2025-02-01 11:47:23 +01:00
|
|
|
tokens_size^ := 0u;
|
2025-02-04 13:28:09 +01:00
|
|
|
tokens := nil;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := skip_spaces(source_code);
|
2025-01-30 01:03:16 +01:00
|
|
|
|
2025-02-08 23:02:27 +01:00
|
|
|
while source_code.text.length <> 0u do
|
2025-02-05 13:24:50 +01:00
|
|
|
tokens := cast(reallocarray(tokens, tokens_size^ + 1u, sizeof(Token)) as pointer to Token);
|
2025-02-01 11:47:23 +01:00
|
|
|
current_token := tokens + tokens_size^;
|
2025-02-08 23:02:27 +01:00
|
|
|
first_char := char_at(source_code.text, 0);
|
2025-01-30 01:03:16 +01:00
|
|
|
|
2025-02-08 23:02:27 +01:00
|
|
|
if is_alpha(first_char) or first_char = '_' then
|
|
|
|
token_end := lex_identifier(source_code.text.ptr + 1);
|
|
|
|
token_length := cast(token_end - source_code.text.ptr as Word);
|
2025-01-30 01:03:16 +01:00
|
|
|
|
2025-02-08 23:02:27 +01:00
|
|
|
current_token^ := categorize_identifier(source_code.text.ptr, token_length);
|
2025-01-30 01:03:16 +01:00
|
|
|
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, token_length)
|
|
|
|
elsif is_digit(first_char) then
|
2025-02-04 13:28:09 +01:00
|
|
|
token_end := nil;
|
2025-02-08 23:02:27 +01:00
|
|
|
current_token^.value.int_value := strtol(source_code.text.ptr, @token_end, 10);
|
|
|
|
token_length := cast(token_end - source_code.text.ptr as Word);
|
2025-01-31 09:46:17 +01:00
|
|
|
|
|
|
|
if token_end^ = 'u' then
|
|
|
|
current_token^.kind := TOKEN_WORD;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, token_length + 1u)
|
2025-01-31 09:46:17 +01:00
|
|
|
else
|
|
|
|
current_token^.kind := TOKEN_INTEGER;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, token_length)
|
2025-01-31 09:46:17 +01:00
|
|
|
end
|
2025-02-08 23:02:27 +01:00
|
|
|
elsif first_char = '(' then
|
|
|
|
source_code := advance_source(source_code, 1u);
|
|
|
|
|
|
|
|
if source_code.text.length = 0u then
|
|
|
|
current_token^.kind := TOKEN_LEFT_PAREN
|
|
|
|
elsif char_at(source_code.text, 0u) = '*' then
|
|
|
|
source_code := advance_source(source_code, 1u);
|
|
|
|
|
|
|
|
if lex_comment(@source_code, @token_content) then
|
|
|
|
current_token^.value.string := token_content;
|
|
|
|
current_token^.kind := TOKEN_COMMENT
|
2025-01-30 23:09:51 +01:00
|
|
|
else
|
|
|
|
current_token^.kind := 0
|
|
|
|
end
|
|
|
|
else
|
|
|
|
current_token^.kind := TOKEN_LEFT_PAREN
|
|
|
|
end
|
2025-02-08 23:02:27 +01:00
|
|
|
elsif first_char = ')' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_RIGHT_PAREN;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = '\'' then
|
|
|
|
token_end := lex_character(source_code.text.ptr + 1, current_token);
|
|
|
|
token_length := cast(token_end - source_code.text.ptr as Word);
|
2025-02-01 09:21:29 +01:00
|
|
|
|
|
|
|
if token_end^ = '\'' then
|
2025-01-31 09:46:17 +01:00
|
|
|
current_token^.kind := TOKEN_CHARACTER;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, token_length + 1u)
|
2025-02-01 09:21:29 +01:00
|
|
|
else
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
2025-01-31 09:46:17 +01:00
|
|
|
end
|
2025-02-08 23:02:27 +01:00
|
|
|
elsif first_char = '"' then
|
|
|
|
token_end := lex_string(source_code.text.ptr + 1, current_token);
|
2025-02-01 11:47:23 +01:00
|
|
|
|
|
|
|
if token_end^ = '"' then
|
|
|
|
current_token^.kind := TOKEN_STRING;
|
2025-02-08 23:02:27 +01:00
|
|
|
token_length := cast(token_end - source_code.text.ptr as Word);
|
|
|
|
source_code := advance_source(source_code, token_length + 1u)
|
2025-02-01 11:47:23 +01:00
|
|
|
end
|
2025-02-08 23:02:27 +01:00
|
|
|
elsif first_char = '[' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_LEFT_SQUARE;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = ']' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_RIGHT_SQUARE;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = '>' then
|
|
|
|
source_code := advance_source(source_code, 1u);
|
|
|
|
|
|
|
|
if source_code.text.length = 0u then
|
|
|
|
current_token^.kind := TOKEN_GREATER_THAN
|
|
|
|
elsif char_at(source_code.text, 0) = '=' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_GREATER_EQUAL;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
2025-01-30 01:03:16 +01:00
|
|
|
else
|
|
|
|
current_token^.kind := TOKEN_GREATER_THAN
|
|
|
|
end
|
2025-02-08 23:02:27 +01:00
|
|
|
elsif first_char = '<' then
|
|
|
|
source_code := advance_source(source_code, 1u);
|
|
|
|
|
|
|
|
if source_code.text.length = 0u then
|
|
|
|
current_token^.kind := TOKEN_LESS_THAN
|
|
|
|
elsif char_at(source_code.text, 0) = '=' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_LESS_EQUAL;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif char_at(source_code.text, 0) = '>' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_NOT_EQUAL;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
2025-01-30 01:03:16 +01:00
|
|
|
else
|
|
|
|
current_token^.kind := TOKEN_LESS_THAN
|
|
|
|
end
|
2025-02-08 23:02:27 +01:00
|
|
|
elsif first_char = '=' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_EQUAL;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = ';' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_SEMICOLON;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = '.' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_DOT;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = ',' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_COMMA;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = '+' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_PLUS;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = '-' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_MINUS;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = '*' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_MULTIPLICATION;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = '/' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_DIVISION;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = '%' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_REMAINDER;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = ':' then
|
|
|
|
source_code := advance_source(source_code, 1u);
|
|
|
|
|
|
|
|
if source_code.text.length = 0u then
|
|
|
|
current_token^.kind := TOKEN_COLON
|
|
|
|
elsif char_at(source_code.text, 0) = '=' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_ASSIGNMENT;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
2025-01-30 01:03:16 +01:00
|
|
|
else
|
|
|
|
current_token^.kind := TOKEN_COLON
|
|
|
|
end
|
2025-02-08 23:02:27 +01:00
|
|
|
elsif first_char = '^' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_HAT;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
|
|
|
elsif first_char = '@' then
|
2025-01-30 01:03:16 +01:00
|
|
|
current_token^.kind := TOKEN_AT;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
2025-01-30 01:03:16 +01:00
|
|
|
else
|
|
|
|
current_token^.kind := 0;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := advance_source(source_code, 1u)
|
2025-01-30 01:03:16 +01:00
|
|
|
end;
|
|
|
|
|
|
|
|
if current_token^.kind <> 0 then
|
2025-02-05 13:24:50 +01:00
|
|
|
tokens_size^ := tokens_size^ + 1u;
|
2025-02-08 23:02:27 +01:00
|
|
|
source_code := skip_spaces(source_code)
|
2025-02-01 11:47:23 +01:00
|
|
|
else
|
|
|
|
write_s("Lexical analysis error on \"");
|
2025-02-08 23:02:27 +01:00
|
|
|
write_c(first_char);
|
2025-02-01 11:47:23 +01:00
|
|
|
write_s("\".\n")
|
2025-01-30 01:03:16 +01:00
|
|
|
end
|
2025-01-21 20:18:27 +01:00
|
|
|
end;
|
2025-01-20 21:46:03 +01:00
|
|
|
|
2025-02-01 11:47:23 +01:00
|
|
|
return tokens
|
2025-02-07 22:12:59 +01:00
|
|
|
end
|
2025-01-20 21:46:03 +01:00
|
|
|
|
2025-02-03 23:04:00 +01:00
|
|
|
(*
  Placeholder literal parser.

  Allocates a single zero-filled Literal with calloc and returns it.
  Neither tokens nor tokens_size is read or modified here, so the caller
  is responsible for advancing past the literal token.
*)
proc parse_literal(tokens: pointer to pointer to Token, tokens_size: pointer to Word): pointer to Literal;
var
  literal: pointer to Literal;
begin
  literal := cast(calloc(1, sizeof(Literal)) as pointer to Literal);

  return literal
end
|
2025-02-03 23:04:00 +01:00
|
|
|
|
|
|
|
(*
  Parses one constant definition beginning at the current token.

  tokens      - cursor into the token array; advanced by four tokens in total.
  tokens_size - number of tokens left; the pointed-to count is decreased by
                the same amount so the caller's loop condition stays accurate.

  Returns a newly allocated ConstantDefinition whose name is a private copy
  of the current token's string value and whose body comes from
  parse_literal. Returns nil when fewer than four tokens remain, because the
  cursor arithmetic below would otherwise run past the token array and the
  unsigned counter would wrap around.
*)
proc parse_constant_definition(tokens: pointer to pointer to Token,
  tokens_size: pointer to Word): pointer to ConstantDefinition;
var
  result: pointer to ConstantDefinition;
begin
  (* Name, the token after it, the literal and the token after the literal. *)
  if tokens_size^ < 4u then
    return nil
  end;

  result := cast(calloc(1, sizeof(ConstantDefinition)) as pointer to ConstantDefinition);

  (* strlen does not count the terminating NUL that strcpy writes, so reserve one extra byte. *)
  result^.name := cast(malloc(strlen(tokens^^.value.string_value) + 1u) as pointer to Char);
  strcpy(result^.name, tokens^^.value.string_value);

  (* Skip the name and the token that follows it (presumably "=" - confirm against the grammar). *)
  tokens^ := tokens^ + 2u;
  tokens_size^ := tokens_size^ - 2u;

  (* Trace output: prints the constant name on its own line. *)
  write_z(result^.name);
  write_c('\n');

  result^.body := parse_literal(tokens, tokens_size);

  (* parse_literal does not advance the cursor; skip the literal and the token after it
     (presumably ";" - confirm against the grammar). *)
  tokens^ := tokens^ + 2u;
  tokens_size^ := tokens_size^ - 2u;

  return result
end
|
2025-02-03 23:04:00 +01:00
|
|
|
|
|
|
|
(*
  Parses the top level of a program.

  tokens      - cursor into the token array, advanced as tokens are consumed.
  tokens_size - number of tokens left, decreased as tokens are consumed.

  Currently only an optional constant section is recognised: a TOKEN_CONST
  token followed by any number of constant definitions, each starting with
  a TOKEN_IDENTIFIER token. Every parsed definition is appended to
  result^.constants.

  Returns the allocated Program, or nil as soon as one constant definition
  fails to parse.
*)
proc parse_program(tokens: pointer to pointer to Token, tokens_size: pointer to Word): pointer to Program;
var
  result: pointer to Program,
  current_constant: pointer to pointer to ConstantDefinition;
begin
  result := cast(calloc(1, sizeof(Program)) as pointer to Program);

  result^.constants.elements := nil;
  result^.constants.count := 0u;

  (* Check the remaining count before looking at the token, as the loop below does,
     so an empty token list is not dereferenced. *)
  if tokens_size^ > 0u and tokens^^.kind = TOKEN_CONST then
    tokens^ := tokens^ + 1;
    tokens_size^ := tokens_size^ - 1u;

    while tokens_size^ > 0u and tokens^^.kind = TOKEN_IDENTIFIER do
      (* Grow the element array by one slot and remember where the new slot is. *)
      result^.constants.elements := cast(
        reallocarray(result^.constants.elements, result^.constants.count + 1u, sizeof(pointer to ConstantDefinition))
        as pointer to pointer to ConstantDefinition);
      current_constant := result^.constants.elements + result^.constants.count;

      result^.constants.count := result^.constants.count + 1u;

      current_constant^ := parse_constant_definition(tokens, tokens_size);
      if current_constant^ = nil then
        return nil
      end
    end
  end;

  (* The procedure is declared to return the program, so hand it back on the normal path. *)
  return result
end
|
2025-02-03 23:04:00 +01:00
|
|
|
|
2025-02-07 22:12:59 +01:00
|
|
|
(*
  Turns the program arguments into a CommandLine record.

  argc - number of entries in argv.
  argv - argument strings; entry 0 is skipped.

  Recognised arguments:
    --tokenize      sets the tokenize flag.
    --syntax-tree   sets the syntax_tree flag.
    anything that does not start with "-" is stored as the input;
    a later such argument replaces an earlier one.

  Returns the filled record. Prints a message and returns nil when an
  unknown option starting with "-" is met or when no input was given.
*)
proc parse_command_line*(argc: Int, argv: pointer to pointer to Char): pointer to CommandLine;
var
  options: pointer to CommandLine,
  argument: pointer to pointer to Char,
  index: Int;
begin
  options := cast(malloc(sizeof(CommandLine)) as pointer to CommandLine);
  options^.input := nil;
  options^.syntax_tree := false;
  options^.tokenize := false;

  index := 1;
  while index < argc do
    argument := argv + index;

    (* Both known options start with "-", so testing the first character
       first selects exactly the same branch as testing it last. *)
    if argument^^ <> '-' then
      options^.input := argument^
    elsif strcmp(argument^, "--tokenize\0".ptr) = 0 then
      options^.tokenize := true
    elsif strcmp(argument^, "--syntax-tree\0".ptr) = 0 then
      options^.syntax_tree := true
    else
      write_s("Fatal error: Unknown command line options:");
      write_c(' ');
      write_z(argument^);
      write_s(".\n");

      return nil
    end;

    index := index + 1
  end;

  if options^.input = nil then
    write_s("Fatal error: no input files.\n");

    return nil
  end;

  return options
end
|
2025-02-01 11:47:23 +01:00
|
|
|
|
2025-02-02 08:22:40 +01:00
|
|
|
(*
  Runs the compiler driver for the given program arguments.

  Steps: parse the command line, read the input into a SourceCode value,
  tokenize it, then optionally print the tokens and/or run the parser,
  depending on the command line flags.

  Returns 0 on success, 2 when the command line could not be parsed and
  3 when the input could not be read (the reason is reported with perror).
*)
proc process(argc: Int, argv: pointer to pointer to Char): Int;
var
  options: pointer to CommandLine,
  input_code: SourceCode,
  token_list: pointer to Token,
  token_count: Word;
begin
  options := parse_command_line(argc, argv);
  if options = nil then
    return 2
  end;

  input_code.position := make_position();
  if not read_source(options^.input, @input_code.text) then
    perror(options^.input);

    return 3
  end;

  (* tokenize is handed the address of the counter to fill in. *)
  token_list := tokenize(input_code, @token_count);

  if options^.tokenize then
    print_tokens(token_list, token_count)
  end;

  (* The parser receives the addresses of the cursor and the counter;
     its result is not used here. *)
  if options^.syntax_tree then
    parse_program(@token_list, @token_count)
  end;

  return 0
end
|
2025-02-01 11:47:23 +01:00
|
|
|
|
|
|
|
(*
  Program entry point: calls process with count and parameters and passes
  the Int it returns to exit.
  NOTE(review): count and parameters are not declared in this part of the
  file; presumably the argument count and argument vector - confirm.
*)
begin
|
2025-02-02 08:22:40 +01:00
|
|
|
exit(process(count, parameters))
|
2025-01-11 13:32:37 +01:00
|
|
|
end.
|