Make string and char pointers comparable

This commit is contained in:
2025-02-08 23:02:27 +01:00
parent 8a0f282714
commit d88bd652a4
15 changed files with 425 additions and 123 deletions

View File

@ -25,9 +25,14 @@ type
first: Position;
last: Position
end,
SourceCode = record
position: Position;
text: String
end,
TokenValue* = union
int_value: Int;
string_value: pointer to Char;
string: String;
boolean_value: Bool;
char_value: Char
end,
@ -179,11 +184,32 @@ begin
return strncmp(this.ptr, that, length) = 0
end
(*
  Returns the part of the given string that starts `start` characters
  after its beginning. The returned string points into the same memory
  as the argument (ptr is only moved forward); nothing is copied.

  If `start` is larger than the string length it is clamped to the
  length, so the result is an empty string placed at the end of the
  original one. Without the clamp `string.length - start` would go below
  zero (Word looks like an unsigned type - confirm) and the pointer would
  be moved past the end of the string.
*)
proc open_substring(string: String, start: Word): String;
begin
  (* Never step past the end of the string. *)
  if start > string.length then
    start := string.length
  end;
  string.ptr := string.ptr + start;
  string.length := string.length - start;
  return string
end
(*
  Reads a single character of a string.

  Returns the character stored `position` characters after string.ptr.
  The position is not compared against string.length here.
*)
proc char_at(string: String, position: Word): Char;
var
  target: pointer to Char;
begin
  target := string.ptr + position;
  return target^
end
(*
End of standard procedures.
*)
proc read_source(filename: pointer to Char): pointer to Char;
(*
  Returns a Position whose line and column are both set to 1.
*)
proc make_position(): Position;
var
  start: Position;
begin
  start.column := 1u;
  start.line := 1u;
  return start
end
proc read_source(filename: pointer to Char, result: pointer to String): Bool;
var
input_file: pointer to FILE,
source_size: Int,
@ -192,26 +218,28 @@ begin
input_file := fopen(filename, "rb\0".ptr);
if input_file = nil then
return nil
return false
end;
defer
fclose(input_file)
end;
if fseek(input_file, 0, SEEK_END) <> 0 then
fclose(input_file);
return nil
return false
end;
source_size := ftell(input_file);
if source_size < 0 then
fclose(input_file);
return nil
return false
end;
rewind(input_file);
input := calloc(source_size + 1, 1);
input := malloc(source_size);
if fread(input, source_size, 1, input_file) <> 1u then
input := nil
return false
end;
fclose(input_file);
result^.length := cast(source_size as Word);
result^.ptr := cast(input as pointer to Char);
return input
return true
end
proc escape_char(escape: Char, result: pointer to Char): Bool;
@ -257,12 +285,26 @@ begin
end
end
proc skip_spaces(input: pointer to Char): pointer to Char;
proc advance_source(source_code: SourceCode, length: Word): SourceCode;
begin
while is_space(input^) do
input := input + 1
source_code.text := open_substring(source_code.text, length);
source_code.position.column := source_code.position.column + length;
return source_code
end
proc skip_spaces(source_code: SourceCode): SourceCode;
begin
while source_code.text.length > 0u and is_space(char_at(source_code.text, 0)) do
if char_at(source_code.text, 0) = '\n' then
source_code.position.line := source_code.position.line + 1u;
source_code.position.column := 1u
else
source_code.position.column := source_code.position.column + 1u
end;
source_code.text := open_substring(source_code.text, 1u)
end;
return input
return source_code
end
proc lex_identifier(input: pointer to Char): pointer to Char;
@ -273,19 +315,29 @@ begin
return input
end
proc lex_comment(input: pointer to Char): pointer to Char;
proc lex_comment(source_code: pointer to SourceCode, token_content: pointer to String): Bool;
var
next: pointer to Char;
result: pointer to Char;
begin
while input^ <> '\0' do
next := input + 1;
token_content^.ptr := source_code^.text.ptr;
token_content^.length := 0u;
if input^ = '*' and next^ = ')' then
return next + 1
while source_code^.text.length > 1u do
if char_at(source_code^.text, 0) = '*' and char_at(source_code^.text, 1) = ')' then
source_code^ := advance_source(source_code^, 2u);
result := cast(malloc(token_content^.length) as pointer to Char);
strncpy(result, token_content^.ptr, token_content^.length);
token_content^.ptr := result;
return true
end;
input := next
token_content^.length := token_content^.length + 1u;
source_code^ := advance_source(source_code^, 1)
end;
return nil
token_content^.ptr := nil;
token_content^.length := 0u;
return false
end
proc lex_character(input: pointer to Char, current_token: pointer to Token): pointer to Char;
@ -458,7 +510,7 @@ begin
elsif current_token^.kind = TOKEN_AT then
write_c('@')
elsif current_token^.kind = TOKEN_COMMENT then
write_s("COMMENT")
write_s("(* COMMENT *)")
elsif current_token^.kind = TOKEN_INTEGER then
write_c('<');
write_i(current_token^.value.int_value);
@ -562,154 +614,167 @@ begin
return current_token
end
proc tokenize(input_pointer: pointer to Char, tokens_size: pointer to Word): pointer to Token;
proc tokenize(source_code: SourceCode, tokens_size: pointer to Word): pointer to Token;
var
token_end: pointer to Char,
tokens: pointer to Token,
current_token: pointer to Token,
token_length: Word;
token_length: Word,
first_char: Char,
token_content: String;
begin
tokens_size^ := 0u;
tokens := nil;
source_code := skip_spaces(source_code);
input_pointer := skip_spaces(input_pointer);
while input_pointer^ <> '\0' do
while source_code.text.length <> 0u do
tokens := cast(reallocarray(tokens, tokens_size^ + 1u, sizeof(Token)) as pointer to Token);
current_token := tokens + tokens_size^;
first_char := char_at(source_code.text, 0);
if is_alpha(input_pointer^) or input_pointer^ = '_' then
token_end := lex_identifier(input_pointer + 1);
token_length := cast(token_end as Word) - cast(input_pointer as Word);
if is_alpha(first_char) or first_char = '_' then
token_end := lex_identifier(source_code.text.ptr + 1);
token_length := cast(token_end - source_code.text.ptr as Word);
current_token^ := categorize_identifier(input_pointer, token_length);
current_token^ := categorize_identifier(source_code.text.ptr, token_length);
input_pointer := token_end
elsif is_digit(input_pointer^) then
source_code := advance_source(source_code, token_length)
elsif is_digit(first_char) then
token_end := nil;
current_token^.value.int_value := strtol(input_pointer, @token_end, 10);
current_token^.value.int_value := strtol(source_code.text.ptr, @token_end, 10);
token_length := cast(token_end - source_code.text.ptr as Word);
if token_end^ = 'u' then
current_token^.kind := TOKEN_WORD;
input_pointer := token_end + 1
source_code := advance_source(source_code, token_length + 1u)
else
current_token^.kind := TOKEN_INTEGER;
input_pointer := token_end
source_code := advance_source(source_code, token_length)
end
elsif input_pointer^ = '(' then
input_pointer := input_pointer + 1;
if input_pointer^ = '*' then
token_end := lex_comment(input_pointer + 1);
elsif first_char = '(' then
source_code := advance_source(source_code, 1u);
if token_end <> nil then
token_length := cast(token_end as Word) - cast(input_pointer as Word);
current_token^.value.string_value := cast(calloc(token_length + 1u, 1) as pointer to Char);
strncpy(current_token^.value.string_value, input_pointer, token_length);
current_token^.kind := TOKEN_COMMENT;
if source_code.text.length = 0u then
current_token^.kind := TOKEN_LEFT_PAREN
elsif char_at(source_code.text, 0u) = '*' then
source_code := advance_source(source_code, 1u);
input_pointer := token_end
if lex_comment(@source_code, @token_content) then
current_token^.value.string := token_content;
current_token^.kind := TOKEN_COMMENT
else
current_token^.kind := 0
end
else
current_token^.kind := TOKEN_LEFT_PAREN
end
elsif input_pointer^ = ')' then
elsif first_char = ')' then
current_token^.kind := TOKEN_RIGHT_PAREN;
input_pointer := input_pointer + 1
elsif input_pointer^ = '\'' then
token_end := lex_character(input_pointer + 1, current_token);
source_code := advance_source(source_code, 1u)
elsif first_char = '\'' then
token_end := lex_character(source_code.text.ptr + 1, current_token);
token_length := cast(token_end - source_code.text.ptr as Word);
if token_end^ = '\'' then
current_token^.kind := TOKEN_CHARACTER;
input_pointer := token_end + 1
source_code := advance_source(source_code, token_length + 1u)
else
input_pointer := input_pointer + 1
source_code := advance_source(source_code, 1u)
end
elsif input_pointer^ = '"' then
token_end := lex_string(input_pointer + 1, current_token);
elsif first_char = '"' then
token_end := lex_string(source_code.text.ptr + 1, current_token);
if token_end^ = '"' then
current_token^.kind := TOKEN_STRING;
input_pointer := token_end + 1
token_length := cast(token_end - source_code.text.ptr as Word);
source_code := advance_source(source_code, token_length + 1u)
end
elsif input_pointer^ = '[' then
elsif first_char = '[' then
current_token^.kind := TOKEN_LEFT_SQUARE;
input_pointer := input_pointer + 1
elsif input_pointer^ = ']' then
source_code := advance_source(source_code, 1u)
elsif first_char = ']' then
current_token^.kind := TOKEN_RIGHT_SQUARE;
input_pointer := input_pointer + 1
elsif input_pointer^ = '>' then
input_pointer := input_pointer + 1;
if input_pointer^ = '=' then
source_code := advance_source(source_code, 1u)
elsif first_char = '>' then
source_code := advance_source(source_code, 1u);
if source_code.text.length = 0u then
current_token^.kind := TOKEN_GREATER_THAN
elsif char_at(source_code.text, 0) = '=' then
current_token^.kind := TOKEN_GREATER_EQUAL;
input_pointer := input_pointer + 1
source_code := advance_source(source_code, 1u)
else
current_token^.kind := TOKEN_GREATER_THAN
end
elsif input_pointer^ = '<' then
input_pointer := input_pointer + 1;
if input_pointer^ = '=' then
elsif first_char = '<' then
source_code := advance_source(source_code, 1u);
if source_code.text.length = 0u then
current_token^.kind := TOKEN_LESS_THAN
elsif char_at(source_code.text, 0) = '=' then
current_token^.kind := TOKEN_LESS_EQUAL;
input_pointer := input_pointer + 1
elsif input_pointer^ = '>' then
source_code := advance_source(source_code, 1u)
elsif char_at(source_code.text, 0) = '>' then
current_token^.kind := TOKEN_NOT_EQUAL;
input_pointer := input_pointer + 1
source_code := advance_source(source_code, 1u)
else
current_token^.kind := TOKEN_LESS_THAN
end
elsif input_pointer^ = '=' then
elsif first_char = '=' then
current_token^.kind := TOKEN_EQUAL;
input_pointer := input_pointer + 1
elsif input_pointer^ = ';' then
source_code := advance_source(source_code, 1u)
elsif first_char = ';' then
current_token^.kind := TOKEN_SEMICOLON;
input_pointer := input_pointer + 1
elsif input_pointer^ = '.' then
source_code := advance_source(source_code, 1u)
elsif first_char = '.' then
current_token^.kind := TOKEN_DOT;
input_pointer := input_pointer + 1
elsif input_pointer^ = ',' then
source_code := advance_source(source_code, 1u)
elsif first_char = ',' then
current_token^.kind := TOKEN_COMMA;
input_pointer := input_pointer + 1
elsif input_pointer^ = '+' then
source_code := advance_source(source_code, 1u)
elsif first_char = '+' then
current_token^.kind := TOKEN_PLUS;
input_pointer := input_pointer + 1
elsif input_pointer^ = '-' then
source_code := advance_source(source_code, 1u)
elsif first_char = '-' then
current_token^.kind := TOKEN_MINUS;
input_pointer := input_pointer + 1
elsif input_pointer^ = '*' then
source_code := advance_source(source_code, 1u)
elsif first_char = '*' then
current_token^.kind := TOKEN_MULTIPLICATION;
input_pointer := input_pointer + 1
elsif input_pointer^ = '/' then
source_code := advance_source(source_code, 1u)
elsif first_char = '/' then
current_token^.kind := TOKEN_DIVISION;
input_pointer := input_pointer + 1
elsif input_pointer^ = '%' then
source_code := advance_source(source_code, 1u)
elsif first_char = '%' then
current_token^.kind := TOKEN_REMAINDER;
input_pointer := input_pointer + 1
elsif input_pointer^ = ':' then
input_pointer := input_pointer + 1;
if input_pointer^ = '=' then
source_code := advance_source(source_code, 1u)
elsif first_char = ':' then
source_code := advance_source(source_code, 1u);
if source_code.text.length = 0u then
current_token^.kind := TOKEN_COLON
elsif char_at(source_code.text, 0) = '=' then
current_token^.kind := TOKEN_ASSIGNMENT;
input_pointer := input_pointer + 1
source_code := advance_source(source_code, 1u)
else
current_token^.kind := TOKEN_COLON
end
elsif input_pointer^ = '^' then
elsif first_char = '^' then
current_token^.kind := TOKEN_HAT;
input_pointer := input_pointer + 1
elsif input_pointer^ = '@' then
source_code := advance_source(source_code, 1u)
elsif first_char = '@' then
current_token^.kind := TOKEN_AT;
input_pointer := input_pointer + 1
source_code := advance_source(source_code, 1u)
else
current_token^.kind := 0;
input_pointer := input_pointer + 1
source_code := advance_source(source_code, 1u)
end;
if current_token^.kind <> 0 then
tokens_size^ := tokens_size^ + 1u;
input_pointer := skip_spaces(input_pointer)
source_code := skip_spaces(source_code)
else
write_s("Lexical analysis error on \"");
write_c(input_pointer^);
write_c(first_char);
write_s("\".\n")
end
end;
@ -819,9 +884,9 @@ end
proc process(argc: Int, argv: pointer to pointer to Char): Int;
var
input: pointer to Char,
tokens: pointer to Token,
tokens_size: Word,
source_code: SourceCode,
command_line: pointer to CommandLine;
begin
command_line := parse_command_line(argc, argv);
@ -829,12 +894,12 @@ begin
return 2
end;
input := read_source(command_line^.input);
if input = nil then
source_code.position := make_position();
if not read_source(command_line^.input, @source_code.text) then
perror(command_line^.input);
return 3
end;
tokens := tokenize(input, @tokens_size);
tokens := tokenize(source_code, @tokens_size);
if command_line^.tokenize then
print_tokens(tokens, tokens_size)