Make string and char pointers comparable
This commit is contained in:
271
source.elna
271
source.elna
@ -25,9 +25,14 @@ type
|
||||
first: Position;
|
||||
last: Position
|
||||
end,
|
||||
SourceCode = record
|
||||
position: Position;
|
||||
text: String
|
||||
end,
|
||||
TokenValue* = union
|
||||
int_value: Int;
|
||||
string_value: pointer to Char;
|
||||
string: String;
|
||||
boolean_value: Bool;
|
||||
char_value: Char
|
||||
end,
|
||||
@ -179,11 +184,32 @@ begin
|
||||
return strncmp(this.ptr, that, length) = 0
|
||||
end
|
||||
|
||||
(*
  Returns the tail of `string` that begins `start` characters in.

  The String is received by value, so only the local copy's `ptr` and
  `length` are adjusted; no characters are copied and the result refers
  to the same underlying buffer as the argument.

  `start` is clamped to `string.length`. Without the clamp, a `start`
  larger than the length would move `ptr` past the end of the text and
  make the `length` subtraction wrap around (Word looks unsigned here -
  TODO confirm), yielding a huge bogus length. With the clamp such a call
  returns an empty string positioned at the end of the text.
*)
proc open_substring(string: String, start: Word): String;
begin
  if start > string.length then
    start := string.length
  end;
  string.ptr := string.ptr + start;
  string.length := string.length - start;
  return string
end
|
||||
|
||||
(*
  Reads the character located `position` characters after the start of
  `string`.

  No comparison against `string.length` is made; the caller has to make
  sure `position` lies inside the string.
*)
proc char_at(string: String, position: Word): Char;
var
  target: pointer to Char;
begin
  target := string.ptr + position;
  return target^
end
|
||||
|
||||
(*
|
||||
End of standard procedures.
|
||||
*)
|
||||
|
||||
proc read_source(filename: pointer to Char): pointer to Char;
|
||||
(*
  Builds the Position used as the starting point of a source text:
  line 1, column 1.
*)
proc make_position(): Position;
var
  origin: Position;
begin
  origin.column := 1u;
  origin.line := 1u;

  return origin
end
|
||||
|
||||
proc read_source(filename: pointer to Char, result: pointer to String): Bool;
|
||||
var
|
||||
input_file: pointer to FILE,
|
||||
source_size: Int,
|
||||
@ -192,26 +218,28 @@ begin
|
||||
input_file := fopen(filename, "rb\0".ptr);
|
||||
|
||||
if input_file = nil then
|
||||
return nil
|
||||
return false
|
||||
end;
|
||||
defer
|
||||
fclose(input_file)
|
||||
end;
|
||||
if fseek(input_file, 0, SEEK_END) <> 0 then
|
||||
fclose(input_file);
|
||||
return nil
|
||||
return false
|
||||
end;
|
||||
source_size := ftell(input_file);
|
||||
if source_size < 0 then
|
||||
fclose(input_file);
|
||||
return nil
|
||||
return false
|
||||
end;
|
||||
rewind(input_file);
|
||||
|
||||
input := calloc(source_size + 1, 1);
|
||||
input := malloc(source_size);
|
||||
if fread(input, source_size, 1, input_file) <> 1u then
|
||||
input := nil
|
||||
return false
|
||||
end;
|
||||
fclose(input_file);
|
||||
result^.length := cast(source_size as Word);
|
||||
result^.ptr := cast(input as pointer to Char);
|
||||
|
||||
return input
|
||||
return true
|
||||
end
|
||||
|
||||
proc escape_char(escape: Char, result: pointer to Char): Bool;
|
||||
@ -257,12 +285,26 @@ begin
|
||||
end
|
||||
end
|
||||
|
||||
proc skip_spaces(input: pointer to Char): pointer to Char;
|
||||
proc advance_source(source_code: SourceCode, length: Word): SourceCode;
|
||||
begin
|
||||
while is_space(input^) do
|
||||
input := input + 1
|
||||
source_code.text := open_substring(source_code.text, length);
|
||||
source_code.position.column := source_code.position.column + length;
|
||||
|
||||
return source_code
|
||||
end
|
||||
|
||||
proc skip_spaces(source_code: SourceCode): SourceCode;
|
||||
begin
|
||||
while source_code.text.length > 0u and is_space(char_at(source_code.text, 0)) do
|
||||
if char_at(source_code.text, 0) = '\n' then
|
||||
source_code.position.line := source_code.position.line + 1u;
|
||||
source_code.position.column := 1u
|
||||
else
|
||||
source_code.position.column := source_code.position.column + 1u
|
||||
end;
|
||||
source_code.text := open_substring(source_code.text, 1u)
|
||||
end;
|
||||
return input
|
||||
return source_code
|
||||
end
|
||||
|
||||
proc lex_identifier(input: pointer to Char): pointer to Char;
|
||||
@ -273,19 +315,29 @@ begin
|
||||
return input
|
||||
end
|
||||
|
||||
proc lex_comment(input: pointer to Char): pointer to Char;
|
||||
proc lex_comment(source_code: pointer to SourceCode, token_content: pointer to String): Bool;
|
||||
var
|
||||
next: pointer to Char;
|
||||
result: pointer to Char;
|
||||
begin
|
||||
while input^ <> '\0' do
|
||||
next := input + 1;
|
||||
token_content^.ptr := source_code^.text.ptr;
|
||||
token_content^.length := 0u;
|
||||
|
||||
if input^ = '*' and next^ = ')' then
|
||||
return next + 1
|
||||
while source_code^.text.length > 1u do
|
||||
if char_at(source_code^.text, 0) = '*' and char_at(source_code^.text, 1) = ')' then
|
||||
source_code^ := advance_source(source_code^, 2u);
|
||||
|
||||
result := cast(malloc(token_content^.length) as pointer to Char);
|
||||
strncpy(result, token_content^.ptr, token_content^.length);
|
||||
token_content^.ptr := result;
|
||||
|
||||
return true
|
||||
end;
|
||||
input := next
|
||||
token_content^.length := token_content^.length + 1u;
|
||||
source_code^ := advance_source(source_code^, 1)
|
||||
end;
|
||||
return nil
|
||||
token_content^.ptr := nil;
|
||||
token_content^.length := 0u;
|
||||
return false
|
||||
end
|
||||
|
||||
proc lex_character(input: pointer to Char, current_token: pointer to Token): pointer to Char;
|
||||
@ -458,7 +510,7 @@ begin
|
||||
elsif current_token^.kind = TOKEN_AT then
|
||||
write_c('@')
|
||||
elsif current_token^.kind = TOKEN_COMMENT then
|
||||
write_s("COMMENT")
|
||||
write_s("(* COMMENT *)")
|
||||
elsif current_token^.kind = TOKEN_INTEGER then
|
||||
write_c('<');
|
||||
write_i(current_token^.value.int_value);
|
||||
@ -562,154 +614,167 @@ begin
|
||||
return current_token
|
||||
end
|
||||
|
||||
proc tokenize(input_pointer: pointer to Char, tokens_size: pointer to Word): pointer to Token;
|
||||
proc tokenize(source_code: SourceCode, tokens_size: pointer to Word): pointer to Token;
|
||||
var
|
||||
token_end: pointer to Char,
|
||||
tokens: pointer to Token,
|
||||
current_token: pointer to Token,
|
||||
token_length: Word;
|
||||
token_length: Word,
|
||||
first_char: Char,
|
||||
token_content: String;
|
||||
begin
|
||||
tokens_size^ := 0u;
|
||||
tokens := nil;
|
||||
source_code := skip_spaces(source_code);
|
||||
|
||||
input_pointer := skip_spaces(input_pointer);
|
||||
|
||||
while input_pointer^ <> '\0' do
|
||||
while source_code.text.length <> 0u do
|
||||
tokens := cast(reallocarray(tokens, tokens_size^ + 1u, sizeof(Token)) as pointer to Token);
|
||||
current_token := tokens + tokens_size^;
|
||||
first_char := char_at(source_code.text, 0);
|
||||
|
||||
if is_alpha(input_pointer^) or input_pointer^ = '_' then
|
||||
token_end := lex_identifier(input_pointer + 1);
|
||||
token_length := cast(token_end as Word) - cast(input_pointer as Word);
|
||||
if is_alpha(first_char) or first_char = '_' then
|
||||
token_end := lex_identifier(source_code.text.ptr + 1);
|
||||
token_length := cast(token_end - source_code.text.ptr as Word);
|
||||
|
||||
current_token^ := categorize_identifier(input_pointer, token_length);
|
||||
current_token^ := categorize_identifier(source_code.text.ptr, token_length);
|
||||
|
||||
input_pointer := token_end
|
||||
elsif is_digit(input_pointer^) then
|
||||
source_code := advance_source(source_code, token_length)
|
||||
elsif is_digit(first_char) then
|
||||
token_end := nil;
|
||||
current_token^.value.int_value := strtol(input_pointer, @token_end, 10);
|
||||
current_token^.value.int_value := strtol(source_code.text.ptr, @token_end, 10);
|
||||
token_length := cast(token_end - source_code.text.ptr as Word);
|
||||
|
||||
if token_end^ = 'u' then
|
||||
current_token^.kind := TOKEN_WORD;
|
||||
input_pointer := token_end + 1
|
||||
source_code := advance_source(source_code, token_length + 1u)
|
||||
else
|
||||
current_token^.kind := TOKEN_INTEGER;
|
||||
input_pointer := token_end
|
||||
source_code := advance_source(source_code, token_length)
|
||||
end
|
||||
elsif input_pointer^ = '(' then
|
||||
input_pointer := input_pointer + 1;
|
||||
if input_pointer^ = '*' then
|
||||
token_end := lex_comment(input_pointer + 1);
|
||||
elsif first_char = '(' then
|
||||
source_code := advance_source(source_code, 1u);
|
||||
|
||||
if token_end <> nil then
|
||||
token_length := cast(token_end as Word) - cast(input_pointer as Word);
|
||||
current_token^.value.string_value := cast(calloc(token_length + 1u, 1) as pointer to Char);
|
||||
strncpy(current_token^.value.string_value, input_pointer, token_length);
|
||||
current_token^.kind := TOKEN_COMMENT;
|
||||
if source_code.text.length = 0u then
|
||||
current_token^.kind := TOKEN_LEFT_PAREN
|
||||
elsif char_at(source_code.text, 0u) = '*' then
|
||||
source_code := advance_source(source_code, 1u);
|
||||
|
||||
input_pointer := token_end
|
||||
if lex_comment(@source_code, @token_content) then
|
||||
current_token^.value.string := token_content;
|
||||
current_token^.kind := TOKEN_COMMENT
|
||||
else
|
||||
current_token^.kind := 0
|
||||
end
|
||||
else
|
||||
current_token^.kind := TOKEN_LEFT_PAREN
|
||||
end
|
||||
elsif input_pointer^ = ')' then
|
||||
elsif first_char = ')' then
|
||||
current_token^.kind := TOKEN_RIGHT_PAREN;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = '\'' then
|
||||
token_end := lex_character(input_pointer + 1, current_token);
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = '\'' then
|
||||
token_end := lex_character(source_code.text.ptr + 1, current_token);
|
||||
token_length := cast(token_end - source_code.text.ptr as Word);
|
||||
|
||||
if token_end^ = '\'' then
|
||||
current_token^.kind := TOKEN_CHARACTER;
|
||||
input_pointer := token_end + 1
|
||||
source_code := advance_source(source_code, token_length + 1u)
|
||||
else
|
||||
input_pointer := input_pointer + 1
|
||||
source_code := advance_source(source_code, 1u)
|
||||
end
|
||||
elsif input_pointer^ = '"' then
|
||||
token_end := lex_string(input_pointer + 1, current_token);
|
||||
elsif first_char = '"' then
|
||||
token_end := lex_string(source_code.text.ptr + 1, current_token);
|
||||
|
||||
if token_end^ = '"' then
|
||||
current_token^.kind := TOKEN_STRING;
|
||||
input_pointer := token_end + 1
|
||||
token_length := cast(token_end - source_code.text.ptr as Word);
|
||||
source_code := advance_source(source_code, token_length + 1u)
|
||||
end
|
||||
elsif input_pointer^ = '[' then
|
||||
elsif first_char = '[' then
|
||||
current_token^.kind := TOKEN_LEFT_SQUARE;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = ']' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = ']' then
|
||||
current_token^.kind := TOKEN_RIGHT_SQUARE;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = '>' then
|
||||
input_pointer := input_pointer + 1;
|
||||
if input_pointer^ = '=' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = '>' then
|
||||
source_code := advance_source(source_code, 1u);
|
||||
|
||||
if source_code.text.length = 0u then
|
||||
current_token^.kind := TOKEN_GREATER_THAN
|
||||
elsif char_at(source_code.text, 0) = '=' then
|
||||
current_token^.kind := TOKEN_GREATER_EQUAL;
|
||||
input_pointer := input_pointer + 1
|
||||
source_code := advance_source(source_code, 1u)
|
||||
else
|
||||
current_token^.kind := TOKEN_GREATER_THAN
|
||||
end
|
||||
elsif input_pointer^ = '<' then
|
||||
input_pointer := input_pointer + 1;
|
||||
if input_pointer^ = '=' then
|
||||
elsif first_char = '<' then
|
||||
source_code := advance_source(source_code, 1u);
|
||||
|
||||
if source_code.text.length = 0u then
|
||||
current_token^.kind := TOKEN_LESS_THAN
|
||||
elsif char_at(source_code.text, 0) = '=' then
|
||||
current_token^.kind := TOKEN_LESS_EQUAL;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = '>' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif char_at(source_code.text, 0) = '>' then
|
||||
current_token^.kind := TOKEN_NOT_EQUAL;
|
||||
input_pointer := input_pointer + 1
|
||||
source_code := advance_source(source_code, 1u)
|
||||
else
|
||||
current_token^.kind := TOKEN_LESS_THAN
|
||||
end
|
||||
elsif input_pointer^ = '=' then
|
||||
elsif first_char = '=' then
|
||||
current_token^.kind := TOKEN_EQUAL;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = ';' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = ';' then
|
||||
current_token^.kind := TOKEN_SEMICOLON;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = '.' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = '.' then
|
||||
current_token^.kind := TOKEN_DOT;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = ',' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = ',' then
|
||||
current_token^.kind := TOKEN_COMMA;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = '+' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = '+' then
|
||||
current_token^.kind := TOKEN_PLUS;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = '-' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = '-' then
|
||||
current_token^.kind := TOKEN_MINUS;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = '*' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = '*' then
|
||||
current_token^.kind := TOKEN_MULTIPLICATION;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = '/' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = '/' then
|
||||
current_token^.kind := TOKEN_DIVISION;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = '%' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = '%' then
|
||||
current_token^.kind := TOKEN_REMAINDER;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = ':' then
|
||||
input_pointer := input_pointer + 1;
|
||||
if input_pointer^ = '=' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = ':' then
|
||||
source_code := advance_source(source_code, 1u);
|
||||
|
||||
if source_code.text.length = 0u then
|
||||
current_token^.kind := TOKEN_COLON
|
||||
elsif char_at(source_code.text, 0) = '=' then
|
||||
current_token^.kind := TOKEN_ASSIGNMENT;
|
||||
input_pointer := input_pointer + 1
|
||||
source_code := advance_source(source_code, 1u)
|
||||
else
|
||||
current_token^.kind := TOKEN_COLON
|
||||
end
|
||||
elsif input_pointer^ = '^' then
|
||||
elsif first_char = '^' then
|
||||
current_token^.kind := TOKEN_HAT;
|
||||
input_pointer := input_pointer + 1
|
||||
elsif input_pointer^ = '@' then
|
||||
source_code := advance_source(source_code, 1u)
|
||||
elsif first_char = '@' then
|
||||
current_token^.kind := TOKEN_AT;
|
||||
input_pointer := input_pointer + 1
|
||||
source_code := advance_source(source_code, 1u)
|
||||
else
|
||||
current_token^.kind := 0;
|
||||
input_pointer := input_pointer + 1
|
||||
source_code := advance_source(source_code, 1u)
|
||||
end;
|
||||
|
||||
if current_token^.kind <> 0 then
|
||||
tokens_size^ := tokens_size^ + 1u;
|
||||
input_pointer := skip_spaces(input_pointer)
|
||||
source_code := skip_spaces(source_code)
|
||||
else
|
||||
write_s("Lexical analysis error on \"");
|
||||
write_c(input_pointer^);
|
||||
write_c(first_char);
|
||||
write_s("\".\n")
|
||||
end
|
||||
end;
|
||||
@ -819,9 +884,9 @@ end
|
||||
|
||||
proc process(argc: Int, argv: pointer to pointer to Char): Int;
|
||||
var
|
||||
input: pointer to Char,
|
||||
tokens: pointer to Token,
|
||||
tokens_size: Word,
|
||||
source_code: SourceCode,
|
||||
command_line: pointer to CommandLine;
|
||||
begin
|
||||
command_line := parse_command_line(argc, argv);
|
||||
@ -829,12 +894,12 @@ begin
|
||||
return 2
|
||||
end;
|
||||
|
||||
input := read_source(command_line^.input);
|
||||
if input = nil then
|
||||
source_code.position := make_position();
|
||||
if not read_source(command_line^.input, @source_code.text) then
|
||||
perror(command_line^.input);
|
||||
return 3
|
||||
end;
|
||||
tokens := tokenize(input, @tokens_size);
|
||||
tokens := tokenize(source_code, @tokens_size);
|
||||
|
||||
if command_line^.tokenize then
|
||||
print_tokens(tokens, tokens_size)
|
||||
|
Reference in New Issue
Block a user