999 lines
24 KiB
Plaintext
999 lines
24 KiB
Plaintext
(*
  Origin selectors intended for the whence argument of fseek.
  NOTE(review): the values follow the usual C library definitions - confirm
  for the target platform.
*)
const
  SEEK_SET* := 0;
  SEEK_CUR* := 1;
  SEEK_END* := 2;
|
|
|
|
(*
  Type declarations of the lexer.

  TokenKind enumerates every token category. The order of the members is
  significant: print_tokens prints the ordinal of kinds it has no case for.
*)
type
  TokenKind* = (
    unknown,
    identifier,
    _if,
    _then,
    _else,
    _elsif,
    _while,
    _do,
    _proc,
    _begin,
    _end,
    _extern,
    _const,
    _var,
    array,
    _of,
    _type,
    _record,
    _union,
    pointer,
    to,
    boolean,
    _nil,
    and,
    _or,
    not,
    _return,
    _cast,
    shift_left,
    shift_right,
    left_paren,
    right_paren,
    left_square,
    right_square,
    greater_equal,
    less_equal,
    greater_than,
    less_than,
    not_equal,
    equal,
    semicolon,
    dot,
    comma,
    plus,
    minus,
    multiplication,
    division,
    remainder,
    assignment,
    colon,
    hat,
    at,
    comment,
    integer,
    word,
    character,
    string,
    _defer,
    exclamation,
    arrow
  );

  (* Line and column inside the source text; make_position starts at 1, 1. *)
  Position* = record
    line: Word;
    column: Word
  end;

  (* A span of source text delimited by two positions. *)
  Location* = record
    first: Position;
    last: Position
  end;

  (*
    Chunked file reader state. buffer holds the chunk read last, size is the
    number of bytes fread stored in it and index is the position of the
    current character; index is reset to 1 after every refill.
  *)
  SourceFile* = record
    buffer: [1024]Char;
    handle: ^FILE;
    size: Word;
    index: Word
  end;

  (* Opaque stand-in for the C FILE structure; only used through pointers. *)
  FILE* = record end;

  (* Growable byte buffer: size bytes are in use out of capacity allocated. *)
  StringBuffer* = record
    data: ^Byte;
    size: Word;
    capacity: Word
  end;

  (*
    Character source used by the lexer. input is an opaque pointer that is
    handed to the empty, advance and head callbacks.
  *)
  SourceCode = record
    position: Position;

    input: ^Byte;
    empty: proc(^Byte) -> Bool;
    advance: proc(^Byte);
    head: proc(^Byte) -> Char
  end;

  (*
    A lexed token. Which member of value is meaningful depends on kind, see
    print_tokens for the members read per kind.
  *)
  Token* = record
    kind: TokenKind;
    value: union
      int_value: Int;
      string: String;
      boolean_value: Bool;
      char_value: Char
    end;
    location: Location
  end;

  (* Parsed command line: the input path and the two recognised flags. *)
  CommandLine* = record
    input: ^Char;
    tokenize: Bool;
    syntax_tree: Bool
  end;
|
|
|
|
(*
|
|
External procedures.
|
|
*)
|
|
(* File and descriptor input/output. *)
proc fopen(pathname: ^Char, mode: ^Char) -> ^FILE; extern
proc fclose(stream: ^FILE) -> Int; extern
proc fseek(stream: ^FILE, off: Int, whence: Int) -> Int; extern
proc rewind(stream: ^FILE); extern
proc ftell(stream: ^FILE) -> Int; extern
proc fread(ptr: ^Byte, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern
(* NOTE(review): the third parameter is named Word although its type is Int;
   it looks like a byte count - confirm before renaming. *)
proc write(fd: Int, buf: ^Byte, Word: Int) -> Int; extern

(* Dynamic memory. *)
proc malloc(size: Word) -> ^Byte; extern
proc free(ptr: ^Byte); extern
proc calloc(nmemb: Word, size: Word) -> ^Byte; extern
proc realloc(ptr: ^Byte, size: Word) -> ^Byte; extern

proc memset(ptr: ^Char, c: Int, n: Int) -> ^Char; extern

(* Zero terminated strings. *)
proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern
proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern
proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern
proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern
proc strlen(ptr: ^Char) -> Word; extern

(* Diagnostics and process termination. *)
proc perror(s: ^Char); extern
proc exit(code: Int) -> !; extern
|
|
|
|
(*
|
|
Standard procedures.
|
|
*)
|
|
(*
  Resizes the block at ptr so that it can hold n elements of size bytes each
  by delegating to realloc. Returns nil without calling realloc when the
  total byte count does not fit into a Word.
*)
proc reallocarray(ptr: ^Byte, n: Word, size: Word) -> ^Byte;
var
  total: Word;
begin
  total := n * size;

  (* Assumes Word arithmetic wraps around on overflow, so that dividing the
     product back reveals a wrapped multiplication - TODO confirm. *)
  if size <> 0u then
    if total / size <> n then
      return nil
    end
  end;
  return realloc(ptr, total)
end
|
|
|
|
(* Writes the value.length characters of value to the standard output. *)
proc write_s(value: String);
begin
  (* Descriptor 1 is the standard output; descriptor 0, used before, is the
     standard input and is only writable by accident on a terminal. *)
  write(1, cast(value.ptr: ^Byte), cast(value.length: Int))
end
|
|
|
|
(* Writes a zero terminated string, without the terminator, to the standard output. *)
proc write_z(value: ^Char);
begin
  (* Descriptor 1 is the standard output; descriptor 0, used before, is the
     standard input and is only writable by accident on a terminal. *)
  write(1, cast(value: ^Byte), cast(strlen(value): Int))
end
|
|
|
|
(* Prints the textual form of a boolean, either true or false. *)
proc write_b(value: Bool);
begin
  if ~value then
    write_s("false")
  else
    write_s("true")
  end
end
|
|
|
|
(* Writes a single character to the standard output. *)
proc write_c(value: Char);
begin
  (* Descriptor 1 is the standard output; descriptor 0, used before, is the
     standard input and is only writable by accident on a terminal. *)
  write(1, cast(@value: ^Byte), 1)
end
|
|
|
|
(*
  Prints a signed integer in decimal notation. Negative values are printed
  with a leading minus sign.

  The digits are produced from the least significant one and stored from the
  end of the buffer towards its start, then written out front to back.
  NOTE(review): the buffer holds ten digits, which assumes Int is at most
  32 bits wide - confirm.
*)
proc write_i(value: Int);
var
  digit: Int;
  n: Word;
  buffer: [10]Char;
begin
  n := 10u;

  if value = 0 then
    write_c('0')
  elsif value < 0 then
    write_c('-')
  end;
  while value <> 0 do
    digit := value % 10;
    (* Assumes division truncates towards zero, so the remainder of a
       negative value is negative or zero - TODO confirm. The value itself
       is never negated, which keeps the smallest Int representable. *)
    if digit < 0 then
      digit := 0 - digit
    end;
    value := value / 10;

    buffer[n] := cast(cast('0': Int) + digit: Char);
    n := n - 1u
  end;
  while n < 10u do
    n := n + 1u;
    write_c(buffer[n])
  end
end
|
|
|
|
(*
  Prints an unsigned integer in decimal notation.

  The digits are computed on the Word itself instead of going through a cast
  to Int, which misprinted values above the Int range.
  NOTE(review): the buffer holds twenty digits, enough for a 64 bit Word -
  confirm the width of Word.
*)
proc write_u(value: Word);
var
  n: Word;
  buffer: [20]Char;
begin
  n := 20u;

  if value = 0u then
    write_c('0')
  end;
  while value <> 0u do
    buffer[n] := cast(cast('0': Word) + (value % 10u): Char);
    value := value / 10u;
    n := n - 1u
  end;
  while n < 20u do
    n := n + 1u;
    write_c(buffer[n])
  end
end
|
|
|
|
(* Tells whether c is one of the decimal digits 0 to 9. *)
proc is_digit(c: Char) -> Bool;
var
  code: Int;
begin
  code := cast(c: Int);

  return code >= cast('0': Int) & code <= cast('9': Int)
end
|
|
|
|
(*
  Tells whether c is an ASCII letter, upper or lower case.

  The two ranges are tested separately: the characters between Z and a in
  the ASCII table are brackets, the backslash, the caret, the underscore and
  the backquote, and must not count as letters, otherwise the lexer treats
  them as the start of an identifier.
*)
proc is_alpha(c: Char) -> Bool;
var
  code: Int;
begin
  code := cast(c: Int);

  return (code >= cast('A': Int) & code <= cast('Z': Int))
    or (code >= cast('a': Int) & code <= cast('z': Int))
end
|
|
|
|
(* Tells whether c is accepted by is_alpha or by is_digit. *)
proc is_alnum(c: Char) -> Bool;
begin
  return is_alpha(c) or is_digit(c)
end
|
|
|
|
(* Tells whether c is a blank, a line feed or a horizontal tab. *)
proc is_space(c: Char) -> Bool;
var
  result: Bool;
begin
  if c = ' ' then
    result := true
  elsif c = '\n' then
    result := true
  else
    result := c = '\t'
  end;
  return result
end
|
|
|
|
(*
  Returns a view of count characters of string beginning start characters
  after its first one. Nothing is copied and no bounds are checked.
*)
proc substring(string: String, start: Word, count: Word) -> String;
var
  first: ^Char;
begin
  first := string.ptr + start;

  return String(first, count)
end
|
|
|
|
(* Returns the tail of string that begins start characters after its first one. *)
proc open_substring(string: String, start: Word) -> String;
var
  remaining: Word;
begin
  remaining := string.length - start;

  return substring(string, start, remaining)
end
|
|
|
|
(*
  Returns a copy of origin stored in memory obtained from malloc.

  The characters are copied one by one up to origin.length. strncpy, used
  before, stops at the first zero character and fills the rest with zeros,
  which corrupted strings containing an embedded zero character.
*)
proc string_dup(origin: String) -> String;
var
  copy: ^Char;
  i: Word;
begin
  copy := cast(malloc(origin.length): ^Char);
  i := 0u;

  while i < origin.length do
    (copy + i)^ := (origin.ptr + i)^;
    i := i + 1u
  end;

  return String(copy, origin.length)
end
|
|
|
|
(* Creates an empty buffer with 64 bytes of storage obtained from malloc. *)
proc string_buffer_new() -> StringBuffer;
var
  result: StringBuffer;
begin
  result.size := 0u;
  result.capacity := 64u;
  result.data := malloc(result.capacity);

  return result
end
|
|
|
|
(*
  Appends char to the buffer. A full buffer is first enlarged by 1024 bytes
  with realloc.
*)
proc string_buffer_push(buffer: ^StringBuffer, char: Char);
var
  slot: ^Byte;
begin
  if buffer^.size >= buffer^.capacity then
    buffer^.capacity := buffer^.capacity + 1024u;
    buffer^.data := realloc(buffer^.data, buffer^.capacity)
  end;
  (* The slot is computed after the possible reallocation moved the data. *)
  slot := buffer^.data + buffer^.size;
  slot^ := cast(char: Byte);

  buffer^.size := buffer^.size + 1u
end
|
|
|
|
(*
  Drops the last count bytes of the buffer by shrinking its size. The size
  is not checked against count.
*)
proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
var
  remaining: Word;
begin
  remaining := buffer^.size - count;

  buffer^.size := remaining
end
|
|
|
|
(*
  Empties the buffer and returns its previous content. The returned string
  points into the storage of the buffer, it is not a copy.
*)
proc string_buffer_clear(buffer: ^StringBuffer) -> String;
var
  result: String;
  length: Word;
begin
  length := buffer^.size;
  buffer^.size := 0u;

  result := String(cast(buffer^.data: ^Char), length);
  return result
end
|
|
|
|
(*
|
|
End of standard procedures.
|
|
*)
|
|
|
|
(* Returns the initial position: line 1, column 1. *)
proc make_position() -> Position;
var
  result: Position;
begin
  result.line := 1u;
  result.column := 1u;

  return result
end
|
|
|
|
(*
  Opens filename for binary reading and returns a freshly allocated reader
  for it, or nil when the file cannot be opened or the reader cannot be
  allocated.

  The reader starts with size 0 and index 1, so the first call of
  source_file_empty fills the buffer.
*)
proc read_source(filename: ^Char) -> ^SourceFile;
var
  result: ^SourceFile;
  file_handle: ^FILE;
begin
  (* Callers compare the result with nil, so it must not stay uninitialized
     on the failure paths. *)
  result := nil;
  file_handle := fopen(filename, "rb\0".ptr);

  if file_handle <> nil then
    result := cast(malloc(#size(SourceFile)): ^SourceFile);

    if result <> nil then
      result^.handle := file_handle;
      result^.size := 0u;
      result^.index := 1u
    else
      fclose(file_handle)
    end
  end;
  return result
end
|
|
|
|
(*
  Translates the character following a backslash into the character the
  escape sequence denotes and stores it in result^.

  Returns true for the known escapes n, a, b, t, f, r, v, backslash, single
  quote, double quote, question mark and 0. Returns false for any other
  character, in which case result^ is left untouched.
*)
proc escape_char(escape: Char, result: ^Char) -> Bool;
var
  successful: Bool;
begin
  (* Every branch but the last one succeeds. *)
  successful := true;

  if escape = 'n' then
    result^ := '\n'
  elsif escape = 'a' then
    result^ := '\a'
  elsif escape = 'b' then
    result^ := '\b'
  elsif escape = 't' then
    result^ := '\t'
  elsif escape = 'f' then
    result^ := '\f'
  elsif escape = 'r' then
    result^ := '\r'
  elsif escape = 'v' then
    result^ := '\v'
  elsif escape = '\\' then
    result^ := '\\'
  elsif escape = '\'' then
    result^ := '\''
  elsif escape = '"' then
    result^ := '"'
  elsif escape = '?' then
    result^ := '\?'
  elsif escape = '0' then
    result^ := '\0'
  else
    successful := false
  end;
  return successful
end
|
|
|
|
(*
  Tells whether the file behind source_input has no more characters.

  When the current chunk is used up, the next chunk of up to 1024 bytes is
  read into the buffer first; the file is exhausted when that chunk is empty.
*)
proc source_file_empty(source_input: ^Byte) -> Bool;
var
  source_file: ^SourceFile;
begin
  source_file := cast(source_input: ^SourceFile);

  if source_file^.index > source_file^.size then
    source_file^.index := 1u;
    source_file^.size := fread(cast(@source_file^.buffer: ^Byte), 1u, 1024u, source_file^.handle)
  end;

  return source_file^.size = 0u
end
|
|
|
|
(* Returns the current character of the file reader without consuming it. *)
proc source_file_head(source_input: ^Byte) -> Char;
var
  source_file: ^SourceFile;
  current: Word;
begin
  source_file := cast(source_input: ^SourceFile);
  current := source_file^.index;

  return source_file^.buffer[current]
end
|
|
|
|
(* Moves the file reader to the next character of its buffer. *)
proc source_file_advance(source_input: ^Byte);
var
  source_file: ^SourceFile;
  next: Word;
begin
  source_file := cast(source_input: ^SourceFile);
  next := source_file^.index + 1u;

  source_file^.index := next
end
|
|
|
|
(* Asks the empty callback of the source whether its input is exhausted. *)
proc source_code_empty(source_code: ^SourceCode) -> Bool;
var
  exhausted: Bool;
begin
  exhausted := source_code^.empty(source_code^.input);

  return exhausted
end
|
|
|
|
(* Returns the current character as reported by the head callback of the source. *)
proc source_code_head(source_code: SourceCode) -> Char;
var
  current: Char;
begin
  current := source_code.head(source_code.input);

  return current
end
|
|
|
|
(*
  Consumes the current character through the advance callback and moves the
  tracked position one column to the right.
*)
proc source_code_advance(source_code: ^SourceCode);
begin
  source_code^.advance(source_code^.input);
  (* The column was assigned to itself before and therefore never changed. *)
  source_code^.position.column := source_code^.position.column + 1u
end
|
|
|
|
(*
  Registers a line break in the tracked position: the column is reset to 0
  and the line number is incremented.
*)
proc source_code_break(source_code: ^SourceCode);
begin
  source_code^.position.column := 0u;
  source_code^.position.line := source_code^.position.line + 1u
end
|
|
|
|
(*
  Tells whether the source has a current character and that character is
  expected. Nothing is consumed.
*)
proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
var
  matches: Bool;
begin
  matches := ~source_code_empty(source_code) & source_code_head(source_code^) = expected;

  return matches
end
|
|
|
|
(*
  Consumes characters accepted by is_space until another character or the
  end of the input is reached. Line feeds are registered with
  source_code_break before they are consumed.
*)
proc skip_spaces(source_code: ^SourceCode);
var
  current: Char;
begin
  while ~source_code_empty(source_code), loop do
    current := source_code_head(source_code^);

    if is_space(current) then
      if current = '\n' then
        source_code_break(source_code)
      end;
      source_code_advance(source_code)
    else
      break loop
    end
  end
end
|
|
|
|
(* Tells whether char may appear inside an identifier: alphanumeric or underscore. *)
proc is_ident(char: Char) -> Bool;
begin
  return char = '_' or is_alnum(char)
end
|
|
|
|
(*
  Consumes the longest run of characters accepted by is_ident and appends
  them to token_content.
*)
proc lex_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
begin
  while ~source_code_empty(source_code) & is_ident(source_code_head(source_code^)) do
    string_buffer_push(token_content, source_code_head(source_code^));
    source_code_advance(source_code)
  end
end
|
|
|
|
(*
  Consumes the body of a comment whose opening delimiter has already been
  consumed, up to and including the closing delimiter, a star followed by a
  right parenthesis. The text of the comment is appended to token_content
  without the closing delimiter.

  Returns true when the closing delimiter was found and false when the input
  ended first.

  trailing is 1 right after a star, 2 once the closing delimiter is complete
  and 0 otherwise.
*)
proc lex_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
  trailing: Word;
  current: Char;
begin
  trailing := 0u;

  while ~source_code_empty(source_code) & trailing < 2u do
    current := source_code_head(source_code^);

    if current = '*' then
      string_buffer_push(token_content, '*');
      trailing := 1u
    elsif current = ')' & trailing = 1u then
      (* Remove the star that turned out to belong to the delimiter. *)
      string_buffer_pop(token_content, 1u);
      trailing := 2u
    else
      (* Comments may span several lines, keep the line counter in step,
         the same way skip_spaces does. *)
      if current = '\n' then
        source_code_break(source_code)
      end;
      string_buffer_push(token_content, current);
      trailing := 0u
    end;
    source_code_advance(source_code)
  end;

  return trailing = 2u
end
|
|
|
|
(*
  Reads one character, either a plain one or a backslash escape sequence,
  and stores it in token_content^.

  Returns false when the input is exhausted or the escape sequence is not
  known to escape_char. The last character read is consumed only on success.
*)
proc lex_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
var
  successful: Bool;
begin
  if source_code_empty(source_code) then
    return false
  end;

  if source_code_head(source_code^) = '\\' then
    (* Skip the backslash and translate the character that follows it. *)
    source_code_advance(source_code);

    successful := ~source_code_empty(source_code) & escape_char(source_code_head(source_code^), token_content)
  else
    token_content^ := source_code_head(source_code^);
    successful := true
  end;

  if successful then
    source_code_advance(source_code)
  end;
  return successful
end
|
|
|
|
(*
  Reads the content of a string literal whose opening double quote has
  already been consumed, and consumes the closing double quote. The
  characters, with escape sequences translated, are appended to
  token_content.

  Returns false when a character cannot be read by lex_character or the
  input ends before the closing double quote.
*)
proc lex_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
var
  is_valid: Bool;
  next_char: Char;
begin
  is_valid := true;

  while is_valid & ~source_code_empty(source_code) & source_code_head(source_code^) <> '"' do
    is_valid := lex_character(source_code, @next_char);

    if is_valid then
      string_buffer_push(token_content, next_char)
    end
  end;

  if is_valid & source_code_expect(source_code, '"') then
    source_code_advance(source_code)
  else
    is_valid := false
  end;
  return is_valid
end
|
|
|
|
(*
  Consumes a run of decimal digits and stores the number they denote in
  token_content^. The result is 0 when no digit is present; overflow is not
  detected.
*)
proc lex_number(source_code: ^SourceCode, token_content: ^Int);
var
  digit: Int;
begin
  token_content^ := 0;

  while ~source_code_empty(source_code) & is_digit(source_code_head(source_code^)) do
    digit := cast(source_code_head(source_code^): Int) - cast('0': Int);
    token_content^ := token_content^ * 10 + digit;

    source_code_advance(source_code)
  end
end
|
|
|
|
(*
  Prints the tokens_size tokens starting at tokens on a single line, each
  one followed by a blank, and ends the line.

  Keywords are printed in upper case, operators and punctuation as they are
  written in the source. Identifiers, numbers, characters and booleans are
  printed together with their value. A kind without a case of its own is
  printed as UNKNOWN with its ordinal.
*)
proc print_tokens(tokens: ^Token, tokens_size: Word);
var
  current_token: ^Token;
  index: Word;
begin
  index := 0u;

  while index < tokens_size do
    current_token := tokens + index;

    case current_token^.kind of
      TokenKind._if:
        write_s("IF")
      | TokenKind._then:
        write_s("THEN")
      | TokenKind._else:
        write_s("ELSE")
      | TokenKind._elsif:
        write_s("ELSIF")
      | TokenKind._while:
        write_s("WHILE")
      | TokenKind._do:
        write_s("DO")
      | TokenKind._proc:
        write_s("PROC")
      | TokenKind._begin:
        write_s("BEGIN")
      | TokenKind._end:
        write_s("END")
      | TokenKind._extern:
        write_s("EXTERN")
      | TokenKind._const:
        write_s("CONST")
      | TokenKind._var:
        write_s("VAR")
      | TokenKind.array:
        write_s("ARRAY")
      | TokenKind._of:
        write_s("OF")
      | TokenKind._type:
        write_s("TYPE")
      | TokenKind._record:
        write_s("RECORD")
      | TokenKind._union:
        write_s("UNION")
      | TokenKind.pointer:
        write_s("POINTER")
      | TokenKind.to:
        write_s("TO")
      | TokenKind.boolean:
        write_s("BOOLEAN<");
        write_b(current_token^.value.boolean_value);
        write_c('>')
      | TokenKind._nil:
        write_s("NIL")
      | TokenKind.and:
        write_s("AND")
      | TokenKind._or:
        write_s("OR")
      | TokenKind.not:
        write_s("NOT")
      | TokenKind._return:
        write_s("RETURN")
      | TokenKind._cast:
        write_s("CAST")
      | TokenKind.shift_left:
        write_s("<<")
      | TokenKind.shift_right:
        write_s(">>")
      | TokenKind.identifier:
        write_c('<');
        write_s(current_token^.value.string);
        write_c('>')
      | TokenKind.left_paren:
        write_s("(")
      | TokenKind.right_paren:
        write_s(")")
      | TokenKind.left_square:
        write_s("[")
      | TokenKind.right_square:
        write_s("]")
      | TokenKind.greater_equal:
        write_s(">=")
      | TokenKind.less_equal:
        write_s("<=")
      | TokenKind.greater_than:
        write_s(">")
      | TokenKind.less_than:
        write_s("<")
      | TokenKind.equal:
        write_s("=")
      | TokenKind.not_equal:
        write_s("<>")
      | TokenKind.semicolon:
        write_c(';')
      | TokenKind.dot:
        write_c('.')
      | TokenKind.comma:
        write_c(',')
      | TokenKind.plus:
        write_c('+')
      | TokenKind.minus:
        write_c('-')
      | TokenKind.multiplication:
        write_c('*')
      | TokenKind.division:
        write_c('/')
      | TokenKind.remainder:
        write_c('%')
      | TokenKind.assignment:
        write_s(":=")
      | TokenKind.colon:
        write_c(':')
      | TokenKind.hat:
        write_c('^')
      | TokenKind.at:
        write_c('@')
      | TokenKind.comment:
        write_s("(* COMMENT *)")
      | TokenKind.integer:
        write_c('<');
        write_i(current_token^.value.int_value);
        write_c('>')
      | TokenKind.word:
        write_c('<');
        write_i(current_token^.value.int_value);
        write_s("u>")
      | TokenKind.character:
        write_c('<');
        write_i(cast(current_token^.value.char_value: Int));
        write_s("c>")
      | TokenKind.string:
        write_s("\"...\"")
      | TokenKind._defer:
        write_s("DEFER")
      | TokenKind.exclamation:
        write_c('!')
      | TokenKind.arrow:
        write_s("->")
    else
      write_s("UNKNOWN<");
      write_i(cast(current_token^.kind: Int));
      write_c('>')
    end;
    write_c(' ');

    index := index + 1u
  end;
  write_c('\n')
end
|
|
|
|
(*
  Turns the text of a word into a token. Reserved words yield the matching
  keyword kind, true and false yield a boolean token carrying the value, and
  any other text yields an identifier token holding a copy of the text made
  with string_dup.

  Only kind and, where mentioned above, value are assigned.
*)
proc categorize_identifier(token_content: String) -> Token;
var
  result: Token;
begin
  if "if" = token_content then
    result.kind := TokenKind._if
  elsif "then" = token_content then
    result.kind := TokenKind._then
  elsif "else" = token_content then
    result.kind := TokenKind._else
  elsif "elsif" = token_content then
    result.kind := TokenKind._elsif
  elsif "while" = token_content then
    result.kind := TokenKind._while
  elsif "do" = token_content then
    result.kind := TokenKind._do
  elsif "proc" = token_content then
    result.kind := TokenKind._proc
  elsif "begin" = token_content then
    result.kind := TokenKind._begin
  elsif "end" = token_content then
    result.kind := TokenKind._end
  elsif "extern" = token_content then
    result.kind := TokenKind._extern
  elsif "const" = token_content then
    result.kind := TokenKind._const
  elsif "var" = token_content then
    result.kind := TokenKind._var
  elsif "array" = token_content then
    result.kind := TokenKind.array
  elsif "of" = token_content then
    result.kind := TokenKind._of
  elsif "type" = token_content then
    result.kind := TokenKind._type
  elsif "record" = token_content then
    result.kind := TokenKind._record
  elsif "union" = token_content then
    result.kind := TokenKind._union
  elsif "pointer" = token_content then
    result.kind := TokenKind.pointer
  elsif "to" = token_content then
    result.kind := TokenKind.to
  elsif "true" = token_content then
    result.kind := TokenKind.boolean;
    result.value.boolean_value := true
  elsif "false" = token_content then
    result.kind := TokenKind.boolean;
    result.value.boolean_value := false
  elsif "nil" = token_content then
    result.kind := TokenKind._nil
  elsif "and" = token_content then
    result.kind := TokenKind.and
  elsif "or" = token_content then
    result.kind := TokenKind._or
  elsif "not" = token_content then
    result.kind := TokenKind.not
  elsif "return" = token_content then
    result.kind := TokenKind._return
  elsif "cast" = token_content then
    result.kind := TokenKind._cast
  elsif "defer" = token_content then
    result.kind := TokenKind._defer
  else
    (* The content usually points into a reused buffer, so it is copied. *)
    result.kind := TokenKind.identifier;
    result.value.string := string_dup(token_content)
  end;

  return result
end
|
|
|
|
(*
  Splits the whole source into tokens.

  Returns an array of tokens grown with reallocarray, or nil when the source
  contains no token, and stores the number of tokens in tokens_size^.
  Comments are returned as tokens too.

  Text that cannot be lexed is reported with an error message and does not
  produce a token; lexing then continues behind it.
*)
proc tokenize(source_code: SourceCode, tokens_size: ^Word) -> ^Token;
var
  tokens, current_token: ^Token;
  first_char: Char;
  token_buffer: StringBuffer;
begin
  tokens_size^ := 0u;
  tokens := nil;
  token_buffer := string_buffer_new();

  skip_spaces(@source_code);

  while ~source_code_empty(@source_code) do
    (* Make room for one more token. The slot is reused by the next
       iteration when the current text turns out to be invalid. *)
    tokens := cast(reallocarray(cast(tokens: ^Byte), tokens_size^ + 1u, #size(Token)): ^Token);
    current_token := tokens + tokens_size^;
    first_char := source_code_head(source_code);

    if is_alpha(first_char) or first_char = '_' then
      lex_identifier(@source_code, @token_buffer);
      current_token^ := categorize_identifier(string_buffer_clear(@token_buffer))
    elsif is_digit(first_char) then
      lex_number(@source_code, @current_token^.value.int_value);

      (* A trailing u marks an unsigned literal. *)
      if source_code_expect(@source_code, 'u') then
        current_token^.kind := TokenKind.word;
        source_code_advance(@source_code)
      else
        current_token^.kind := TokenKind.integer
      end
    elsif first_char = '(' then
      source_code_advance(@source_code);

      if source_code_empty(@source_code) then
        current_token^.kind := TokenKind.left_paren
      elsif source_code_head(source_code) = '*' then
        source_code_advance(@source_code);

        if lex_comment(@source_code, @token_buffer) then
          current_token^.value.string := string_dup(string_buffer_clear(@token_buffer));
          current_token^.kind := TokenKind.comment
        else
          (* Drop the partial text so it does not leak into the next token. *)
          token_buffer.size := 0u;
          current_token^.kind := TokenKind.unknown
        end
      else
        current_token^.kind := TokenKind.left_paren
      end
    elsif first_char = ')' then
      current_token^.kind := TokenKind.right_paren;
      source_code_advance(@source_code)
    elsif first_char = '\'' then
      source_code_advance(@source_code);

      if lex_character(@source_code, @current_token^.value.char_value) & source_code_expect(@source_code, '\'') then
        current_token^.kind := TokenKind.character;
        source_code_advance(@source_code)
      else
        current_token^.kind := TokenKind.unknown
      end
    elsif first_char = '"' then
      source_code_advance(@source_code);

      if lex_string(@source_code, @token_buffer) then
        current_token^.kind := TokenKind.string;
        current_token^.value.string := string_dup(string_buffer_clear(@token_buffer))
      else
        (* Drop the partial text so it does not leak into the next token. *)
        token_buffer.size := 0u;
        current_token^.kind := TokenKind.unknown
      end
    elsif first_char = '[' then
      current_token^.kind := TokenKind.left_square;
      source_code_advance(@source_code)
    elsif first_char = ']' then
      current_token^.kind := TokenKind.right_square;
      source_code_advance(@source_code)
    elsif first_char = '>' then
      source_code_advance(@source_code);

      if source_code_empty(@source_code) then
        current_token^.kind := TokenKind.greater_than
      elsif source_code_head(source_code) = '=' then
        current_token^.kind := TokenKind.greater_equal;
        source_code_advance(@source_code)
      elsif source_code_head(source_code) = '>' then
        current_token^.kind := TokenKind.shift_right;
        source_code_advance(@source_code)
      else
        current_token^.kind := TokenKind.greater_than
      end
    elsif first_char = '<' then
      source_code_advance(@source_code);

      if source_code_empty(@source_code) then
        current_token^.kind := TokenKind.less_than
      elsif source_code_head(source_code) = '=' then
        current_token^.kind := TokenKind.less_equal;
        source_code_advance(@source_code)
      elsif source_code_head(source_code) = '<' then
        current_token^.kind := TokenKind.shift_left;
        source_code_advance(@source_code)
      elsif source_code_head(source_code) = '>' then
        current_token^.kind := TokenKind.not_equal;
        source_code_advance(@source_code)
      else
        current_token^.kind := TokenKind.less_than
      end
    elsif first_char = '=' then
      current_token^.kind := TokenKind.equal;
      source_code_advance(@source_code)
    elsif first_char = ';' then
      current_token^.kind := TokenKind.semicolon;
      source_code_advance(@source_code)
    elsif first_char = '.' then
      current_token^.kind := TokenKind.dot;
      source_code_advance(@source_code)
    elsif first_char = ',' then
      current_token^.kind := TokenKind.comma;
      source_code_advance(@source_code)
    elsif first_char = '+' then
      current_token^.kind := TokenKind.plus;
      source_code_advance(@source_code)
    elsif first_char = '-' then
      source_code_advance(@source_code);

      if source_code_empty(@source_code) then
        current_token^.kind := TokenKind.minus
      elsif source_code_head(source_code) = '>' then
        current_token^.kind := TokenKind.arrow;
        source_code_advance(@source_code)
      else
        current_token^.kind := TokenKind.minus
      end
    elsif first_char = '*' then
      current_token^.kind := TokenKind.multiplication;
      source_code_advance(@source_code)
    elsif first_char = '/' then
      current_token^.kind := TokenKind.division;
      source_code_advance(@source_code)
    elsif first_char = '%' then
      current_token^.kind := TokenKind.remainder;
      source_code_advance(@source_code)
    elsif first_char = ':' then
      source_code_advance(@source_code);

      if source_code_empty(@source_code) then
        current_token^.kind := TokenKind.colon
      elsif source_code_head(source_code) = '=' then
        current_token^.kind := TokenKind.assignment;
        source_code_advance(@source_code)
      else
        current_token^.kind := TokenKind.colon
      end
    elsif first_char = '^' then
      current_token^.kind := TokenKind.hat;
      source_code_advance(@source_code)
    elsif first_char = '@' then
      current_token^.kind := TokenKind.at;
      source_code_advance(@source_code)
    elsif first_char = '!' then
      current_token^.kind := TokenKind.exclamation;
      source_code_advance(@source_code)
    else
      current_token^.kind := TokenKind.unknown;
      source_code_advance(@source_code)
    end;

    if current_token^.kind <> TokenKind.unknown then
      tokens_size^ := tokens_size^ + 1u
    else
      write_s("Lexical analysis error on \"");
      write_c(first_char);
      write_s("\".\n")
    end;
    (* Skip blanks after invalid text as well, otherwise every blank that
       follows an error is reported as an error of its own. *)
    skip_spaces(@source_code)
  end;

  (* Every token owns a copy of its text, the scratch buffer can go. *)
  free(token_buffer.data);

  return tokens
end
|
|
|
|
(*
  Interprets the command line arguments, skipping the program name.

  The options --tokenize and --syntax-tree set the corresponding flags. An
  argument that does not start with a dash is taken as the input file; the
  last such argument wins.

  Returns a record allocated with malloc, or nil after printing an error
  message when an unknown option is given or no input file is named. The
  record is released again before nil is returned.
*)
proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine;
var
  parameter: ^^Char;
  i: Int;
  result: ^CommandLine;
begin
  i := 1;
  result := cast(malloc(#size(CommandLine)): ^CommandLine);
  result^.tokenize := false;
  result^.syntax_tree := false;
  result^.input := nil;

  while i < argc do
    parameter := argv + i;

    if strcmp(parameter^, "--tokenize\0".ptr) = 0 then
      result^.tokenize := true
    elsif strcmp(parameter^, "--syntax-tree\0".ptr) = 0 then
      result^.syntax_tree := true
    elsif parameter^^ <> '-' then
      result^.input := parameter^
    else
      write_s("Fatal error: Unknown command line options:");

      write_c(' ');
      write_z(parameter^);
      write_s(".\n");

      free(cast(result: ^Byte));
      return nil
    end;

    i := i + 1
  end;
  if result^.input = nil then
    write_s("Fatal error: no input files.\n");

    free(cast(result: ^Byte));
    return nil
  end;

  return result
end
|
|
|
|
(*
  Runs the program: parses the command line, opens the input file, lexes it
  and prints the tokens when --tokenize was given.

  Returns the exit code: 0 on success, 2 when the command line is invalid
  and 3 when the input file cannot be read.
*)
proc process(argc: Int, argv: ^^Char) -> Int;
var
  tokens: ^Token;
  tokens_size: Word;
  source_code: SourceCode;
  command_line: ^CommandLine;
begin
  command_line := parse_command_line(argc, argv);
  if command_line = nil then
    return 2
  end;

  (* Wire the generic character source to the file reader. *)
  source_code.position := make_position();

  source_code.input := cast(read_source(command_line^.input): ^Byte);
  source_code.empty := source_file_empty;
  source_code.head := source_file_head;
  source_code.advance := source_file_advance;

  if source_code.input = nil then
    perror(command_line^.input);
    return 3
  end;

  tokens := tokenize(source_code, @tokens_size);

  fclose(cast(source_code.input: ^SourceFile)^.handle);

  if command_line^.tokenize then
    print_tokens(tokens, tokens_size)
  end;
  return 0
end
|
|
|
|
(*
  Program entry point: terminates the process with the code returned by
  process.
  NOTE(review): count and parameters are not declared in this file;
  presumably they are provided by the language for the command line
  arguments - confirm.
*)
begin
  exit(process(count, parameters))
end.
|