Replace type expression with traits

2025-02-25 23:39:31 +01:00
parent 85b6843ecf
commit f091344cce
7 changed files with 423 additions and 373 deletions
--- a/source.elna
+++ b/source.elna
@ -72,13 +72,28 @@ type
    first: Position
 	last: Position
  end
+  FILE* = record end
+  SourceFile* = record
+	buffer: [1024]Char
+	handle: ^FILE
+	size: Word
+	index: Word
+  end
+  StringBuffer* = record
+    data: ^Byte
+    size: Word
+    capacity: Word
+  end
  SourceCode = record
    position: Position
-	text: String
+
+	input: ^Byte
+	empty: proc(data: ^Byte) -> Bool
+	advance: proc(data: ^Byte)
+	head: proc(data: ^Byte) -> Char
  end
  TokenValue* = union
    int_value: Int
-    string_value: ^Char
    string: String
 	boolean_value: Bool
 	char_value: Char
@ -88,7 +103,6 @@ type
    value: TokenValue
 	location: Location
  end
-  FILE* = record end
  CommandLine* = record
    input: ^Char
 	tokenize: Bool
@ -119,8 +133,6 @@ proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern
 proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern
 proc strlen(ptr: ^Char) -> Word; extern

-proc strtol(nptr: ^Char, endptr: ^^Char, base: Int) -> Int; extern
-
 proc perror(s: ^Char); extern
 proc exit(code: Int) -> !; extern

@ -225,6 +237,41 @@ begin
  return String(copy, origin.length)
 end

+(*
+  Creates an empty string buffer: size 0, capacity 64, with the
+  storage obtained from malloc.
+*)
+proc string_buffer_new() -> StringBuffer;
+var
+  buffer: StringBuffer
+begin
+  buffer.size := 0u
+  buffer.capacity := 64u
+  buffer.data := malloc(buffer.capacity)
+
+  return buffer
+end
+
+(*
+  Appends one character to the buffer. When the buffer is full, its
+  capacity is first enlarged by 1024 and the storage is resized with
+  realloc.
+*)
+proc string_buffer_push(buffer: ^StringBuffer, char: Char);
+var
+  slot: ^Byte
+begin
+  if buffer^.capacity <= buffer^.size then
+    buffer^.capacity := buffer^.capacity + 1024u
+    buffer^.data := realloc(buffer^.data, buffer^.capacity)
+  end
+  slot := buffer^.data + buffer^.size
+  slot^ := cast(char: Byte)
+
+  buffer^.size := buffer^.size + 1u
+end
+
+(*
+  Drops the last count characters from the buffer by shrinking its
+  size. The storage itself is left untouched.
+
+  The size is an unsigned Word, so removing more characters than the
+  buffer holds would wrap around; in that case the buffer is simply
+  emptied.
+*)
+proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
+begin
+  if count > buffer^.size then
+    buffer^.size := 0u
+  else
+    buffer^.size := buffer^.size - count
+  end
+end
+
+(*
+  Returns the current buffer contents as a String and resets the
+  buffer size to 0. The returned String is built from the buffer's
+  own data pointer; no copy is made here.
+*)
+proc string_buffer_clear(buffer: ^StringBuffer) -> String;
+var
+  length: Word
+begin
+  length := buffer^.size
+  buffer^.size := 0u
+
+  return String(cast(buffer^.data: ^Char), length)
+end
+
 (*
  End of standard procedures.
 *)
@ -234,36 +281,20 @@ begin
  return Position(1u, 1u)
 end

-proc read_source(filename: ^Char, result: ^String) -> Bool;
+(*
+  Opens filename for binary reading and returns a newly allocated
+  SourceFile that wraps the file handle. The size starts at 0 and the
+  index at 1, so no file data has been read yet.
+
+  Returns nil if the file cannot be opened. The result is set to nil
+  up front so that the failure path does not return an unassigned
+  pointer.
+*)
+proc read_source(filename: ^Char) -> ^SourceFile;
 var
-  input_file: ^FILE
-  source_size: Int
-  input: ^Byte
+  result: ^SourceFile
+  file_handle: ^FILE
 begin
-  input_file := fopen(filename, "rb\0".ptr)
+  result := nil
+  file_handle := fopen(filename, "rb\0".ptr)

-  if input_file = nil then
-    return false
+  if file_handle <> nil then
+    result := cast(malloc(#size(SourceFile)): ^SourceFile)
+    result^.handle := file_handle
+    result^.size := 0u
+    result^.index := 1u
  end
-  defer
-    fclose(input_file)
-  end
-  if fseek(input_file, 0, SEEK_END) <> 0 then
-    return false
-  end
-  source_size := ftell(input_file)
-  if source_size < 0 then
-    return false
-  end
-  rewind(input_file)
-
-  input := malloc(cast(source_size: Word))
-  if fread(input, cast(source_size: Word), 1u, input_file) <> 1u then
-    return false
-  end
-  result^ := String(cast(input: ^Char), cast(source_size: Word))
-
-  return true
+  return result
 end

 proc escape_char(escape: Char, result: ^Char) -> Bool;
@ -312,118 +343,169 @@ begin
  return successful
 end

-proc advance_source(source_code: SourceCode, length: Word) -> SourceCode;
+(*
+  Reports whether the source file has no more characters to offer.
+  source_input is cast to a SourceFile pointer.
+
+  When the index has moved past the number of buffered characters,
+  the 1024-character buffer is refilled with fread and the index is
+  reset to 1. The result is true when the buffered size is 0.
+*)
+proc source_file_empty(source_input: ^Byte) -> Bool;
+var
+  source_file: ^SourceFile
 begin
-  source_code.text := open_substring(source_code.text, length)
-  source_code.position.column := source_code.position.column + length
+  source_file := cast(source_input: ^SourceFile)

-  return source_code
+  if source_file^.index > source_file^.size then
+    source_file^.size := fread(cast(@source_file^.buffer: ^Byte), 1u, 1024u, source_file^.handle)
+	source_file^.index := 1u
+  end
+
+  return source_file^.size = 0u
 end

-proc skip_spaces(source_code: SourceCode) -> SourceCode;
+(*
+  Returns the buffered character at the current index of the source
+  file. source_input is cast to a SourceFile pointer. The buffer is
+  neither checked nor refilled here.
+*)
+proc source_file_head(source_input: ^Byte) -> Char;
+var
+  source_file: ^SourceFile
 begin
-  while source_code.text.length > 0u and is_space(source_code.text[1u]) do
-    if source_code.text[1u] = '\n' then
-      source_code.position.line := source_code.position.line + 1u
-      source_code.position.column := 1u
-	else
-	  source_code.position.column := source_code.position.column + 1u
+  source_file := cast(source_input: ^SourceFile)
+
+  return source_file^.buffer[source_file^.index]
+end
+
+(*
+  Moves the read index of the source file one character forward.
+  source_input is cast to a SourceFile pointer.
+*)
+proc source_file_advance(source_input: ^Byte);
+var
+  file: ^SourceFile
+begin
+  file := cast(source_input: ^SourceFile)
+  file^.index := file^.index + 1u
+end
+
+(*
+  Calls the empty callback stored in source_code, passing it the
+  input pointer, and returns the callback's result.
+*)
+proc source_code_empty(source_code: ^SourceCode) -> Bool;
+begin
+  return source_code^.empty(source_code^.input)
+end
+
+(*
+  Calls the head callback stored in source_code, passing it the
+  input pointer, and returns the character the callback yields.
+  Unlike the neighbouring helpers, source_code is taken by value.
+*)
+proc source_code_head(source_code: SourceCode) -> Char;
+begin
+  return source_code.head(source_code.input)
+end
+
+(*
+  Consumes one character through the advance callback of source_code
+  and moves the tracked column one position to the right.
+
+  source_code_break resets the column to 0 and is followed by an
+  advance, so the increment here puts the first character of a new
+  line in column 1.
+*)
+proc source_code_advance(source_code: ^SourceCode);
+begin
+  source_code^.advance(source_code^.input)
+  source_code^.position.column := source_code^.position.column + 1u
+end
+
+(*
+  Records a line break in the tracked position: the column is reset
+  to 0 and the line number is increased by one.
+*)
+proc source_code_break(source_code: ^SourceCode);
+begin
+  source_code^.position.column := 0u
+  source_code^.position.line := source_code^.position.line + 1u
+end
+
+(*
+  Tells whether the next character of the source equals expected.
+  Yields false when the source is empty. Nothing is consumed.
+*)
+proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
+begin
+  if source_code_empty(source_code) then
+    return false
+  end
+  return source_code_head(source_code^) = expected
+end
+
+(*
+  Consumes characters for which is_space holds until the source is
+  empty or another character is found. A '\n' is reported to
+  source_code_break before it is consumed.
+*)
+proc skip_spaces(source_code: ^SourceCode);
+begin
+  while not source_code_empty(source_code) and is_space(source_code_head(source_code^)) do
+    if source_code_head(source_code^) = '\n' then
+      source_code_break(source_code)
 	end
-    source_code.text := open_substring(source_code.text, 1u)
+	source_code_advance(source_code)
  end
-  return source_code
 end

-proc lex_identifier(source_code: ^SourceCode, token_content: ^String);
+(*
+  Tells whether char may appear in an identifier: any character
+  accepted by is_alnum, or an underscore.
+*)
+proc is_ident(char: Char) -> Bool;
+begin
+  return is_alnum(char) or char = '_'
+end
+
+(*
+  Reads identifier characters (see is_ident) from the source and
+  appends each one to token_content. Stops at the first other
+  character or at the end of the source.
+
+  The content_length local of the previous implementation is no
+  longer needed and is removed together with its var section.
+*)
+proc lex_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
-var
-  content_length: Word
 begin
-  content_length := 0u
-  token_content^ := source_code^.text
-
-  while is_alnum(source_code^.text[1u]) or source_code^.text[1u] = '_' do
-    content_length := content_length + 1u
-	source_code^ := advance_source(source_code^, 1u)
+  while not source_code_empty(source_code) and is_ident(source_code_head(source_code^)) do
+    string_buffer_push(token_content, source_code_head(source_code^))
+	source_code_advance(source_code)
  end
-  token_content^ := substring(token_content^, 0u, content_length)
 end

-proc lex_comment(source_code: ^SourceCode, token_content: ^String) -> Bool;
+(*
+  Reads the body of a comment and appends it to token_content.
+
+  trailing tracks the terminator: 1 after an asterisk, 2 once a
+  closing parenthesis directly follows an asterisk, 0 otherwise.
+  Every asterisk is pushed when it is seen; the one that turns out
+  to belong to the terminator is popped again, so the terminator is
+  not part of the collected text.
+
+  Returns true if the terminator was found before the source ended.
+*)
+proc lex_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
 var
-  content_length: Word
  trailing: Word
 begin
-  content_length := 0u
-  token_content^ := source_code^.text
  trailing := 0u

-  while source_code^.text.length > 0u and trailing < 2u do
-    if source_code^.text[1u] = '*' then
-	  content_length := content_length + trailing
+  while not source_code_empty(source_code) and trailing < 2u do
+    if source_code_head(source_code^) = '*' then
+	  string_buffer_push(token_content, '*')
 	  trailing := 1u
-	elsif source_code^.text[1u] = ')' and trailing = 1u then
+	elsif source_code_head(source_code^) = ')' and trailing = 1u then
+      string_buffer_pop(token_content, 1u)
 	  trailing := 2u
 	else
-      content_length := content_length + trailing + 1u
+	  string_buffer_push(token_content, source_code_head(source_code^))
 	  trailing := 0u
 	end
-    source_code^ := advance_source(source_code^, 1u)
+    source_code_advance(source_code)
  end

  return trailing = 2u
 end

-proc lex_character(input: ^Char, current_token: ^Token) -> ^Char;
+(*
+  Reads one character from the source into token_content. A
+  backslash introduces an escape sequence that is translated by
+  escape_char.
+
+  Returns false if the source is empty, if it ends right after a
+  backslash, or if escape_char rejects the escaped character. On
+  success the character is consumed. On a failed escape the
+  backslash has already been consumed, but the character after it
+  has not.
+*)
+proc lex_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
+var
+  successful: Bool
 begin
-  if input^ = '\\' then
-    input := input + 1
-	if escape_char(input^, @current_token^.value.char_value) then
-	  input := input + 1
-	end
-  elsif input^ <> '\0' then
-    current_token^.value.char_value := input^
-	input := input + 1
+  successful := not source_code_empty(source_code)
+
+  if successful then
+    if source_code_head(source_code^) = '\\' then
+      source_code_advance(source_code)
+
+      successful := not source_code_empty(source_code) and escape_char(source_code_head(source_code^), token_content)
+	else
+      token_content^ := source_code_head(source_code^)
+	  successful := true
+    end
  end
-  return input
+  if successful then
+    source_code_advance(source_code)
+  end
+  return successful
 end

-proc lex_string(input: ^Char, current_token: ^Token) -> ^Char;
+(*
+  Reads the characters of a string literal, up to and including the
+  closing double quote, and appends them to token_content. Each
+  character is read with lex_character, so escape sequences are
+  translated.
+
+  Returns false if lex_character fails or if the source ends before
+  a closing double quote. Characters pushed before the failure stay
+  in token_content.
+
+  The locals token_end, constructed_string and token_length of the
+  previous implementation are unused here and are removed.
+*)
+proc lex_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
 var
-  token_end, constructed_string: ^Char
-  token_length: Word
  is_valid: Bool
+  next_char: Char
 begin
-  token_end := input
-
-  while token_end^ <> '\0' and not ((token_end - 1)^ <> '\\' and token_end^ = '"') do
-    token_end := token_end + 1
-  end
-  if token_end^ <> '\"' then
-    return input
-  end
-  token_length := cast(token_end - input: Word)
-  current_token^.value.string_value := cast(calloc(token_length, 1u): ^Char)
-
  is_valid := true
-  constructed_string := current_token^.value.string_value
-  while input < token_end and is_valid do

-    if input^ = '\\' then
-      input := input + 1
-      if escape_char(input^, constructed_string) then
-        input := input + 1
-	  else
-	    is_valid := false
-      end
-    elsif input^ <> '\0' then
-      constructed_string^ := input^
-      input := input + 1
+  while is_valid and not source_code_empty(source_code) and source_code_head(source_code^) <> '"' do
+    is_valid := lex_character(source_code, @next_char)
+
+    if is_valid then
+      string_buffer_push(token_content, next_char)
    end
-
-    constructed_string := constructed_string + 1
  end

-  return token_end
+  if is_valid and source_code_expect(source_code, '"') then
+    source_code_advance(source_code)
+  else
+    is_valid := false
+  end
+  return is_valid
+end
+
+(*
+  Reads a run of decimal digits from the source and stores its
+  value in token_content. The value is 0 when the source does not
+  start with a digit.
+*)
+proc lex_number(source_code: ^SourceCode, token_content: ^Int);
+var
+  digit: Int
+begin
+  token_content^ := 0
+
+  while not source_code_empty(source_code) and is_digit(source_code_head(source_code^)) do
+    digit := cast(source_code_head(source_code^): Int) - cast('0': Int)
+    token_content^ := token_content^ * 10 + digit
+
+    source_code_advance(source_code)
+  end
 end

 proc print_tokens(tokens: ^Token, tokens_size: Word);
@ -647,46 +729,43 @@ end

 proc tokenize(source_code: SourceCode, tokens_size: ^Word) -> ^Token;
 var
-  token_end: ^Char
  tokens, current_token: ^Token
-  token_length: Word
  first_char: Char
-  token_content: String
+  token_buffer: StringBuffer
 begin
  tokens_size^ := 0u
  tokens := nil
-  source_code := skip_spaces(source_code)
+  token_buffer := string_buffer_new()

-  while source_code.text.length <> 0u do
-	tokens := cast(reallocarray(cast(tokens: ^Byte), tokens_size^ + 1u, Token.size): ^Token)
+  skip_spaces(@source_code)
+
+  while not source_code_empty(@source_code) do
+	tokens := cast(reallocarray(cast(tokens: ^Byte), tokens_size^ + 1u, #size(Token)): ^Token)
    current_token := tokens + tokens_size^
-	first_char := source_code.text[1u]
+	first_char := source_code_head(source_code)

    if is_alpha(first_char) or first_char = '_' then
-      lex_identifier(@source_code, @token_content)
-      current_token^ := categorize_identifier(token_content)
+      lex_identifier(@source_code, @token_buffer)
+      current_token^ := categorize_identifier(string_buffer_clear(@token_buffer))
 	elsif is_digit(first_char) then
-	  token_end := nil
-	  current_token^.value.int_value := strtol(source_code.text.ptr, @token_end, 10)
-	  token_length := cast(token_end - source_code.text.ptr: Word)
+	  lex_number(@source_code, @current_token^.value.int_value)

-	  if token_end^ = 'u' then
+	  if source_code_expect(@source_code, 'u') then
 	    current_token^.kind := TOKEN_WORD
-        source_code := advance_source(source_code, token_length + 1u)
+        source_code_advance(@source_code)
 	  else
 	    current_token^.kind := TOKEN_INTEGER
-        source_code := advance_source(source_code, token_length)
 	  end
 	elsif first_char = '(' then
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)

-	  if source_code.text.length = 0u then
+	  if source_code_empty(@source_code) then
 	    current_token^.kind := TOKEN_LEFT_PAREN
-	  elsif source_code.text[1u] = '*' then
-	    source_code := advance_source(source_code, 1u)
+	  elsif source_code_head(source_code) = '*' then
+	    source_code_advance(@source_code)

-		if lex_comment(@source_code, @token_content) then
-	      current_token^.value.string := string_dup(token_content)
+		if lex_comment(@source_code, @token_buffer) then
+	      current_token^.value.string := string_dup(string_buffer_clear(@token_buffer))
 		  current_token^.kind := TOKEN_COMMENT
 		else
 	      current_token^.kind := 0
@ -696,125 +775,125 @@ begin
 	  end
 	elsif first_char = ')' then
 	  current_token^.kind := TOKEN_RIGHT_PAREN
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = '\'' then
-	  token_end := lex_character(source_code.text.ptr + 1, current_token)
-	  token_length := cast(token_end - source_code.text.ptr: Word)
+      source_code_advance(@source_code)

-	  if token_end^ = '\'' then
+	  if lex_character(@source_code, @current_token^.value.char_value) and source_code_expect(@source_code, '\'') then
 	  	current_token^.kind := TOKEN_CHARACTER
-	    source_code := advance_source(source_code, token_length + 1u)
+	    source_code_advance(@source_code)
 	  else
-	    source_code := advance_source(source_code, 1u)
+	    current_token^.kind := 0
 	  end
 	elsif first_char = '"' then
-	  token_end := lex_string(source_code.text.ptr + 1, current_token)
-
-      if token_end^ = '"' then
+      source_code_advance(@source_code)
+	  
+      if lex_string(@source_code, @token_buffer) then
 		current_token^.kind := TOKEN_STRING
-	    token_length := cast(token_end - source_code.text.ptr: Word)
-	    source_code := advance_source(source_code, token_length + 1u)
+		current_token^.value.string := string_dup(string_buffer_clear(@token_buffer))
+	  else
+	    current_token^.kind := 0
 	  end
 	elsif first_char = '[' then
 	  current_token^.kind := TOKEN_LEFT_SQUARE
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = ']' then
 	  current_token^.kind := TOKEN_RIGHT_SQUARE
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = '>' then
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)

-      if source_code.text.length = 0u then
+      if source_code_empty(@source_code) then
 	    current_token^.kind := TOKEN_GREATER_THAN
-	  elsif source_code.text[1u] = '=' then
+	  elsif source_code_head(source_code) = '=' then
 	    current_token^.kind := TOKEN_GREATER_EQUAL
-	    source_code := advance_source(source_code, 1u)
-	  elsif source_code.text[1u] = '>' then
+	    source_code_advance(@source_code)
+	  elsif source_code_head(source_code) = '>' then
 	    current_token^.kind := TOKEN_SHIFT_RIGHT
-	    source_code := advance_source(source_code, 1u)
+	    source_code_advance(@source_code)
 	  else
 	    current_token^.kind := TOKEN_GREATER_THAN
 	  end
 	elsif first_char = '<' then
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)

-	  if source_code.text.length = 0u then
+	  if source_code_empty(@source_code) then
 	    current_token^.kind := TOKEN_LESS_THAN
-	  elsif source_code.text[1u] = '=' then
+	  elsif source_code_head(source_code) = '=' then
 	    current_token^.kind := TOKEN_LESS_EQUAL
-	    source_code := advance_source(source_code, 1u)
-	  elsif source_code.text[1u] = '<' then
+	    source_code_advance(@source_code)
+	  elsif source_code_head(source_code) = '<' then
 	    current_token^.kind := TOKEN_SHIFT_LEFT
-	    source_code := advance_source(source_code, 1u)
-	  elsif source_code.text[1u] = '>' then
+	    source_code_advance(@source_code)
+	  elsif source_code_head(source_code) = '>' then
 	    current_token^.kind := TOKEN_NOT_EQUAL
-	    source_code := advance_source(source_code, 1u)
+	    source_code_advance(@source_code)
 	  else
 	    current_token^.kind := TOKEN_LESS_THAN
 	  end
 	elsif first_char = '=' then
 	  current_token^.kind := TOKEN_EQUAL
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = ';' then
 	  current_token^.kind := TOKEN_SEMICOLON
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = '.' then
 	  current_token^.kind := TOKEN_DOT
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = ',' then
 	  current_token^.kind := TOKEN_COMMA
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = '+' then
 	  current_token^.kind := TOKEN_PLUS
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = '-' then
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)

-      if source_code.text.length = 0u then
+      if source_code_empty(@source_code) then
 	    current_token^.kind := TOKEN_MINUS
-	  elsif source_code.text[1u] = '>' then
+	  elsif source_code_head(source_code) = '>' then
 	    current_token^.kind := TOKEN_ARROW
-	    source_code := advance_source(source_code, 1u)
+	    source_code_advance(@source_code)
 	  else
 	    current_token^.kind := TOKEN_MINUS
 	  end
 	elsif first_char = '*' then
 	  current_token^.kind := TOKEN_MULTIPLICATION
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = '/' then
 	  current_token^.kind := TOKEN_DIVISION
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = '%' then
 	  current_token^.kind := TOKEN_REMAINDER
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = ':' then
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)

-	  if source_code.text.length = 0u then
+	  if source_code_empty(@source_code) then
 	    current_token^.kind := TOKEN_COLON
-	  elsif source_code.text[1u] = '=' then
+	  elsif source_code_head(source_code) = '=' then
 	    current_token^.kind := TOKEN_ASSIGNMENT
-	    source_code := advance_source(source_code, 1u)
+	    source_code_advance(@source_code)
 	  else
 	    current_token^.kind := TOKEN_COLON
 	  end
 	elsif first_char = '^' then
 	  current_token^.kind := TOKEN_HAT
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = '@' then
 	  current_token^.kind := TOKEN_AT
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	elsif first_char = '!' then
 	  current_token^.kind := TOKEN_EXCLAMATION
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	else
 	  current_token^.kind := 0
-	  source_code := advance_source(source_code, 1u)
+	  source_code_advance(@source_code)
 	end

 	if current_token^.kind <> 0 then
      tokens_size^ := tokens_size^ + 1u
-      source_code := skip_spaces(source_code)
+      skip_spaces(@source_code)
 	else
 	  write_s("Lexical analysis error on \"")
 	  write_c(first_char)
@ -832,7 +911,7 @@ var
  result: ^CommandLine
 begin
  i := 1
-  result := cast(malloc(CommandLine.size): ^CommandLine)
+  result := cast(malloc(#size(CommandLine)): ^CommandLine)
  result^.tokenize := false
  result^.syntax_tree := false
  result^.input := nil
@ -872,23 +951,38 @@ var
  tokens_size: Word
  source_code: SourceCode
  command_line: ^CommandLine
+  return_code: Int
 begin
+  return_code := 0
+
  command_line := parse_command_line(argc, argv)
  if command_line = nil then
-    return 2
+    return_code := 2
  end

-  source_code.position := make_position()
-  if not read_source(command_line^.input, @source_code.text) then
-    perror(command_line^.input)
-	return 3
-  end
-  tokens := tokenize(source_code, @tokens_size)
+  if return_code = 0 then
+	source_code.position := make_position()

-  if command_line^.tokenize then
-    print_tokens(tokens, tokens_size)
+	source_code.input := cast(read_source(command_line^.input): ^Byte)
+	source_code.empty := source_file_empty
+	source_code.head := source_file_head
+	source_code.advance := source_file_advance
+
+	if source_code.input = nil then
+      perror(command_line^.input)
+      return_code := 3
+	end
  end
-  return 0
+  if return_code = 0 then
+    tokens := tokenize(source_code, @tokens_size)
+
+    fclose(cast(source_code.input: ^SourceFile)^.handle)
+
+    if command_line^.tokenize then
+      print_tokens(tokens, tokens_size)
+    end
+  end
+  return return_code
 end

 begin