Update to GCC 15.1

2025-05-14 23:05:52 +02:00
parent a02e053ed2
commit 981059e745
5 changed files with 218 additions and 393 deletions
--- a/source.elna
+++ b/source.elna
@ -1,7 +1,12 @@
+program
+
 const
  SEEK_SET* := 0
  SEEK_CUR* := 1
  SEEK_END* := 2
+  STDIN := 0
+  STDOUT := 1
+  STDERR := 2

 type
  TokenKind* = (
@ -19,12 +24,12 @@ type
    _extern,
    _const,
    _var,
-    array,
+    _case,
    _of,
    _type,
    _record,
    _union,
-    pointer,
+    pipe,
    to,
    boolean,
    _nil,
@ -64,7 +69,9 @@ type
    string,
    _defer,
    exclamation,
-    arrow
+    arrow,
+	trait,
+	_program
  )
  Position* = record
    line: Word;
@ -106,56 +113,58 @@ type
  end
  CommandLine* = record
    input: ^Char;
-	tokenize: Bool;
-	syntax_tree: Bool
+	lex: Bool;
+	parse: Bool
  end

 (*
  External procedures.
 *)
-proc fopen(pathname: ^Char, mode: ^Char) -> ^FILE; extern
-proc fclose(stream: ^FILE) -> Int; extern
-proc fseek(stream: ^FILE, off: Int, whence: Int) -> Int; extern
-proc rewind(stream: ^FILE); extern
-proc ftell(stream: ^FILE) -> Int; extern
-proc fread(ptr: ^Byte, size: Word, nmemb: Word, stream: ^FILE) -> Word; extern
-proc write(fd: Int, buf: ^Byte, Word: Int) -> Int; extern

-proc malloc(size: Word) -> ^Byte; extern
-proc free(ptr: ^Byte); extern
-proc calloc(nmemb: Word, size: Word) -> ^Byte; extern
-proc realloc(ptr: ^Byte, size: Word) -> ^Byte; extern
+proc fopen(pathname: ^Char, mode: ^Char) -> ^FILE extern
+proc fclose(stream: ^FILE) -> Int extern
+proc fseek(stream: ^FILE, off: Int, whence: Int) -> Int extern
+proc rewind(stream: ^FILE) extern
+proc ftell(stream: ^FILE) -> Int extern
+proc fread(ptr: ^Byte, size: Word, nmemb: Word, stream: ^FILE) -> Word extern
+proc write(fd: Int, buf: ^Byte, Word: Int) -> Int extern

-proc memset(ptr: ^Char, c: Int, n: Int) -> ^Char; extern
+proc malloc(size: Word) -> ^Byte extern
+proc free(ptr: ^Byte) extern
+proc calloc(nmemb: Word, size: Word) -> ^Byte extern
+proc realloc(ptr: ^Byte, size: Word) -> ^Byte extern

-proc strcmp(s1: ^Char, s2: ^Char) -> Int; extern
-proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int; extern
-proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char; extern
-proc strcpy(dst: ^Char, src: ^Char) -> ^Char; extern
-proc strlen(ptr: ^Char) -> Word; extern
+proc memset(ptr: ^Char, c: Int, n: Int) -> ^Char extern

-proc perror(s: ^Char); extern
-proc exit(code: Int) -> !; extern
+proc strcmp(s1: ^Char, s2: ^Char) -> Int extern
+proc strncmp(s1: ^Char, s2: ^Char, n: Word) -> Int extern
+proc strncpy(dst: ^Char, src: ^Char, dsize: Word) -> ^Char extern
+proc strcpy(dst: ^Char, src: ^Char) -> ^Char extern
+proc strlen(ptr: ^Char) -> Word extern
+
+proc perror(s: ^Char) extern
+proc exit(code: Int) -> ! extern

 (*
  Standard procedures.
 *)
-proc reallocarray(ptr: ^Byte, n: Word, size: Word) -> ^Byte;
+
+(*
+  Forwards to the external realloc procedure, requesting n * size bytes
+  for the block at ptr, and returns whatever realloc returns.
+
+  NOTE(review): the product n * size is not checked for overflow here.
+*)
+proc reallocarray(ptr: ^Byte, n: Word, size: Word) -> ^Byte
 begin
  return realloc(ptr, n * size)
 end

-proc write_s(value: String);
+(*
+  Writes the contents of a String by passing value.ptr and value.length
+  to the external write procedure. The result of write is ignored.
+
+  NOTE(review): the file descriptor passed is 0, which equals the STDIN
+  constant declared at the top of this file, not STDOUT - confirm that
+  this is intended.
+*)
+proc write_s(value: String)
 begin
  write(0, cast(value.ptr: ^Byte), cast(value.length: Int))
 end

-proc write_z(value: ^Char);
+(*
+  Writes a zero-terminated character sequence. The number of bytes passed
+  to the external write procedure is obtained with strlen(value). The
+  result of write is ignored.
+
+  NOTE(review): the file descriptor passed is 0, which equals the STDIN
+  constant declared at the top of this file, not STDOUT - confirm that
+  this is intended.
+*)
+proc write_z(value: ^Char)
 begin
  write(0, cast(value: ^Byte), cast(strlen(value): Int))
 end

-proc write_b(value: Bool);
+proc write_b(value: Bool)
 begin
  if value then
    write_s("true")
@ -164,12 +173,12 @@ begin
  end
 end

-proc write_c(value: Char);
+(*
+  Writes a single character: the address of the value parameter is passed
+  to the external write procedure with a length of 1. The result of write
+  is ignored.
+
+  NOTE(review): the file descriptor passed is 0, which equals the STDIN
+  constant declared at the top of this file, not STDOUT - confirm that
+  this is intended.
+*)
+proc write_c(value: Char)
 begin
  write(0, cast(@value: ^Byte), 1)
 end

-proc write_i(value: Int);
+proc write_i(value: Int)
 var
  digit: Int
  n: Word
@ -193,42 +202,42 @@ begin
  end
 end

-proc write_u(value: Word);
+(*
+  Writes an unsigned Word by casting it to Int and delegating to write_i.
+
+  NOTE(review): presumably values that do not fit into Int are not
+  printed as their unsigned value after the cast - verify.
+*)
+proc write_u(value: Word)
 begin
  write_i(cast(value: Int))
 end

-proc is_digit(c: Char) -> Bool;
+(*
+  Returns true when the character code of c lies between the codes of
+  '0' and '9', both inclusive.
+*)
+proc is_digit(c: Char) -> Bool
 begin
  return cast(c: Int) >= cast('0': Int) & cast(c: Int) <= cast('9': Int)
 end

-proc is_alpha(c: Char) -> Bool;
+(*
+  Returns true when c is a letter, that is when its character code lies
+  in 'A'..'Z' or in 'a'..'z'.
+
+  The two ranges are tested separately because the codes between 'Z' and
+  'a' belong to punctuation ('[', '\', ']', '^', '_' and the backquote),
+  which a single 'A'..'z' comparison would accept as letters. Callers
+  that allow an underscore test for it explicitly.
+*)
+proc is_alpha(c: Char) -> Bool
 begin
-  return cast(c: Int) >= cast('A': Int) & cast(c: Int) <= cast('z': Int)
+  return (cast(c: Int) >= cast('A': Int) & cast(c: Int) <= cast('Z': Int))
+    or (cast(c: Int) >= cast('a': Int) & cast(c: Int) <= cast('z': Int))
 end

-proc is_alnum(c: Char) -> Bool;
+(*
+  Returns true when c is accepted by is_digit or by is_alpha.
+*)
+proc is_alnum(c: Char) -> Bool
 begin
  return is_digit(c) or is_alpha(c)
 end

-proc is_space(c: Char) -> Bool;
+(*
+  Returns true when c is a space, a newline or a tab character. No other
+  characters are treated as whitespace by this procedure.
+*)
+proc is_space(c: Char) -> Bool
 begin
  return c = ' ' or c = '\n' or c = '\t'
 end

-proc substring(string: String, start: Word, count: Word) -> String;
+(*
+  Builds a String that starts at string.ptr + start and has the length
+  count. The characters are not copied, and neither start nor count is
+  checked against string.length here.
+*)
+proc substring(string: String, start: Word, count: Word) -> String
 begin
  return String(string.ptr + start, count)
 end

-proc open_substring(string: String, start: Word) -> String;
+(*
+  Returns the part of string from offset start up to its end, computed as
+  substring(string, start, string.length - start).
+
+  NOTE(review): start is not compared with string.length before the
+  subtraction - presumably callers guarantee start <= string.length.
+*)
+proc open_substring(string: String, start: Word) -> String
 begin
  return substring(string, start, string.length - start)
 end

-proc string_dup(origin: String) -> String;
+proc string_dup(origin: String) -> String
 var
  copy: ^Char
 begin
@ -238,7 +247,7 @@ begin
  return String(copy, origin.length)
 end

-proc string_buffer_new() -> StringBuffer;
+proc string_buffer_new() -> StringBuffer
 var
  result: StringBuffer
 begin
@ -249,7 +258,7 @@ begin
  return result
 end

-proc string_buffer_push(buffer: ^StringBuffer, char: Char);
+proc string_buffer_push(buffer: ^StringBuffer, char: Char)
 begin
  if buffer^.size >= buffer^.capacity then
    buffer^.capacity := buffer^.capacity + 1024u;
@ -259,12 +268,12 @@ begin
  buffer^.size := buffer^.size + 1u
 end

-proc string_buffer_pop(buffer: ^StringBuffer, count: Word);
+(*
+  Drops the last count characters from the buffer by decreasing its size
+  field. No other field of the buffer is assigned.
+
+  NOTE(review): count is not compared with the current size - presumably
+  callers never pop more than was pushed.
+*)
+proc string_buffer_pop(buffer: ^StringBuffer, count: Word)
 begin
  buffer^.size := buffer^.size - count
 end

-proc string_buffer_clear(buffer: ^StringBuffer) -> String;
+proc string_buffer_clear(buffer: ^StringBuffer) -> String
 var
  result: String
 begin
@ -274,15 +283,10 @@ begin
 end

 (*
-  End of standard procedures.
+  Source code stream procedures.
 *)

-proc make_position() -> Position;
-begin
-  return Position(1u, 1u)
-end
-
-proc read_source(filename: ^Char) -> ^SourceFile;
+proc read_source(filename: ^Char) -> ^SourceFile
 var
  result: ^SourceFile
  file_handle: ^FILE
@ -298,7 +302,70 @@ begin
  return result
 end

-proc escape_char(escape: Char, result: ^Char) -> Bool;
+(*
+  Reports whether a file-backed input has no more data.
+
+  source_input is cast to ^SourceFile. When index has moved past size,
+  fread is asked for up to 1024 items of one byte from the file handle
+  into the buffer, size is set to the number of items fread reports and
+  index is reset to 1. The result is true when size is 0.
+*)
+proc source_file_empty(source_input: ^Byte) -> Bool
+var
+  source_file: ^SourceFile
+begin
+  source_file := cast(source_input: ^SourceFile);
+
+  if source_file^.index > source_file^.size then
+    source_file^.size := fread(cast(@source_file^.buffer: ^Byte), 1u, 1024u, source_file^.handle);
+	source_file^.index := 1u
+  end;
+
+  return source_file^.size = 0u
+end
+
+(*
+  Returns the buffer element at the current index of a file-backed input
+  without changing the index. source_input is cast to ^SourceFile.
+
+  NOTE(review): index is not checked against size here - presumably
+  callers consult source_file_empty first.
+*)
+proc source_file_head(source_input: ^Byte) -> Char
+var
+  source_file: ^SourceFile
+begin
+  source_file := cast(source_input: ^SourceFile);
+
+  return source_file^.buffer[source_file^.index]
+end
+
+(*
+  Moves a file-backed input to its next character by increasing index by
+  one. source_input is cast to ^SourceFile. The buffer is not refilled
+  here; source_file_empty does that once index has passed size.
+*)
+proc source_file_advance(source_input: ^Byte)
+var
+  source_file: ^SourceFile
+begin
+  source_file := cast(source_input: ^SourceFile);
+
+  source_file^.index := source_file^.index + 1u
+end
+
+(*
+  Returns the result of calling the empty procedure stored in source_code
+  with the input stored in the same record.
+*)
+proc source_code_empty(source_code: ^SourceCode) -> Bool
+begin
+  return source_code^.empty(source_code^.input)
+end
+
+(*
+  Returns the result of calling the head procedure stored in source_code
+  with the input stored in the same record. Unlike the other source_code
+  procedures this one takes the SourceCode record itself, not a pointer.
+*)
+proc source_code_head(source_code: SourceCode) -> Char
+begin
+  return source_code.head(source_code.input)
+end
+
+(*
+  Consumes the current character: calls the advance procedure stored in
+  source_code with its input and then moves the tracked position one
+  column forward, so that the column follows the characters consumed on
+  the current line.
+*)
+proc source_code_advance(source_code: ^SourceCode)
+begin
+  source_code^.advance(source_code^.input);
+  source_code^.position.column := source_code^.position.column + 1u
+end
+
+(*
+  Records a line break in the tracked position: line is increased by one
+  and column is set to 0. The input itself is not advanced here.
+*)
+proc source_code_break(source_code: ^SourceCode)
+begin
+  source_code^.position.line := source_code^.position.line + 1u;
+  source_code^.position.column := 0u
+end
+
+(*
+  Returns true when source_code_empty reports that input is left and the
+  character returned by source_code_head equals expected. The character
+  is only inspected, not consumed.
+*)
+proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool
+begin
+  return ~source_code_empty(source_code) & source_code_head(source_code^) = expected
+end
+
+(*
+  Token procedures.
+*)
+
+proc escape_char(escape: Char, result: ^Char) -> Bool
 var
  successful: Bool
 begin
@ -344,66 +411,7 @@ begin
  return successful
 end

-proc source_file_empty(source_input: ^Byte) -> Bool;
-var
-  source_file: ^SourceFile
-begin
-  source_file := cast(source_input: ^SourceFile);
-
-  if source_file^.index > source_file^.size then
-    source_file^.size := fread(cast(@source_file^.buffer: ^Byte), 1u, 1024u, source_file^.handle);
-	source_file^.index := 1u
-  end;
-
-  return source_file^.size = 0u
-end
-
-proc source_file_head(source_input: ^Byte) -> Char;
-var
-  source_file: ^SourceFile
-begin
-  source_file := cast(source_input: ^SourceFile);
-
-  return source_file^.buffer[source_file^.index]
-end
-
-proc source_file_advance(source_input: ^Byte);
-var
-  source_file: ^SourceFile
-begin
-  source_file := cast(source_input: ^SourceFile);
-
-  source_file^.index := source_file^.index + 1u
-end
-
-proc source_code_empty(source_code: ^SourceCode) -> Bool;
-begin
-  return source_code^.empty(source_code^.input)
-end
-
-proc source_code_head(source_code: SourceCode) -> Char;
-begin
-  return source_code.head(source_code.input)
-end
-
-proc source_code_advance(source_code: ^SourceCode);
-begin
-  source_code^.advance(source_code^.input);
-  source_code^.position.column := source_code^.position.column
-end
-
-proc source_code_break(source_code: ^SourceCode);
-begin
-  source_code^.position.line := source_code^.position.line + 1u;
-  source_code^.position.column := 0u
-end
-
-proc source_code_expect(source_code: ^SourceCode, expected: Char) -> Bool;
-begin
-  return ~source_code_empty(source_code) & source_code_head(source_code^) = expected
-end
-
-proc skip_spaces(source_code: ^SourceCode);
+proc skip_spaces(source_code: ^SourceCode)
 var
  current: Char
 begin
@ -419,12 +427,12 @@ begin
  end
 end

-proc is_ident(char: Char) -> Bool;
+(*
+  Returns true when char is accepted by is_alnum or is an underscore.
+*)
+proc is_ident(char: Char) -> Bool
 begin
  return is_alnum(char) or char = '_'
 end

-proc lex_identifier(source_code: ^SourceCode, token_content: ^StringBuffer);
+proc lex_identifier(source_code: ^SourceCode, token_content: ^StringBuffer)
 var
  content_length: Word
 begin
@ -434,7 +442,7 @@ begin
  end
 end

-proc lex_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
+proc lex_comment(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool
 var
  trailing: Word
 begin
@ -457,7 +465,7 @@ begin
  return trailing = 2u
 end

-proc lex_character(source_code: ^SourceCode, token_content: ^Char) -> Bool;
+proc lex_character(source_code: ^SourceCode, token_content: ^Char) -> Bool
 var
  successful: Bool
 begin
@ -479,7 +487,7 @@ begin
  return successful
 end

-proc lex_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool;
+proc lex_string(source_code: ^SourceCode, token_content: ^StringBuffer) -> Bool
 var
  token_end, constructed_string: ^Char
  token_length: Word
@ -504,7 +512,7 @@ begin
  return is_valid
 end

-proc lex_number(source_code: ^SourceCode, token_content: ^Int);
+proc lex_number(source_code: ^SourceCode, token_content: ^Int)
 begin
  token_content^ := 0;

@ -515,7 +523,7 @@ begin
  end
 end

-proc print_tokens(tokens: ^Token, tokens_size: Word);
+proc print_tokens(tokens: ^Token, tokens_size: Word)
 var
  current_token: ^Token
  i: Word
@ -549,8 +557,8 @@ begin
 	    write_s("CONST")
 	  | TokenKind._var:
 	    write_s("VAR")
-   	  | TokenKind.array:
-	    write_s("ARRAY")
+   	  | TokenKind._case:
+	    write_s("CASE")
 	  | TokenKind._of:
 	    write_s("OF")
 	  | TokenKind._type:
@ -559,8 +567,8 @@ begin
 	    write_s("RECORD")
 	  | TokenKind._union:
 	    write_s("UNION")
-	  | TokenKind.pointer:
-	    write_s("POINTER")
+	  | TokenKind.pipe:
+	    write_s("|")
 	  | TokenKind.to:
 	    write_s("TO")
 	  | TokenKind.boolean:
@ -570,11 +578,11 @@ begin
 	  | TokenKind._nil:
 	    write_s("NIL")
 	  | TokenKind.and:
-	    write_s("AND")
+	    write_s("&")
 	  | TokenKind._or:
 	    write_s("OR")
 	  | TokenKind.not:
-	    write_s("NOT")
+	    write_s("~")
 	  | TokenKind._return:
 	    write_s("RETURN")
 	  | TokenKind._cast:
@ -587,6 +595,9 @@ begin
 	    write_c('<');
        write_s(current_token^.value.string);
 	    write_c('>')
+	  | TokenKind.trait:
+	    write_c('#');
+		write_s(current_token^.value.string)
 	  | TokenKind.left_paren:
 	    write_s("(")
 	  | TokenKind.right_paren:
@ -653,6 +664,8 @@ begin
 	    write_c('!')
 	  | TokenKind.arrow:
 	    write_s("->")
+	  | TokenKind._program:
+	    write_s("PROGRAM")
 	  else
 	    write_s("UNKNOWN<");
 	    write_i(cast(current_token^.kind: Int));
@ -665,68 +678,62 @@ begin
  write_c('\n')
 end

-proc categorize_identifier(token_content: String) -> Token;
+proc categorize_identifier(token_content: String) -> Token
 var
  current_token: Token
 begin
-  if "if" = token_content then
+  if token_content = "if" then
 	current_token.kind := TokenKind._if
-  elsif "then" = token_content then
+  elsif token_content = "then" then
 	current_token.kind := TokenKind._then
-  elsif "else" = token_content then
+  elsif token_content = "else" then
 	current_token.kind := TokenKind._else
-  elsif "elsif" = token_content then
+  elsif token_content = "elsif" then
 	current_token.kind := TokenKind._elsif
-  elsif "while" = token_content then
+  elsif token_content = "while" then
 	current_token.kind := TokenKind._while
-  elsif "do" = token_content then
+  elsif token_content = "do" then
 	current_token.kind := TokenKind._do
-  elsif "proc" = token_content then
+  elsif token_content = "proc" then
 	current_token.kind := TokenKind._proc
-  elsif "begin" = token_content then
+  elsif token_content = "begin" then
 	current_token.kind := TokenKind._begin
-  elsif "end" = token_content then
+  elsif token_content = "end" then
 	current_token.kind := TokenKind._end
-  elsif "extern" = token_content then
+  elsif token_content = "extern" then
 	current_token.kind := TokenKind._extern
-  elsif "const" = token_content then
+  elsif token_content = "const" then
 	current_token.kind := TokenKind._const
-  elsif "var" = token_content then
+  elsif token_content = "var" then
 	current_token.kind := TokenKind._var
-  elsif "array" = token_content then
-	current_token.kind := TokenKind.array
-  elsif "of" = token_content then
+  elsif token_content = "case" then
+	current_token.kind := TokenKind._case
+  elsif token_content = "of" then
 	current_token.kind := TokenKind._of
-  elsif "type" = token_content then
+  elsif token_content = "type" then
 	current_token.kind := TokenKind._type
-  elsif "record" = token_content then
+  elsif token_content = "record" then
 	current_token.kind := TokenKind._record
-  elsif "union" = token_content then
+  elsif token_content = "union" then
 	current_token.kind := TokenKind._union
-  elsif "pointer" = token_content then
-	current_token.kind := TokenKind.pointer
-  elsif "to" = token_content then
-	current_token.kind := TokenKind.to
-  elsif "true" = token_content then
+  elsif token_content = "true" then
 	current_token.kind := TokenKind.boolean;
 	current_token.value.boolean_value := true
-  elsif "false" = token_content then
+  elsif token_content = "false" then
 	current_token.kind := TokenKind.boolean;
 	current_token.value.boolean_value := false
-  elsif "nil" = token_content then
+  elsif token_content = "nil" then
 	current_token.kind := TokenKind._nil
-  elsif "and" = token_content then
-	current_token.kind := TokenKind.and
-  elsif "or" = token_content then
+  elsif token_content = "or" then
 	current_token.kind := TokenKind._or
-  elsif "not" = token_content then
-	current_token.kind := TokenKind.not
-  elsif "return" = token_content then
+  elsif token_content = "return" then
 	current_token.kind := TokenKind._return
-  elsif "cast" = token_content then
+  elsif token_content = "cast" then
 	current_token.kind := TokenKind._cast
-  elsif "defer" = token_content then
+  elsif token_content = "defer" then
 	current_token.kind := TokenKind._defer
+  elsif token_content = "program" then
+	current_token.kind := TokenKind._program
  else
 	current_token.kind := TokenKind.identifier;
 	current_token.value.string := string_dup(token_content)
@ -735,7 +742,7 @@ begin
  return current_token
 end

-proc tokenize(source_code: SourceCode, tokens_size: ^Word) -> ^Token;
+proc tokenize(source_code: SourceCode, tokens_size: ^Word) -> ^Token
 var
  tokens, current_token: ^Token
  first_char: Char
@ -755,6 +762,12 @@ begin
    if is_alpha(first_char) or first_char = '_' then
      lex_identifier(@source_code, @token_buffer);
      current_token^ := categorize_identifier(string_buffer_clear(@token_buffer))
+	elsif first_char = '#' then
+	  source_code_advance(@source_code);
+	  lex_identifier(@source_code, @token_buffer);
+
+	  current_token^.kind := TokenKind.trait;
+	  current_token^.value.string := string_dup(string_buffer_clear(@token_buffer))
 	elsif is_digit(first_char) then
 	  lex_number(@source_code, @current_token^.value.int_value);

@ -894,6 +907,15 @@ begin
 	elsif first_char = '!' then
 	  current_token^.kind := TokenKind.exclamation;
 	  source_code_advance(@source_code)
+	elsif first_char = '&' then
+	  current_token^.kind := TokenKind.and;
+	  source_code_advance(@source_code)
+	elsif first_char = '~' then
+	  current_token^.kind := TokenKind.not;
+	  source_code_advance(@source_code)
+	elsif first_char = '|' then
+	  current_token^.kind := TokenKind.pipe;
+	  source_code_advance(@source_code)
 	else
 	  current_token^.kind := TokenKind.unknown;
 	  source_code_advance(@source_code)
@ -912,7 +934,11 @@ begin
  return tokens
 end

-proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine;
+(*
+  Command line handling.
+*)
+
+proc parse_command_line*(argc: Int, argv: ^^Char) -> ^CommandLine
 var
  parameter: ^^Char
  i: Int
@ -920,23 +946,26 @@ var
 begin
  i := 1;
  result := cast(malloc(#size(CommandLine)): ^CommandLine);
-  result^.tokenize := false;
-  result^.syntax_tree := false;
+  result^.lex := false;
+  result^.parse := false;
  result^.input := nil;

  while i < argc do
    parameter := argv + i;

-    if strcmp(parameter^, "--tokenize\0".ptr) = 0 then
-      result^.tokenize := true
-    elsif strcmp(parameter^, "--syntax-tree\0".ptr) = 0 then
-      result^.syntax_tree := true
+    if strcmp(parameter^, "--lex\0".ptr) = 0 then
+      result^.lex := true
+    elsif strcmp(parameter^, "--parse\0".ptr) = 0 then
+      result^.parse := true
    elsif parameter^^ <> '-' then
+	  if result^.input <> nil then
+		write_s("Fatal error: Only one source file can be given.\n");
+		return nil
+	  end;
      result^.input := parameter^
    else
-      write_s("Fatal error: Unknown command line options:");
+      write_s("Fatal error: Unknown command line options: ");

-      write_c(' ');
      write_z(parameter^);
      write_s(".\n");

@ -953,7 +982,11 @@ begin
  return result
 end

-proc process(argc: Int, argv: ^^Char) -> Int;
+(*
+  Compilation entry.
+*)
+
+proc process(argc: Int, argv: ^^Char) -> Int
 var
  tokens: ^Token
  tokens_size: Word
@ -969,7 +1002,7 @@ begin
  end;

  if return_code = 0 then
-	source_code.position := make_position();
+	source_code.position := Position(1u, 1u);

 	source_code.input := cast(read_source(command_line^.input): ^Byte);
 	source_code.empty := source_file_empty;
@ -986,7 +1019,7 @@ begin

    fclose(cast(source_code.input: ^SourceFile)^.handle);

-    if command_line^.tokenize then
+    if command_line^.lex then
      print_tokens(tokens, tokens_size)
    end
  end;