diff options
Diffstat (limited to 'source/lexer.elna')
| -rw-r--r-- | source/lexer.elna | 139 |
1 files changed, 69 insertions, 70 deletions
diff --git a/source/lexer.elna b/source/lexer.elna index d5f529b..e6fc38c 100644 --- a/source/lexer.elna +++ b/source/lexer.elna @@ -1,12 +1,11 @@ (* This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/. *) -module; -import cstdio, cstring, cctype, cstdlib, common; +import cstdio, cstring, cctype, cstdlib, common const - CHUNK_SIZE := 85536u; + CHUNK_SIZE := 85536u type (* @@ -38,7 +37,7 @@ type greater, less, other - ); + ) TransitionState = ( start, colon, @@ -56,7 +55,7 @@ type leading_zero, decimal_suffix, finish - ); + ) LexerToken = record kind: LexerKind; value: union @@ -67,18 +66,18 @@ type end; start_location: TextLocation; end_location: TextLocation - end; - TransitionAction = proc(^Lexer, ^LexerToken); + end + TransitionAction = proc(^Lexer, ^LexerToken) Transition = record action: TransitionAction; next_state: TransitionState - end; - TransitionClasses = [22]Transition; + end + TransitionClasses = [22]Transition BufferPosition* = record iterator: ^Char; location: TextLocation - end; + end Lexer* = record input: ^FILE; buffer: ^Char; @@ -86,7 +85,7 @@ type length: Word; start: BufferPosition; current: BufferPosition - end; + end LexerKind* = ( unknown, identifier, @@ -153,15 +152,15 @@ type _program, _module, _import - ); + ) var - classification: [128]TransitionClass; - transitions: [16]TransitionClasses; + classification: [128]TransitionClass + transitions: [16]TransitionClasses -proc initialize_classification(); +proc initialize_classification() var - i: Word; + i: Word begin classification[1] := TransitionClass.eof; (* NUL *) classification[2] := TransitionClass.invalid; (* SOH *) @@ -297,13 +296,13 @@ begin classification[i] := TransitionClass.other; i := i + 1u end -end; +end -proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool; +proc compare_keyword(keyword: String, token_start: BufferPosition, token_end: ^Char) -> Bool var - result: Bool; - index: Word; - continue: Bool; + result: Bool + index: Word + continue: Bool begin index := 0u; result := true; @@ -319,28 +318,28 @@ begin result := result & index = keyword.length; return result & (token_start.iterator = token_end) -end; +end (* Reached the end of file. *) -proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken); +proc transition_action_eof(lexer: ^Lexer, token: ^LexerToken) begin token^.kind := LexerKind.unknown -end; +end -proc increment(position: ^BufferPosition); +proc increment(position: ^BufferPosition) begin position^.iterator := position^.iterator + 1 -end; +end (* Add the character to the token currently read and advance to the next character. *) -proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken); +proc transition_action_accumulate(lexer: ^Lexer, token: ^LexerToken) begin increment(@lexer^.current) -end; +end (* The current character is not a part of the token. Finish the token already * read. Don't advance to the next character. *) -proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken); +proc transition_action_finalize(lexer: ^Lexer, token: ^LexerToken) begin if lexer^.start.iterator^ = ':' then token^.kind := LexerKind.colon @@ -360,10 +359,10 @@ begin if lexer^.start.iterator^ = '.' then token^.kind := LexerKind.dot end -end; +end (* An action for tokens containing multiple characters. *) -proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken); +proc transition_action_composite(lexer: ^Lexer, token: ^LexerToken) begin if lexer^.start.iterator^ = '<' then if lexer^.current.iterator^ = '>' then @@ -383,10 +382,10 @@ begin token^.kind := LexerKind.arrow end; increment(@lexer^.current) -end; +end (* Skip a space. *) -proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken); +proc transition_action_skip(lexer: ^Lexer, token: ^LexerToken) begin increment(@lexer^.start); @@ -395,12 +394,12 @@ begin lexer^.start.location.column := 1u end; lexer^.current := lexer^.start -end; +end (* Delimited string action. *) -proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken); +proc transition_action_delimited(lexer: ^Lexer, token: ^LexerToken) var - text_length: Word; + text_length: Word begin if lexer^.start.iterator^ = '(' then token^.kind := LexerKind.comment @@ -422,10 +421,10 @@ begin token^.kind := LexerKind.string end; increment(@lexer^.current) -end; +end (* Finalize keyword or identifier. *) -proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken); +proc transition_action_key_id(lexer: ^Lexer, token: ^LexerToken) begin token^.kind := LexerKind.identifier; @@ -515,11 +514,11 @@ begin token^.kind := LexerKind.boolean; token^.value.booleanKind := false end -end; +end (* Action for tokens containing only one character. The character cannot be * followed by other characters forming a composite token. *) -proc transition_action_single(lexer: ^Lexer, token: ^LexerToken); +proc transition_action_single(lexer: ^Lexer, token: ^LexerToken) begin if lexer^.current.iterator^ = '&' then token^.kind := LexerKind.and @@ -567,14 +566,14 @@ begin token^.kind := LexerKind.pipe end; increment(@lexer^.current) -end; +end (* Handle an integer literal. *) -proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken); +proc transition_action_integer(lexer: ^Lexer, token: ^LexerToken) var - buffer: String; - integer_length: Word; - found: Bool; + buffer: String + integer_length: Word + found: Bool begin token^.kind := LexerKind.integer; @@ -584,12 +583,12 @@ begin token^.value.identifierKind[cast(token^.value.identifierKind[1]: Int) + 2] := '\0'; token^.value.integerKind := atoi(@token^.value.identifierKind[2]) -end; +end -proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int; +proc set_default_transition(current_state: TransitionState, default_action: TransitionAction, next_state: TransitionState) -> Int var - default_transition: Transition; - state_index: Int; + default_transition: Transition + state_index: Int begin default_transition.action := default_action; default_transition.next_state := next_state; @@ -619,7 +618,7 @@ begin transitions[state_index][cast(TransitionClass.other: Int) + 1] := default_transition; return state_index -end; +end (* * The transition table describes transitions from one state to another, given @@ -637,9 +636,9 @@ end; * For the meaning of actions see labels in the lex_next function, which * handles each action. *) -proc initialize_transitions(); +proc initialize_transitions() var - state_index: Int; + state_index: Int begin (* Start state. *) state_index := cast(TransitionState.start: Int) + 1; @@ -877,9 +876,9 @@ begin transitions[state_index][cast(TransitionClass.x: Int) + 1].action := nil; transitions[state_index][cast(TransitionClass.x: Int) + 1].next_state := TransitionState.finish -end; +end -proc lexer_make*(lexer: ^Lexer, input: ^FILE); +proc lexer_make*(lexer: ^Lexer, input: ^FILE) begin lexer^.input := input; lexer^.length := 0u; @@ -887,17 +886,17 @@ begin lexer^.buffer := cast(malloc(CHUNK_SIZE): ^Char); memset(cast(lexer^.buffer: Pointer), 0, CHUNK_SIZE); lexer^.size := CHUNK_SIZE -end; +end (* Returns the last read token. *) -proc lexer_current*(lexer: ^Lexer) -> LexerToken; +proc lexer_current*(lexer: ^Lexer) -> LexerToken var - current_class: TransitionClass; - current_state: TransitionState; - current_transition: Transition; - result: LexerToken; - index1: Word; - index2: Word; + current_class: TransitionClass + current_state: TransitionState + current_transition: Transition + result: LexerToken + index1: Word + index2: Word begin lexer^.current := lexer^.start; current_state := TransitionState.start; @@ -919,12 +918,12 @@ begin result.end_location := lexer^.current.location; return result -end; +end (* Read and return the next token. *) -proc lexer_lex*(lexer: ^Lexer) -> LexerToken; +proc lexer_lex*(lexer: ^Lexer) -> LexerToken var - result: LexerToken; + result: LexerToken begin if lexer^.length = 0u then lexer^.length := fread(cast(lexer^.buffer: Pointer), CHUNK_SIZE, 1u, lexer^.input); @@ -936,17 +935,17 @@ begin result := lexer_current(lexer); return result -end; +end -proc lexer_destroy*(lexer: ^Lexer); +proc lexer_destroy*(lexer: ^Lexer) begin free(cast(lexer^.buffer: Pointer)) -end; +end -proc lexer_initialize(); +proc lexer_initialize() begin initialize_classification(); initialize_transitions() -end; +end end. |
