-- $Source: /commtar/monoBANK/RTS/lex_fil.sub,v $ -- $Revision: 1.4 $ -- $Date: 87/11/10 13:53:49 $ -- $Author: fitch $ pragma revision ("$Revision: 1.4 $"); -- $Source: /commtar/monoBANK/RTS/lex_fil.sub,v $ -- $Revision: 1.4 $ -- $Date: 87/11/10 13:53:49 $ -- $Author: fitch $ ------------------------------------------------------------- separate (TEXT_IO) procedure RTS_LEX_FILE (FILE : FILE_TYPE; STATE_TABLE: RTS_LEXER.STATE_TRANSITIONS; CHAR_TO_TOKEN_TABLE: RTS_LEXER.CHAR_TO_TOKEN_MAPPING; WIDTH: FIELD; RESULT: out STRING; -- resulting lexical unit withOUT -- trailing blanks from file -- value in result(result'first .. LAST) LAST : out NATURAL ------------------/\ ) is --+----------------------------------------------------- --| Overview --| Lexes the next token in FILE according to STATE_TABLE and --| CHAR_TO_TOKEN_TABLE. If the token is Accept according to the --| tables, then the token without leading or trailing blanks --| is returned in RESULT(RESULT'first .. LAST). Otherwise, --| DATA_ERROR or END_ERROR are raised. --| --| Effect --| The effect is as specified for GET(file) procedures in enum_io, --| integer_io, fixed_io, and float_io: as to effect of WIDTH, when --| DATA_ERROR and END_ERROR are raised, and which character will be --| the next one read from the file. --| --| Algorithm --| 1. SKIP_BLANKS is called to skip spaces and tabs (and when width=0, --| line and page terminators), such that a call to NEXT will return --| the first non-blank character. --| 2. LEXER is called to "lex" the desired token, using NEXT. --| 3. NEXT returns a single "lookahead" character to LEXER each time it --| is called, and also "consumes" the previous lookahead character --| removing it from the file and storing it into RESULT. --| 4. If WIDTH/=0, fewer than WIDTH chars have been read, and the --| file is not at end-of-line, then (a) the syntax is bad, and --| (b) we need to read those remaining characters, or up to EOL. --| 5. DATA_ERROR is raised if the final state isn't OK; else LAST --| is given its appropriate value. --| --| Requires --| RESULT is a non-null string, big enough to hold the lexical item --| when stripped of leading/trailing blanks. The FSA specified by --| TABLEs are such that all accept transitions are made whenever a --| charcater which can't be a legal part of the lexical element is --| read. (NEXT returns a space when it reaches EOL or reads WIDTH --| characters.) use RTS_LEXER; dbg : boolean := false; -- add "constant" to entirely eliminate all debugging code. Num_Chars_Read : Natural := 0; Last_Index : Natural := RESULT'first - 1; -- Index into result string NEXT_CHAR : character; -- "OWN" variable of function Next result_state : state; -- result of LEXER Should_Advance_File : boolean := FALSE; -- Controls whether NEXT should actually "ADVANCE" the file, thereby -- "READING" the previously examined character. Should_Advance is -- false under these conditions -- 1. First time NEXT is called. This is because SKIP_BLANKS -- has already examined the first non-blank, and it should -- not be "READ" until the Lexer has examined it. -- 2. If EOL has been read, then the file should NOT advance. -- The Lexer probably won't call NEXT after hitting EOL, -- but if WIDTH /= 0 and the lexer finishes without reading -- WIDTH number of characters, then we need to perform -- that many calls to NEXT, and then raise DATA_ERROR -- 3. WIDTH /= 0 and WIDTH number of characters have been read. function NEXT return character; function MY_LEXER is new LEXER (next => NEXT); procedure SKIP_BLANKS --+----------------------------------------------------- --| Overview --| Skips leading blanks (= ' ' or horizontal_tab), and if --| width = 0, also line and page terminators. --| On completion, a call to NEXT will return the first non-blank --| --| Exceptions: --| END_ERROR for width = 0 and only blanks and terminators remain --| in file. --| END_ERROR for width /= 0 only if positioned at end-of-file --| --| DATA_ERROR for width /= 0 and all blanks WIDTH position, or up to --| first EOL. is CHAR : Character; Where : PEEK_PLACE; begin -- Look for the first non blank-like character. if Width /= 0 then -- Stop at a terminator Where := EOL; else -- Don't stop until non-terminator Where := NON_TERMINATOR; end if; loop -- Gobble leading spaces/terminators PEEK(FILE, Where, CHAR); if FILE.WHERE in EOL_Range then raise DATA_ERROR; -- Only possible if Width /= 0; end if; exit when CHAR /= ' ' and then CHAR /= ascii.HT ; if dbg then --dbg_on text_io.put_line("skip_blanks: just skipped a blank"); end if; --dbg_off ADVANCE(FILE); Num_Chars_Read := Num_Chars_Read + 1; if Num_Chars_Read = Width then raise DATA_ERROR; end if; end loop; end SKIP_BLANKS; function Next return character is --+----------------------------------------------------- --| Overview --| "CONSUMES" the previous lookahead character and returns --| the next lookahead character, incrementing Num_Chars_Read --| each time a character is consumed and putting it into the --| next position of RESULT --| --| Requires --| SKIP_BLANKS is called before Next is called the first time. --| --| Modifies --| Num_Chars_Read, RESULT, and Last_Index --| NEXT_CHAR is used as a temporary, but MUST BE an OWN variable (i.e., --| global), because its value is set on one call, and stored away in --| RESULT on the next call. --| Should_Advance_File begin if Should_Advance_File then --Gobble previous character and save it away in result Num_Chars_Read := Num_Chars_Read + 1; Last_Index := Last_Index + 1; RESULT (Last_Index) := NEXT_CHAR; ADVANCE (FILE); -- gobble up the character else -- Don't advance file this time, but DO advance it next -- time (unless should-advance is set false again when -- at EOL or width /= 0 and have already read WIDTH chars Should_Advance_File := TRUE; end if; if Width /= 0 and then Num_Chars_Read >= Width then -- End of width: Don't read any more, but return space -- so Lexer can ACCEPT, if string is OK Should_Advance_File := FALSE; NEXT_CHAR := ' '; else PEEK(FILE, EOL, NEXT_CHAR); -- Look at next character if FILE.WHERE in EOL_Range then -- Line/page/file terminator, return a space, and don't advanc Should_Advance_File := FALSE; NEXT_CHAR := ' '; end if; end if; -- width /= 0 if dbg then --dbg_on text_io.put_line("NEXT: returning :" & next_char & ':' ); end if; --dbg_off return (NEXT_CHAR); end Next; begin if Mode(File) -- will raise STATUS_ERROR if FILE not open /= In_File then raise MODE_ERROR; end if; SKIP_BLANKS; Result_State := MY_LEXER (state_table, char_to_token_table); if dbg then --dbg_on text_io.new_line; text_io.put_line("lex_file: after calling lexer. width, num_char," & "last_index, result_state"); text_io.put(integer'image(width)); text_io.put(integer'image(num_chars_read)); text_io.put(integer'image(last_index)); text_io.put_line(state'image(result_state)); end if; --dbg_off -- For the case of width /= 0, check that WIDTH chars were -- read (or that EOL) was hit. If not then READ remaining -- width chars up to EOL, and then raise DATA_ERROR -- because it is required that the ENTIRE width number of -- characters satisfy the syntax table: if some chars are left -- then there must be trailing blanks and other characters if width > Num_Chars_Read and then FILE.WHERE not in EOL_range then for i in Num_Chars_Read + 1 .. WIDTH loop ADVANCE (FILE); PEEK (FILE, EOL, NEXT_CHAR); exit when FILE.WHERE in EOL_range; -- hit end of line end loop; raise DATA_ERROR; end if; -- Now check on final state of LEXER if Result_State = Accepted then LAST := Last_Index; else raise DATA_ERROR; end if; end RTS_LEX_FILE; -- $Cprt start$ -- -- Copyright (C) 1988 by Intermetrics, Inc. -- -- This material may be used duplicated or disclosed by or for the -- U.S. Government pursuant to the copyright license under DAR clause -- 7-104.9(a) (May 1981). -- -- This project was spnsored by the STARS Foundation -- Naval Research Laboratory, Washington DC -- -- $Cprt end$