; > Tokens ; Token table - ok ; Tokeniser - ok ; Detokeniser - not yet ; Acorn-style token table ; ======================= ; string, token, flag ; ; Token flag: ; Bit 0 - Conditional tokenisation (don't tokenise if followed by an alphabetic character). ; Bit 1 - Not start of statement. ; Bit 2 - Now middle of statement (command with parameters) ; Bit 3 - Expect a line number (after a GOTO, etc...). ; Bit 4 - Pseudo variable - add &40 token if at start of statement (external: hex number). ; Bit 5 - FN/PROC keyword - don't tokenise name of the subroutine. ; Bit 6 - Don't tokenise rest of line (REM, DATA, etc...) ; Bit 7 - Unused (external: quote toggle) .TokenTable EQUB "OSCLI" ,&FF,&06 ; 00000110 ; Put here temp'y for speed EQUB "PRINT" ,&F1,&06 ; 00000110 ; Put here temp'y for speed EQUB "AND" ,&80,&02 ; 00000010 EQUB "ABS" ,&94,&02 ; 00000010 EQUB "ACS" ,&95,&02 ; 00000010 EQUB "ADVAL" ,&96,&02 ; 00000010 EQUB "ASC" ,&97,&02 ; 00000010 EQUB "ASN" ,&98,&02 ; 00000010 EQUB "ATN" ,&99,&02 ; 00000010 EQUB "AUTO" ,&C6,&0A ; 00001010 EQUB "BGET" ,&9A,&03 ; 00000011 EQUB "BPUT" ,&D5,&07 ; 00000111 EQUB "COLOUR" ,&FB,&06 ; 00000110 EQUB "CALL" ,&D6,&06 ; 00000110 EQUB "CHAIN" ,&D7,&06 ; 00000110 EQUB "CHR$" ,&BD,&02 ; 00000010 EQUB "CLEAR" ,&D8,&03 ; 00000011 EQUB "CLOSE" ,&D9,&07 ; 00000111 EQUB "CLG" ,&DA,&03 ; 00000011 EQUB "CLS" ,&DB,&03 ; 00000011 EQUB "COS" ,&9B,&02 ; 00000010 EQUB "COUNT" ,&9C,&03 ; 00000011 EQUB "COLOR" ,&FB,&06 ; 00000110 EQUB "DATA" ,&DC,&42 ; 01000010 EQUB "DEG" ,&9D,&02 ; 00000010 EQUB "DEF" ,&DD,&02 ; 00000010 EQUB "DELETE" ,&C7,&0A ; 00001010 EQUB "DIV" ,&81,&02 ; 00000010 EQUB "DIM" ,&DE,&06 ; 00000110 EQUB "DRAW" ,&DF,&06 ; 00000110 EQUB "ENDPROC" ,&E1,&03 ; 00000011 EQUB "END" ,&E0,&03 ; 00000011 EQUB "ENVELOPE",&E2,&06 ; 00000110 EQUB "ELSE" ,&8B,&08 ; 00001000 EQUB "EVAL" ,&A0,&02 ; 00000010 EQUB "ERL" ,&9E,&03 ; 00000011 EQUB "ERROR" ,&85,&00 ; 00000000 EQUB "EOF" ,&C5,&03 ; 00000011 EQUB "EOR" ,&82,&02 ; 00000010 EQUB "ERR" ,&9F,&03 ; 00000011 EQUB "EXP" ,&A1,&02 ; 00000010 EQUB "EXT" ,&A2,&03 ; 00000011 EQUB "EDIT" ,&CE,&0A ; 00001010 EQUB "FOR" ,&E3,&06 ; 00000110 EQUB "FALSE" ,&A3,&03 ; 00000011 EQUB "FN" ,&A4,&22 ; 00100010 EQUB "GOTO" ,&E5,&0E ; 00001110 EQUB "GET$" ,&BE,&02 ; 00000010 EQUB "GET" ,&A5,&02 ; 00000010 EQUB "GOSUB" ,&E4,&0E ; 00001110 EQUB "GCOL" ,&E6,&06 ; 00000110 EQUB "HIMEM" ,&93,&17 ; 00010111 EQUB "INPUT" ,&E8,&06 ; 00000110 EQUB "IF" ,&E7,&06 ; 00000110 EQUB "INKEY$" ,&BF,&02 ; 00000010 EQUB "INKEY" ,&A6,&02 ; 00000010 EQUB "INT" ,&A8,&02 ; 00000010 EQUB "INSTR(" ,&A7,&02 ; 00000010 EQUB "LIST" ,&C9,&0A ; 00001010 EQUB "LINE" ,&86,&02 ; 00000010 EQUB "LOAD" ,&C8,&06 ; 00000110 EQUB "LOMEM" ,&92,&17 ; 00010111 EQUB "LOCAL" ,&EA,&06 ; 00000110 EQUB "LEFT$(" ,&C0,&02 ; 00000010 EQUB "LEN" ,&A9,&02 ; 00000010 EQUB "LET" ,&E9,&00 ; 00000000 EQUB "LOG" ,&AB,&02 ; 00000010 EQUB "LN" ,&AA,&02 ; 00000010 EQUB "MID$(" ,&C1,&02 ; 00000010 EQUB "MODE" ,&EB,&06 ; 00000110 EQUB "MOD" ,&83,&02 ; 00000010 EQUB "MOVE" ,&EC,&06 ; 00000110 EQUB "NEXT" ,&ED,&06 ; 00000110 EQUB "NEW" ,&CA,&03 ; 00000011 EQUB "NOT" ,&AC,&02 ; 00000010 EQUB "OLD" ,&CB,&03 ; 00000011 EQUB "ON" ,&EE,&06 ; 00000110 EQUB "OFF" ,&87,&02 ; 00000010 EQUB "OR" ,&84,&02 ; 00000010 EQUB "OPENIN" ,&8E,&02 ; 00000010 EQUB "OPENOUT" ,&AE,&02 ; 00000010 EQUB "OPENUP" ,&AD,&02 ; 00000010 ;;EQUB "OSCLI" ,&FF,&06 ; 00000110 ;;EQUB "PRINT" ,&F1,&06 ; 00000110 EQUB "PAGE" ,&90,&17 ; 00010111 EQUB "PTR" ,&8F,&17 ; 00010111 EQUB "PI" ,&AF,&03 ; 00000011 EQUB "PLOT" ,&F0,&06 ; 00000110 EQUB "POINT(" ,&B0,&02 ; 00000010 EQUB "PROC" ,&F2,&26 ; 00100110 EQUB "POS" ,&B1,&03 ; 00000011 EQUB "RETURN" ,&F8,&03 ; 00000011 EQUB "REPEAT" ,&F5,&02 ; 00000010 EQUB "REPORT" ,&F6,&03 ; 00000011 EQUB "READ" ,&F3,&06 ; 00000110 EQUB "REM" ,&F4,&42 ; 01000010 EQUB "RUN" ,&F9,&03 ; 00000011 EQUB "RAD" ,&B2,&02 ; 00000010 EQUB "RESTORE" ,&F7,&0E ; 00001110 EQUB "RIGHT$(" ,&C2,&02 ; 00000010 EQUB "RND" ,&B3,&03 ; 00000011 EQUB "RENUMBER",&CC,&0A ; 00001010 EQUB "STEP" ,&88,&02 ; 00000010 EQUB "SAVE" ,&CD,&06 ; 00000110 EQUB "SGN" ,&B4,&02 ; 00000010 EQUB "SIN" ,&B5,&02 ; 00000010 EQUB "SQR" ,&B6,&02 ; 00000010 EQUB "SPC" ,&89,&02 ; 00000010 EQUB "STR$" ,&C3,&02 ; 00000010 EQUB "STRING$(",&C4,&02 ; 00000010 EQUB "SOUND" ,&D4,&06 ; 00000110 EQUB "STOP" ,&FA,&03 ; 00000011 EQUB "TAN" ,&B7,&02 ; 00000010 EQUB "THEN" ,&8C,&08 ; 00001000 EQUB "TO" ,&B8,&02 ; 00000010 EQUB "TAB(" ,&8A,&02 ; 00000010 EQUB "TRACE" ,&FC,&0E ; 00001110 EQUB "TIME" ,&91,&17 ; 00010111 EQUB "TRUE" ,&B9,&03 ; 00000011 EQUB "UNTIL" ,&FD,&06 ; 00000110 EQUB "USR" ,&BA,&02 ; 00000010 EQUB "VDU" ,&EF,&06 ; 00000110 EQUB "VAL" ,&BB,&02 ; 00000010 EQUB "VPOS" ,&BC,&03 ; 00000011 EQUB "WIDTH" ,&FE,&06 ; 00000110 EQUB "PAGE" ,&D0,&02 ; 00000010 EQUB "PTR" ,&CF,&02 ; 00000010 EQUB "TIME" ,&D1,&02 ; 00000010 EQUB "LOMEM" ,&D2,&02 ; 00000010 EQUB "HIMEM" ,&D3,&02 ; 00000010 EQUB &00 ALIGN ; Tokeniser ; ========= ; On entry, R5=>untokenised text ; R4=>destination buffer ; Uses, R3=>token table address ; R2= current tokeniser flags ; R1= new tokeniser flags ; R0= character ; On exit, R5=> at end of input line ; R4=> at end of output line .Tokenise .TokenZero mov #0,r2 ; Clear tokeniser flags .TokenLoop movb (r5),r0 ; Get current character bit #&F0,r2 ; Any skip flags set? bne TokenByte ; Inside quote/REM/FN/hex bit #&08,r2 ; Is a line number expected? beq TokenNotLine ; No, try to tokenise jsr pc,CheckDigit ; Is character a digit? bcs TokenNotLine ; Not a digit, try to tokenise jsr pc,TokeniseNumber ; Try to tokenise number bcs TokenZero ; Couldn't tokenise, scan as number br TokenLoop ; Continue tokenising .TokenNotLine cmp r0,#ASC"A" ; Tokens start with a letter bcs TokenByte ; <'A', enter character cmp r0,#ASC"Z" bcc TokenByte ; >'Z', enter character adr TokenTable,r3 ; Point to token table jsr pc,SearchTable ; Search token table ; Returns r5=>before next character ; r4= unchanged, output pointer ; r2= unchanged, current tokeniser flags ; r1= new tokeniser flag ; r0= byte to enter, token or char bit #2,r2 ; Are we at the start of statement? bne TokenWord ; No, enter token/char bit #16,r1 ; Is this a pseudo-token? beq TokenWord ; No, enter unchanged add #&40,r0 ; Convert token to command token .TokenWord mov r1,r2 ; Copy new flags to current flags bic #16,r2 ; Clear pseudo-token/hex flag .TokenByte movb r0,(r4) ; Enter byte in output buffer cmp r0,#13 ; At end of line? bne TokenNotCR ; No, jump to next check rts pc ; Return at end of line ; R5=> at end of source ; R4=> at end of dest .TokenNotCR inc r5 ; Increment input pointer inc r4 ; Increment output pointer cmp r0,#34 ; Is char quote? bne TokenNotQuote ; No, jump to next check xor r2,#128 ; Toggle quote flag ; equw &7897 ; Assembler bug ; equw &0080 br TokenLoop ; Loop back to continue tokenising .TokenNotQuote bit #&C0,r2 ; Inside quotes or REM/DATA/*cmd? bne TokenLoop ; Loop back, ignoring character cmp r0,#&3A ; Is char colon? beq TokenZero ; Loop back to reset to start of statement cmp r0,#ASC"*" ; Is char star? bne TokenNotStar ; No, jump to next check bit #2,r2 ; At start of statement? bne TokenLoop ; No, treat as normal character mov #64,r2 ; Treat rest of line as comment br TokenLoop ; Jump back to continue scanning line .TokenNotStar cmp r0,#ASC"&" ; Is char hex number? bne TokenNotHex ; No, jump to next check mov #16,r2 ; Set 'scanning hex number' br TokenLoop ; Continue scanning line .TokenNotHex bit #&30,r2 ; Scanning PROC/FN or hex? beq TokenLoop ; No, jump back to continue cmp r0,#ASC"0" ; r0<'0' C=1, r0>='0' C=0 bcc TokenHexDigit .TokenHexEnd bic #&30,r2 ; Clear PROC/FN/Hex flag br TokenLoop ; Loop back to continue scanning .TokenHexDigit cmp r0,#ASC"9"+1 ; r0<':' C=1, r0>=':' C=0 bcs TokenLoop ; Still hex/name, loop back to continue cmp r0,#ASC"A" ; r0<'A' C=1, r0>='A' C=0 bcs TokenHexEnd ; End of name, clear flag and loop back bit #32,r2 ; Scanning PROC/FN? bne TokenPROCFN cmp r0,#ASC"F"+1 ; r0<'G' C=1, r0>='G' C=0 bcc TokenHexEnd ; End of hex number br TokenLoop ; Loop to continue scanning .TokenPROCFN cmp r0,#ASC"Z"+1 ; r0<'[' C=1, r0>=']' C=0 bcs TokenLoop ; Still an identifier cmp r0,#ASC"_" ; r0<'_' C=1, r0>='_' C=0 bcs TokenHexEnd ; End of identifier cmp r0,#ASC"z"+1 ; r0<'{' C=1, r0>='}' C=0 bcc TokenHexEnd ; End of identifier br TokenLoop ; Loop to continue scanning .TokeniseNumber sec rts pc ; Search token table ; ================== ; On entry, R5=>input string to match ; R4=>dest ; R3=>token table ; R2= current flags ; R0= current char ; On exit, R5=>last char ; R4=unchanged ; R3 corrupted ; R2=old flags ; R1=new flags ; R0=token or character ;NC=token matched, R0=token, R5=>end of matched string ; C=not matched, R0=byte, R5=>current char .SearchTable mov r5,r1 ; Save source pointer .SearchLoop movb (r5),r0 ; Get source character cmpb r0,(r3) ; Compare with token character beq SearchMatch ; Match, check if full match cmp r0,#ASC"." ; Abbreviation? beq SearchDot ; Jump to match abbreviation .SearchNext inc r3 ; Step past this token movb (r3),r0 bit #128,r0 beq SearchNext ; Loop until token byte inc r3 ; Step past token byte .SearchNextBack mov r1,r5 ; Restore source pointer inc r3 ; Step past flag to next token movb (r3),r0 ; Get next byte cmp r0,#0 ; Is it table terminator? bne SearchLoop ; Loop back to check next token movb (r5),r0 ; Get first char back mov r2,r1 ; new flags=old flags rts pc ; R0=char, R1=old flag, R2=old flag, R3=corrupted, R4=preserved, R5=>current char ; flags=CC .SearchDot inc r3 ; Step to end of token movb (r3),r0 bit #128,r0 beq SearchDot ; Loop until token byte fetched inc r3 ; Step to flag byte movb (r3),r1 ; Get new flags sec rts pc ; R0=token, R1=new flag, R2=old flag, R3=corrupted, R4=preserved, R5=>last char ; flags=CS .SearchMatch inc r5 ; Step to next source char inc r3 ; Step to next token char movb (r3),r0 ; Get next byte bit #&80,r0 ; Is this a token byte? beq SearchLoop ; No, loop to check next character inc r3 ; Point to flag ;bit #2,r2 ; Start of statement? ;beq SearchFound ; Start of statement, don't check alpha bitb #1,(r3) ; Needs nonalpha terminator? beq SearchFound ; No nonalpha needed, token matched movb (r5),r0 ; Get following source character cmp r0,#ASC"A" bcs SearchFound ; <'A', matched cmp r0,#ASC"Z"+1 bcc SearchFound ; >'Z', matched ; copy source to dest ; r4=>dest ; r5=>last source char+1 ; r1=>first source char .SearchAlpha movb (r1)+,r0 movb r0,(r4)+ cmp r1,r5 bne SearchAlpha dec r4 dec r5 mov r2,r1 clc rts pc .SearchFound dec r5 ; Point to last character of source movb (r3),r1 ; Get new token flag dec r3 ; Point to token byte movb (r3),r0 ; Get token byte sec rts pc ; R0=token, R1=new flag, R5=last matched char ; flags=CS .PrintLineNumber jsr pc,PrintInline equs "LINENUM",0 align rts pc .FindTOP .SetTOP .LineDelete .LineFind rts pc ; Check if character in A is a digit ; Returns CC if digit, CS if nondigit .CheckDigit cmp r0,#ASC"0" ; r0<'0' C=1, r0>='0' C=0 bcs CheckDigitExit ; Exit with CS if <'0' cmp #ASC"9",r0 ; '9'=r0 C=0 ; r0>'9' C=1, r0<='9' C=0 .CheckDigitExit ; Exit with CS if >'9' rts pc ; Detokeniser ; =========== .Detokenise jsr pc,PrintInline equs "TOKEN",0 align rts pc