/* gettoken.c */
/* Notice of Copyright, License and Warranty
**
** This software is Copyright 1998, 1999, 2000 Jeffrey S. Dutky
** This software is licensed for use under the terms of the GNU General
** Public License (also called the GPL), a copy of which must be included
** with any distribution of this software. You may also find a copy of the
** GPL at the Free Software Foundation's web site at http://www.fsf.org/
** or http://www.gnu.org.
**
** This software is provided "as is" and without any express or implied
** warranties, including, without limitation, the implied warranties of
** merchantability and fitness for a particular purpose.
*/
#include <string.h>
#include "gettoken.h"
/*
** Action codes stored in slot 0 of every stab entry. gettoken() switches
** on them for each input character:
**   UNGET      - push the character back onto the stream and end the token
**   RETURN     - append the character to the token and end the token
**   ACCUMULATE - (any other value) append the character and keep scanning
*/
#define ACCUMULATE 0
#define RETURN 1
#define UNGET 2
/*
** Initializers for one stab entry, laid out as
** {action, token type id, next state}.
*/
#define A(TID,NEXT) {ACCUMULATE,TID,NEXT}
#define R(TID,NEXT) {RETURN,TID,NEXT}
#define U(TID,NEXT) {UNGET,TID,NEXT}
/*
** Filler entry {0,STATE,0}. In this file it appears only in the first
** row of stab (character class 0).
*/
#define ___(STATE) {0,STATE,0}
/*
** Character classes. Each input byte is mapped to one of these by cclass[]
** and the class selects a row of the state table. Class 0 is not assigned
** to any byte.
*/
#define WHITESPACE 1
#define NEWLINE 2
#define LETTER 3
#define HEXDIGIT 4
#define EXPONENT 5
#define FLOATSPEC 6
#define SIZESPEC 7
#define RADIX 8
#define PLUSMINUS 9
#define NUMBER 10
#define BRACES 11
#define ZERO 12
#define QUOTES 13
#define APOSTROPHE 14
#define BACKSLASH 15
#define SLASH 16
#define PERIOD 17
#define HASHMARK 18
#define ASTERISK 19
#define OTHER 20
/* Two-character aliases that keep the cclass[] table readable. */
#define WS WHITESPACE
#define NL NEWLINE
#define LT LETTER
#define HX HEXDIGIT
#define EX EXPONENT
#define FL FLOATSPEC
#define SZ SIZESPEC
#define RX RADIX
#define PM PLUSMINUS
#define DG NUMBER
#define BR BRACES
#define ZO ZERO
#define QU QUOTES
#define AP APOSTROPHE
#define BS BACKSLASH
#define SL SLASH
#define PD PERIOD
#define HM HASHMARK
#define ST ASTERISK
#define __ OTHER
/* Dimensions of the state table: rows (classes 0..20) and columns (states). */
#define NCLASSES 21
#define NSTATES 23
/* One-letter aliases for the token type ids used in the state table. */
#define G GENERIC
#define X COMMENT
#define K KEYWORD
#define I INTEGER
#define F FLOAT
#define P PREPROCESSOR
#define S STRING
#define C CHARACTER
/*
** Character class for every byte value 0..255, sixteen entries per row.
**
** In the first row the NEWLINE entries belong at index 10 (line feed) and
** index 13 (carriage return). They used to sit at 9 and 12, which made a
** horizontal tab (9) and a form feed (12) act as line ends, so a tab cut
** short any line comment or preprocessor line it appeared in.
*/
int cclass[256]={
/* 0x00 */ WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, NL, WS, WS, NL, WS, WS,
/* 0x10 */ WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS,
/* 0x20 */ WS, __, QU, HM, __, __, __, AP, __, __, ST, PM, __, PM, PD, SL,
/* 0x30 */ ZO, DG, DG, DG, DG, DG, DG, DG, DG, DG, __, __, __, __, __, __,
/* 0x40 */ __, HX, HX, HX, HX, EX, FL, LT, LT, LT, LT, LT, SZ, LT, LT, LT,
/* 0x50 */ LT, LT, LT, LT, LT, SZ, LT, LT, RX, LT, LT, __, BS, __, __, LT,
/* 0x60 */ __, HX, HX, HX, HX, EX, FL, LT, LT, LT, LT, LT, SZ, LT, LT, LT,
/* 0x70 */ LT, LT, LT, LT, LT, SZ, LT, LT, RX, LT, LT, BR, __, BR, __, __,
/* 0x80 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
/* 0x90 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
/* 0xA0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
/* 0xB0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
/* 0xC0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
/* 0xD0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
/* 0xE0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
/* 0xF0 */ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __
};
/*
** Accessors for one state-table entry, selected by character class (row)
** and current scanner state (column):
**   ACTION - slot 0, what to do with the character
**   TYPEID - slot 1, token type reported for the transition
**   NEXTST - slot 2, scanner state to move to
*/
#define ACTION(CLASS,STATE) (stab[CLASS][STATE][0])
#define TYPEID(CLASS,STATE) (stab[CLASS][STATE][1])
#define NEXTST(CLASS,STATE) (stab[CLASS][STATE][2])
/*
** State table: stab[class][state] = {action, token type id, next state}.
** Rows follow the character class numbering (the first row is class 0 and
** holds only filler entries); the trailing comment on each row names the
** class. Columns are scanner states, named by the original header comment
** below:
**    0 init      1 key        2 int        3 num       4 hex      5 dot
**    6 float     7 exp        8 pow        9 size     10 string  11 str esc
**   12 char     13 char esc  14 slash     15 remark   16 block   17 block end
**   18 prepro   19 prepro esc 20 fp spec  21 pp-rem1  22 pp-rem2
**
** NOTE(review): in column 5 (dot) every row except the two digit rows is
** R(G,0), so the character that follows a '.' is consumed into the generic
** token instead of being pushed back as in column 14 (slash) - confirm
** this is intended.
*/
int stab[NCLASSES][NSTATES][3]={
/* init key int num hex dot float exp pow size string str esc char char esc slash remark block block end prepro prepro esc fp spec pp-rem1 pp-rem2 */
/* */{___( 0),___( 1),___( 2),___( 3),___( 4),___( 5),___( 6),___( 7),___( 8),___( 9),___(10),___(11),___(12),___(13),___(14),___(15),___(16),___(17),___(18),___(19),___(20),___(21),___(22)},
/* */{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* white */
/*cr*/{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),U(X, 0),A(X,16),A(X,16),R(P, 0),A(P,18),U(F, 0),U(P, 0),U(P, 0)},/* eoln */
/* A*/{A(K, 1),A(K, 1),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* alpha */
/*h */{A(K, 1),A(K, 1),U(I, 0),U(I, 0),A(I, 4),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* hex */
/* e */{A(K, 1),A(K, 1),A(I, 7),A(I, 7),A(I, 4),R(G, 0),A(F, 7),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* exp */
/* f */{A(K, 1),A(K, 1),A(F,20),A(F,20),A(I, 4),R(G, 0),A(F,20),U(F, 0),A(F,20),A(F,20),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),A(F,20),A(P,18),A(P,18)},/* float */
/*lu*/{A(K, 1),A(K, 1),A(I, 9),A(I, 9),A(I, 9),R(G, 0),A(F,20),U(F, 0),A(F,20),A(I, 9),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),A(F,20),A(P,18),A(P,18)},/* size */
/* x */{A(K, 1),A(K, 1),U(I, 0),A(I, 4),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* radix */
/* +*/{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),A(F, 8),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* sign */
/*09*/{A(I, 2),A(K, 1),A(I, 2),A(I, 2),A(I, 4),A(F, 6),A(F, 6),A(F, 8),A(F, 8),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* num */
/* {}*/{R(K, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* brace */
/* 0 */{A(I, 3),A(K, 1),A(I, 2),A(I, 2),A(I, 4),A(F, 6),A(F, 6),A(F, 8),A(F, 8),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* zero */
/* " */{A(S,10),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),R(S, 0),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* quote */
/* ' */{A(C,12),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),R(C, 0),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* apost */
/* \ */{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,11),A(S,10),A(C,13),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,19),A(P,18),U(F, 0),A(P,18),A(P,18)},/* back */
/* / */{A(G,14),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),A(X,15),A(X,15),A(X,16),R(X, 0),U(P,21),A(P,18),U(F, 0),A(P,22),A(X,15)},/* slash */
/* . */{A(G, 5),U(K, 0),A(F, 6),A(F, 6),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* dot */
/* # */{A(P,18),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* hash */
/* * */{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),A(X,16),A(X,15),A(X,17),A(X,17),A(P,18),A(P,18),U(F, 0),A(P,18),A(X,16)},/* star */
/* ? */{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)} /* other */
};
/*
** Map a character value to its class. Values outside 0..255 are OTHER.
** The argument is evaluated more than once, so pass a plain variable.
*/
#define CLASSIFY(CH) (((CH)>=0 && (CH)<256)?cclass[(CH)]:OTHER)
/*
** Characters that must not be copied into the token verbatim, and the HTML
** entity emitted for each one. replaced[k] is the replacement for
** original[k]. The longest entity is six characters, which is what the
** headroom check in gettoken() allows for.
*/
static const char *original="\"&<>"; /* translated characters */
static const char *replaced[4]={"&quot;","&amp;","&lt;","&gt;"};
/*
** gettoken - read the next token from f into token.
**
** Characters are read one at a time, classified, and run through the state
** table. While copying into token, each tab is expanded to spaces up to
** the next multiple of tabwidth, and every character listed in original
** is replaced by the matching replaced[] string.
**
**   f        - input stream
**   token    - output buffer, always left NUL-terminated
**   max      - size of token in bytes; when the buffer is nearly full the
**              text gathered so far is returned and the scan resumes in
**              the same state on the next call
**   tabwidth - tab stop spacing; values below 1 are treated as 1
**   start    - non-zero resets the scanner state and column counter
**
** Returns the token type id from the state table, or ENDOFFILE once the
** input is exhausted and no text is pending. The scanner state and the
** current column are kept in static variables between calls.
*/
int gettoken(FILE *f, char *token, int max, int tabwidth, int start){
    static int st=0, col=0;
    int going, cc, ch, i, rv, room;
    const char *p, *rep;
    size_t len;

    if(start)
        st=0, col=0;
    /* A tab width below 1 would make col%tabwidth divide by zero. */
    if(tabwidth<1)
        tabwidth=1;
    /*
    ** Most bytes one input character can add: a six-character entity or
    ** a full tab stop of spaces, whichever is larger. One more byte is
    ** needed for the terminator.
    */
    room=(tabwidth>6)?tabwidth:6;
    i=0;
    going=1;
    rv=ENDOFFILE; /* overwritten before any use; keeps compilers quiet */
    if(max>0)
        token[0]='\0';
    cc=WHITESPACE;
    while(going){
        /* Not enough room left for a worst-case character: hand back
        ** what we have; st is kept so the next call continues. */
        if(i>=max-room)
            return TYPEID(cc,st);
        if((ch=fgetc(f))==EOF){
            /* Report text gathered before end of input as a token
            ** first; the following call then returns ENDOFFILE. */
            if(i>0)
                return rv;
            return ENDOFFILE;
        }
        cc=(ch=='\n')?NEWLINE:CLASSIFY(ch);
        switch(ACTION(cc,st)){
        case UNGET:
            ungetc(ch,f);
            going=0;
            break;
        case RETURN:
            going=0;
            /* fall through: the character still belongs to this token */
        default:
            if(ch=='\t'){
                /* At least one space, then pad to the next tab stop. */
                do{
                    token[i++]=' ';
                    col++;
                }while(col%tabwidth);
            }else{
                /* strchr() also matches the terminating NUL of
                ** original, so a NUL byte must not be looked up. */
                p=(ch!='\0')?strchr(original,ch):NULL;
                if(p){
                    rep=replaced[p-original];
                    len=strlen(rep);
                    memcpy(token+i,rep,len);
                    i+=(int)len;
                }else{
                    token[i++]=(char)ch;
                }
                /* A newline puts the next character in column 0. */
                if(ch=='\n')
                    col=0;
                else
                    col++;
            }
            token[i]='\0';
            break;
        }
        rv=TYPEID(cc,st);
        st=NEXTST(cc,st);
    }
    return rv;
}