/* gettoken.c */

/* Notice of Copyright, License and Warranty
**
** This software is Copyright 1998, 1999, 2000 Jeffrey S. Dutky
** This software is licensed for use under the terms of the GNU General
** Public License (also called the GPL), a copy of which must be included
** with any distribution of this software. You may also find a copy of the
** GPL at the Free Software Foundation's web site at http://www.fsf.org/
** or http://www.gnu.org.
**
** This software is provided "as is" and without any express or implied
** warranties, including, without limitation, the implied warranties of
** merchantability and fitness for a particular purpose.
*/

#include <string.h>
#include "gettoken.h"

/* Action codes stored in the first slot of every state-table entry.
** gettoken() appends the character and keeps going on ACCUMULATE, appends
** it and stops on RETURN, and pushes it back and stops on UNGET. */
#define ACCUMULATE 0
#define RETURN 1
#define UNGET 2

/* Builders for state-table entries: {action, token type, next state}. */
#define A(TID,NEXT) {ACCUMULATE,TID,NEXT}
#define R(TID,NEXT) {RETURN,TID,NEXT}
#define U(TID,NEXT) {UNGET,TID,NEXT}
/* Filler entry for the class-0 row (no character class has the value 0);
** note that STATE is placed in the token-type slot, not the next-state slot. */
#define ___(STATE) {0,STATE,0}

/* Character classes: the values held in cclass[] and the first index of stab[]. */
#define WHITESPACE 1
#define NEWLINE 2
#define LETTER 3
#define HEXDIGIT 4    /* a-d, A-D */
#define EXPONENT 5    /* e, E */
#define FLOATSPEC 6   /* f, F */
#define SIZESPEC 7    /* l, L, u, U */
#define RADIX 8       /* x, X */
#define PLUSMINUS 9   /* + and - */
#define NUMBER 10     /* digits 1-9 */
#define BRACES 11     /* { and } */
#define ZERO 12       /* the digit 0 */
#define QUOTES 13
#define APOSTROPHE 14
#define BACKSLASH 15
#define SLASH 16
#define PERIOD 17
#define HASHMARK 18
#define ASTERISK 19
#define OTHER 20

/* Two-character aliases that keep the 256-entry cclass[] table readable. */
#define WS WHITESPACE
#define NL NEWLINE
#define LT LETTER
#define HX HEXDIGIT
#define EX EXPONENT
#define FL FLOATSPEC
#define SZ SIZESPEC
#define RX RADIX
#define PM PLUSMINUS
#define DG NUMBER
#define BR BRACES
#define ZO ZERO
#define QU QUOTES
#define AP APOSTROPHE
#define BS BACKSLASH
#define SL SLASH
#define PD PERIOD
#define HM HASHMARK
#define ST ASTERISK
#define __ OTHER

/* Dimensions of stab[]: class indices 0..20 (0 is unused) and states 0..22. */
#define NCLASSES 21
#define NSTATES 23

/* One-letter aliases for the token type IDs, used to keep the stab[] rows
** compact.  The long names are not defined in this file; presumably they
** come from gettoken.h. */
#define G GENERIC
#define X COMMENT
#define K KEYWORD
#define I INTEGER
#define F FLOAT
#define P PREPROCESSOR
#define S STRING
#define C CHARACTER

/*
** Character class of every byte value, indexed by the value returned from
** fgetc().  Each source row holds 16 entries (ASCII layout).
**
** In the control range only LF (10) and CR (13) are line terminators;
** HT (9), VT (11) and FF (12) are ordinary whitespace.  Classifying HT as a
** newline would end a line comment or a preprocessor line at the first tab.
** Bytes 128..255 are all OTHER.
*/
int cclass[256]={
WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, NL, WS, WS, NL, WS, WS,
WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS, WS,
WS, __, QU, HM, __, __, __, AP, __, __, ST, PM, __, PM, PD, SL,
ZO, DG, DG, DG, DG, DG, DG, DG, DG, DG, __, __, __, __, __, __,
__, HX, HX, HX, HX, EX, FL, LT, LT, LT, LT, LT, SZ, LT, LT, LT,
LT, LT, LT, LT, LT, SZ, LT, LT, RX, LT, LT, __, BS, __, __, LT,
__, HX, HX, HX, HX, EX, FL, LT, LT, LT, LT, LT, SZ, LT, LT, LT,
LT, LT, LT, LT, LT, SZ, LT, LT, RX, LT, LT, BR, __, BR, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __
};

/* Selectors for the three fields of the state-table entry addressed by a
** character class and the current lexer state. */
#define ACTION(cls,state) (stab[(cls)][(state)][0])   /* what to do with the character */
#define TYPEID(cls,state) (stab[(cls)][(state)][1])   /* token type to report */
#define NEXTST(cls,state) (stab[(cls)][(state)][2])   /* state to enter next */

/*
** Lexer transition table, indexed stab[character class][current state].
** Every entry is a triple {action, token type, next state} built with the
** A() (accumulate), R() (return) and U() (unget) macros.
**
** Each source row below is one character class, in the numeric order of the
** class constants; the first row belongs to the unused class 0 and is filler.
** Each column is one state; the header comment names them:
**    0 init      1 key        2 int        3 num (leading zero)   4 hex
**    5 dot       6 float      7 exp        8 pow                  9 size
**   10 string   11 str esc   12 char      13 char esc            14 slash
**   15 remark   16 block     17 block end 18 prepro              19 prepro esc
**   20 fp spec  21 pp-rem1   22 pp-rem2
**
** NOTE(review): in the "dot" column every class except the digits uses
** R(G,0), so the character following a '.' is consumed into the same token;
** the "slash" column uses U(G,0) for the analogous case.  Confirm that this
** difference is intended.
*/
int stab[NCLASSES][NSTATES][3]={
/*                 init          key            int             num         hex            dot            float          exp             pow           size           string        str esc      char         char esc   slash       remark        block     block end   prepro     prepro esc  fp spec    pp-rem1  pp-rem2 */
/*    */{___( 0),___( 1),___( 2),___( 3),___( 4),___( 5),___( 6),___( 7),___( 8),___( 9),___(10),___(11),___(12),___(13),___(14),___(15),___(16),___(17),___(18),___(19),___(20),___(21),___(22)},
/*    */{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* white */
/*cr*/{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),U(X, 0),A(X,16),A(X,16),R(P, 0),A(P,18),U(F, 0),U(P, 0),U(P, 0)},/* eoln */
/* A*/{A(K, 1),A(K, 1),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* alpha */
/*h */{A(K, 1),A(K, 1),U(I, 0),U(I, 0),A(I, 4),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* hex */
/* e */{A(K, 1),A(K, 1),A(I, 7),A(I, 7),A(I, 4),R(G, 0),A(F, 7),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* exp */
/* f  */{A(K, 1),A(K, 1),A(F,20),A(F,20),A(I, 4),R(G, 0),A(F,20),U(F, 0),A(F,20),A(F,20),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),A(F,20),A(P,18),A(P,18)},/* float */
/*lu*/{A(K, 1),A(K, 1),A(I, 9),A(I, 9),A(I, 9),R(G, 0),A(F,20),U(F, 0),A(F,20),A(I, 9),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),A(F,20),A(P,18),A(P,18)},/* size */
/* x */{A(K, 1),A(K, 1),U(I, 0),A(I, 4),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* radix */
/* +*/{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),A(F, 8),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* sign */
/*09*/{A(I, 2),A(K, 1),A(I, 2),A(I, 2),A(I, 4),A(F, 6),A(F, 6),A(F, 8),A(F, 8),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* num */
/* {}*/{R(K, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* brace */
/* 0 */{A(I, 3),A(K, 1),A(I, 2),A(I, 2),A(I, 4),A(F, 6),A(F, 6),A(F, 8),A(F, 8),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* zero */
/* " */{A(S,10),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),R(S, 0),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* quote */
/*  ' */{A(C,12),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),R(C, 0),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* apost */
/* \ */{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,11),A(S,10),A(C,13),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,19),A(P,18),U(F, 0),A(P,18),A(P,18)},/* back */
/* / */{A(G,14),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),A(X,15),A(X,15),A(X,16),R(X, 0),U(P,21),A(P,18),U(F, 0),A(P,22),A(X,15)},/* slash */
/*  .  */{A(G, 5),U(K, 0),A(F, 6),A(F, 6),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* dot */
/* # */{A(P,18),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)},/* hash */
/* * */{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),A(X,16),A(X,15),A(X,17),A(X,17),A(P,18),A(P,18),U(F, 0),A(P,18),A(X,16)},/* star */
/* ?  */{R(G, 0),U(K, 0),U(I, 0),U(I, 0),U(I, 0),R(G, 0),U(F, 0),U(F, 0),U(F, 0),U(I, 0),A(S,10),A(S,10),A(C,12),A(C,12),U(G, 0),A(X,15),A(X,16),A(X,16),A(P,18),A(P,18),U(F, 0),A(P,18),A(P,18)} /* other */
};

/* Map a character code to its class via cclass[]; any value outside 0..255
** is OTHER.  The argument is parenthesized so that expressions such as
** CLASSIFY(v & 0xff) group correctly; it is still evaluated more than once,
** so it must not have side effects. */
#define CLASSIFY(CH) (((CH)>=0 && (CH)<256)?cclass[(CH)]:OTHER)

/* Characters that are replaced on output and the entity written in place of
** each one: replaced[k] is the substitute for original[k].  Both point at
** string literals, so they are declared const. */
static const char *const original="\"&<>"; /* translated characters */
static const char *const replaced[4]={"&quot;","&amp;","&lt;","&gt;"};

/*
** gettoken: read the next token from f into token, escaped for output.
**
** Characters are read one at a time and driven through the stab[] state
** machine.  Every character that is accepted is appended to token, with
** '"', '&', '<' and '>' replaced by their entities and each tab expanded to
** spaces up to the next multiple of tabwidth.
**
**   f        - stream to read from
**   token    - output buffer of max bytes; always left NUL terminated
**   max      - size of token; must be at least 7 for any character to be read
**   tabwidth - distance between tab stops; values below 1 are treated as 1
**   start    - nonzero resets the lexer state and the column counter
**
** Returns the token type found in stab[] for the last character examined,
** or ENDOFFILE as soon as fgetc() reports EOF (token then still holds any
** text accumulated before the end of input).  When the buffer is nearly full
** the partial token is returned and the remainder is delivered by later
** calls.
**
** The lexer state and column are kept in static variables, so the function
** tracks a single input stream at a time.
*/
int gettoken(FILE *f, char *token, int max, int tabwidth, int start){
   static int st=0, col=0;
   int going=1, cc=WHITESPACE, nextcc, ch, i=0, pad, rv=0;
   const char *hit, *entity;
   size_t len;

   if(start)
       st=0, col=0;
   if(tabwidth<1)
       tabwidth=1; /* col%tabwidth must never divide by zero */
   token[0]='\0';
   while(going){
       /* keep room for the longest entity ("&quot;", 6 bytes) plus the NUL */
       if(i>(max-7))
           return TYPEID(cc,st);
       if((ch=fgetc(f))==EOF)
           return ENDOFFILE;
       nextcc=(ch=='\n')?NEWLINE:CLASSIFY(ch);
       switch(ACTION(nextcc,st)){
       case UNGET:
           /* the character starts the next token: leave it in the stream */
           ungetc(ch,f);
           going=0;
           break;
       case RETURN:
           going=0;
           /* fall through: the terminating character is part of this token */
       default:
           if(ch=='\t'){
               pad=tabwidth-col%tabwidth;
               if(pad>max-1-i){
                   if(i>0){
                       /* the expansion does not fit: hand back what has
                       ** been collected and expand the tab on the next call */
                       ungetc(ch,f);
                       return TYPEID(cc,st);
                   }
                   /* tab stop wider than the whole buffer: fill what fits */
                   pad=max-1-i;
               }
               memset(token+i,' ',(size_t)pad);
               i+=pad;
               col+=pad;
           }else if(ch=='\n'){
               token[i++]=(char)ch;
               col=0; /* the next character is the first one on its line */
           }else if(ch!='\0'){
               hit=strchr(original,ch);
               if(hit){
                   entity=replaced[hit-original];
                   len=strlen(entity);
                   memcpy(token+i,entity,len);
                   i+=(int)len;
               }else{
                   token[i++]=(char)ch;
               }
               col++;
           }
           /* a NUL byte in the input is dropped: it cannot be stored in a
           ** C string, and strchr() would match it against the terminator
           ** of the translation list */
           token[i]='\0';
           break;
       }
       rv=TYPEID(nextcc,st);
       st=NEXTST(nextcc,st);
       cc=nextcc;
   }
   return rv;
}