lua-users home
lua-l archive

[Date Prev][Date Next][Thread Prev][Thread Next] [Date Index] [Thread Index]


Here is a flex spec that should be
pretty close to the Lua 5.1 lexer.
Not fully tested, but it works.

(4 files follow)

--- file: lua.l ---
/*
* lua.l - flex lexer for Lua 5.1
* Copyright: Same as Lua
*/

%{
#include "y.tab.h"

/* End-of-input hook called by the generated scanner.  It always returns 1,
 * which flex documents as "no further input to switch to". */
int yywrap( void )
{
  return 1;
}
%}

/* Named sub-patterns used by the rules below.
 *   w, o   - horizontal whitespace, one-or-more and zero-or-more.  Form feed
 *            is included and the bell character is not, following isspace.
 *   name   - identifier.
 *   n      - run of decimal digits.
 *   hex    - hexadecimal integer literal such as 0x1F.
 *   exp    - decimal exponent suffix.
 *   number - hex literal, or a decimal literal that may have digits on either
 *            side of the point missing (3  3.  3.14  .5), optionally followed
 *            by an exponent.
 */
w              [ \t\v\f]+
o              [ \t\v\f]*
name           [_a-zA-Z][_a-zA-Z0-9]*
n              [0-9]+
hex            0[xX][0-9a-fA-F]+
exp            [Ee][+-]?{n}
number         ({hex}|({n}([.][0-9]*)?|[.]{n}){exp}?)


/* Exclusive start conditions, one per multi-character construct that is
 * scanned piecewise: long strings, short and long comments, and strings
 * delimited by a double quote (Q) or an apostrophe (A). */
%x XLONGSTRING XSHORTCOMMENT XLONGCOMMENT XSTRINGQ XSTRINGA

%%

^#!.*          { /* hash-bang line: reported on yyout, no token is returned */
                 fprintf(yyout,"skipping: %s\n", yytext); }
and            return TK_AND;
break          return TK_BREAK;
do             return TK_DO;
else           return TK_ELSE;
elseif         return TK_ELSEIF;
end            return TK_END;
false          return TK_FALSE;
for            return TK_FOR;
function       return TK_FUNCTION;
if             return TK_IF;
in             return TK_IN;
local          return TK_LOCAL;
nil            return TK_NIL;
not            return TK_NOT;
or             return TK_OR;
repeat         return TK_REPEAT;
return         return TK_RETURN;
then           return TK_THEN;
true           return TK_TRUE;
until          return TK_UNTIL;
while          return TK_WHILE;

{number}       return TK_NUMBER;
{name}         { /* keywords are listed first so an equal-length match picks the keyword */
                 return TK_NAME; }

"--[["         { /* keep the opener in yytext and scan the body in XLONGCOMMENT */
                 yymore(); BEGIN( XLONGCOMMENT ); }
"--"           { /* keep the opener in yytext and scan to end of line in XSHORTCOMMENT */
                 yymore(); BEGIN( XSHORTCOMMENT ); }

"[["({o}\n)?   { /* long string opener, with an optional blank remainder of the line */
                 yymore(); BEGIN( XLONGSTRING ); }

{w}            return TK_WHITESPACE;
"..."          return TK_DOTS;
".."           return TK_CONCAT;
"=="           return TK_EQ;
">="           return TK_GE;
"<="           return TK_LE;
"~="           return TK_NE;
"-"            return yytext[0];
"+"            return yytext[0];
"*"            return yytext[0];
"/"            return yytext[0];
"="            return yytext[0];
">"            return yytext[0];
"<"            return yytext[0];
"("            return yytext[0];
")"            return yytext[0];
"["            return yytext[0];
"]"            return yytext[0];
"{"            return yytext[0];
"}"            return yytext[0];
\n             return TK_NEWLINE;
\r             return TK_NEWLINE;
\"             { /* opening quote is kept in yytext; body is scanned in XSTRINGQ */
                 yymore(); BEGIN(XSTRINGQ); }
'              { /* opening apostrophe is kept in yytext; body is scanned in XSTRINGA */
                 yymore(); BEGIN(XSTRINGA); }
.              { /* any other byte is its own token; the cast keeps bytes above 127
                    from being returned as negative codes where char is signed */
                 return (unsigned char)yytext[0]; }

<XSTRINGQ>{
  \"\"        { /* NOTE(review): a doubled quote is kept inside the string; the Lua 5.1
                   manual lists no such escape, confirm this is intended */
                yymore(); }
  \"          { /* closing quote: leave the string state and emit the token */
                BEGIN(0); return TK_STRING; }
  \\[abfnrtv] yymore();
  \\\\        { /* escaped backslash: consume the pair so a following quote can close */
                yymore(); }
  \\\n        yymore();
  \\\"        yymore();
  \\'         yymore();
  \\"["       yymore();
  \\"]"       yymore();
  [\n\r]      {    /* bare line break inside the string; the class no longer contains
                      a vertical bar, which used to be taken for a line break */
                      fprintf(yyout,"unterminated string.\n");
                      BEGIN(0);
                      return TK_STRING;
                  }
  .           yymore();
}

<XSTRINGA>{
  ''          { /* NOTE(review): a doubled apostrophe is kept inside the string; the
                   Lua 5.1 manual lists no such escape, confirm this is intended */
                yymore(); }
  '           { /* closing apostrophe: leave the string state and emit the token */
                BEGIN(0); return TK_STRING; }
  \\[abfnrtv] yymore();
  \\\\        { /* escaped backslash: consume the pair so a following apostrophe can close */
                yymore(); }
  \\\n        yymore();
  \\\"        yymore();
  \\'         yymore();
  \\"["       yymore();
  \\"]"       yymore();
  [\n\r]      {    /* bare line break inside the string; the class no longer contains
                      a vertical bar, which used to be taken for a line break */
                      fprintf(yyout,"unterminated string.\n");
                      BEGIN(0);
                      return TK_STRING;
                  }
  .           yymore();
}

<XLONGSTRING>{
  "]]"        { /* closing brackets end the long string */
                BEGIN(0); return TK_LONGSTRING; }
  .|\n        { /* every other character, line breaks included, is accumulated */
                yymore(); }
}

<XSHORTCOMMENT>{
  [\n\r]      { /* a line break ends the comment and stays part of yytext */
                BEGIN(0); return TK_SHORTCOMMENT; }
  .           { yymore(); }
}

<XLONGCOMMENT>{
  "]]"        { /* Lua ends a long comment at the closing brackets alone; requiring
                   two trailing dashes made the usual closers run on to end of file */
                BEGIN(0); return TK_LONGCOMMENT; }
  \n          yymore();
  \r          yymore();
  .           yymore();
}

%%

#ifdef YYMAIN

#include <stdio.h>
extern FILE*yyin,*yyout;

/*
 * TokenName - printable name for a token code returned by yylex().
 *
 *   t        token code: 0 for end of input, 1..255 for single characters,
 *            257 and above for the TK_* codes from y.tab.h.
 *
 * Returns "<ERROR>" for negative codes and for 256 (YYERRCODE in y.tab.h),
 * "EOF" for 0, "CHAR c" for single characters, the bare TK_ name for known
 * tokens and "<? n>" for anything else.
 *
 * The result is either a string literal or a pointer to an internal static
 * buffer that the next call overwrites, so callers must not modify or free
 * it and must copy it if they need it to survive another call.
 */
char* TokenName(int t)
{
  static char buffer[80];
  if( t < 0 || t == 256 ) return "<ERROR>";
  if( t == 0 ) return "EOF";
  if( t < 256 )
  {  /* output is at most 7 characters, well within the buffer */
     sprintf( buffer, "CHAR %c", (unsigned char)(unsigned int)t );
     return buffer;
  }
  switch(t)
  {  case TK_AND:            return "AND";
     case TK_BREAK:          return "BREAK";
     case TK_DO:             return "DO";
     case TK_ELSE:           return "ELSE";
     case TK_ELSEIF:         return "ELSEIF";
     case TK_END:            return "END";
     case TK_FALSE:          return "FALSE";
     case TK_FOR:            return "FOR";
     case TK_FUNCTION:       return "FUNCTION";
     case TK_IF:             return "IF";
     case TK_IN:             return "IN";
     case TK_LOCAL:          return "LOCAL";
     case TK_NIL:            return "NIL";
     case TK_NOT:            return "NOT";
     case TK_OR:             return "OR";
     case TK_REPEAT:         return "REPEAT";
     case TK_RETURN:         return "RETURN";
     case TK_THEN:           return "THEN";
     case TK_TRUE:           return "TRUE";
     case TK_UNTIL:          return "UNTIL";
     case TK_WHILE:          return "WHILE";
     case TK_CONCAT:         return "CONCAT";
     case TK_DOTS:           return "DOTS";
     case TK_EQ:             return "EQ";
     case TK_GE:             return "GE";
     case TK_LE:             return "LE";
     case TK_NE:             return "NE";
     case TK_NUMBER:         return "NUMBER";
     case TK_NAME:           return "NAME";
     case TK_STRING:         return "STRING";
     case TK_LONGSTRING:     return "LONGSTRING";
     /* the two comment names used to carry a stray trailing semicolon */
     case TK_SHORTCOMMENT:   return "SHORTCOMMENT";
     case TK_LONGCOMMENT:    return "LONGCOMMENT";
     case TK_WHITESPACE:     return "WHITESPACE";
     case TK_NEWLINE:        return "NEWLINE";
     case TK_BADCHAR:        return "BADCHAR";
     default: break;
  }
  /* unknown code above 256: report the number itself */
  sprintf( buffer, "<? %d>", t );
  return buffer;
}


/*
 * Stand-alone token dumper, built only when YYMAIN is defined.
 *
 *   argv[1]  optional input file  (standard input when absent)
 *   argv[2]  optional output file (standard output when absent)
 *
 * Writes one line per token (code, name, text) to the output, skipping
 * TK_NEWLINE and TK_WHITESPACE.  Returns 0 on success and 1 when a file
 * cannot be opened or the output file cannot be closed cleanly.
 */
int main( int argc, char ** argv )
{
  int tok;
  int status = 0;

  /* Bind the streams explicitly and fail loudly: a failed fopen used to
     fall back to stdin/stdout without any diagnostic. */
  yyin = stdin;
  if( argc > 1 )
  {
     yyin = fopen( argv[1], "rt" );
     if( !yyin )
     {  perror( argv[1] );
        return 1;
     }
  }

  yyout = stdout;
  if( argc > 2 )
  {
     yyout = fopen( argv[2], "wt" );
     if( !yyout )
     {  perror( argv[2] );
        if( yyin != stdin ) fclose( yyin );
        return 1;
     }
  }

  for( tok=yylex() ; tok ; tok=yylex() )
  {
     if( tok == TK_NEWLINE ) continue;
     if( tok == TK_WHITESPACE ) continue;
     fprintf( yyout, "%03d %-13.13s: %s\n", tok, TokenName(tok), yytext );
  }

  if( yyin != stdin ) fclose( yyin );
  /* Close the file that was opened; the old code closed stdout here and
     leaked the output file.  fclose flushes, so its result is checked. */
  if( yyout != stdout && fclose( yyout ) != 0 )
  {  perror( argv[2] );
     status = 1;
  }
  return status;
}

#endif

/**/

--- file: lua.y ---
/*
* Grammar for Lua 5.1
* Dummy for now. Used to define token values.
*/

%{
/* No C prologue is needed yet. */
%}

/* Code 0 is what the lexer returns at end of input. */
%token TK_EOF 0

/* Reserved words.  Numbering is pinned at 257 for the first one; y.tab.h
 * shows the remaining tokens numbered consecutively in declaration order. */
%token TK_AND 257
%token TK_BREAK
%token TK_DO
%token TK_ELSE
%token TK_ELSEIF
%token TK_END
%token TK_FALSE
%token TK_FOR
%token TK_FUNCTION
%token TK_IF
%token TK_IN
%token TK_LOCAL
%token TK_NIL
%token TK_NOT
%token TK_OR
%token TK_REPEAT
%token TK_RETURN
%token TK_THEN
%token TK_TRUE
%token TK_UNTIL
%token TK_WHILE

/* Multi-character operators and literals. */
%token TK_CONCAT
%token TK_DOTS
%token TK_EQ
%token TK_GE
%token TK_LE
%token TK_NE
%token TK_NUMBER
%token TK_NAME
%token TK_STRING

/* Lexer-only tokens.  The stray semicolons that followed the last five
 * declarations are removed; they are not part of the declaration syntax. */
%token TK_LONGSTRING
%token TK_SHORTCOMMENT
%token TK_LONGCOMMENT
%token TK_WHITESPACE
%token TK_NEWLINE
%token TK_BADCHAR

%%

/* Placeholder grammar: start derives Lua, and Lua derives nothing. */
start : Lua ;

Lua   : /* empty */ ;


%%

/*
 * Stub entry point for the parser side.  Both arguments are ignored and
 * the function always returns 0.
 */
int yymain( int argc, char ** argv )
{
  (void)argc;
  (void)argv;

  return 0;
}

/**/

--- file: m.cmd ---
rem Build the stand-alone token dumper: run byacc on the grammar, generate
rem the scanner as ylex.c, then compile it.
rem NOTE(review): -d is presumably what makes byacc emit y.tab.h, which
rem lua.l includes - confirm against the byacc documentation.
byacc -d lua.y
rem Alternative flex invocation kept for reference; it differs only in
rem using -Cef instead of -Cem.
rem flex -Bs8 -Cef -oylex.c lua.l
flex -Bs8 -Cem -oylex.c lua.l
rem -DYYMAIN enables the main() driver that lua.l guards with #ifdef YYMAIN.
cl -MD -Ox -DYYMAIN ylex.c -link /opt:ref /opt:icf /opt:nowin98

--- file: y.tab.h ---
/*
 * Token codes shared by the lexer (lua.l includes this header) and the
 * grammar.  The values follow the %token order in lua.y, starting at 257.
 * NOTE(review): this looks like the output of "byacc -d lua.y" from m.cmd;
 * if so, regenerate it rather than editing it by hand.
 */
#ifndef YYERRCODE
#define YYERRCODE 256
#endif

#define TK_EOF 0
#define TK_AND 257
#define TK_BREAK 258
#define TK_DO 259
#define TK_ELSE 260
#define TK_ELSEIF 261
#define TK_END 262
#define TK_FALSE 263
#define TK_FOR 264
#define TK_FUNCTION 265
#define TK_IF 266
#define TK_IN 267
#define TK_LOCAL 268
#define TK_NIL 269
#define TK_NOT 270
#define TK_OR 271
#define TK_REPEAT 272
#define TK_RETURN 273
#define TK_THEN 274
#define TK_TRUE 275
#define TK_UNTIL 276
#define TK_WHILE 277
#define TK_CONCAT 278
#define TK_DOTS 279
#define TK_EQ 280
#define TK_GE 281
#define TK_LE 282
#define TK_NE 283
#define TK_NUMBER 284
#define TK_NAME 285
#define TK_STRING 286
#define TK_LONGSTRING 287
#define TK_SHORTCOMMENT 288
#define TK_LONGCOMMENT 289
#define TK_WHITESPACE 290
#define TK_NEWLINE 291
#define TK_BADCHAR 292

/* eof */