#-- Attempts to tokenise (lexically analyse) a grammar file.
#-- The grammar is for a parsing machine.

#p;
#l;
#-- Under DOS it may be necessary to remove the
#-- trailing \r character
# s/.$//;
#-- Lines that begin with a hash in the grammar
#-- file should be ignored as comments
/^#/{
 #-- Echo the comment line, tagged with "# Input Line:", and then end
 #-- the cycle. "d" discards the pattern space and starts on the next
 #-- input line, so it must stay the last command of this block: anything
 #-- placed after it (such as an "i\" notice) can never execute.
 s/^/# Input Line:/
 p
 d
}

#-- Echo the raw input line in the unambiguous "l" format, prefixed with
#-- "# Input Line:". The untouched line waits in the hold space while the
#-- prefixed copy is printed; "x" then swaps it back into the pattern
#-- space for the commands that follow.
h
s/^/# Input Line:/
l
x

#-- Make sure the line ends with a space.
s/$/ /

#-- "|" is used further down as the token terminator, so any bar that
#-- comes after a double quote or a slash (with no other bar, quote or
#-- slash in between) is replaced by the placeholder [:bar:]. Each pass
#-- replaces at most one bar per rule; loop until no rule matches.
:hideBars
s/\("[^|"]*\)|/\1[:bar:]/
s,\(/[^|/]*\)|,\1[:bar:],
t hideBars

#-- Tag the literals: "text" becomes String:text| and /text/ becomes
#-- Pattern:text| (the trailing bar terminates the token).
s/"\([^"]*\)"/String:\1|/g
s,/\([^/]*\)/,Pattern:\1|,g

#-- Within a String: token (everything up to its terminating "|"),
#-- characters that are token symbols elsewhere in the grammar are
#-- replaced by [:name:] placeholders so that the later tokenising steps
#-- leave them alone. Each substitution handles one occurrence, so loop
#-- until none of them matches.
#-- NOTE: the label must not carry trailing whitespace. Seds that read a
#-- label up to the end of the line would treat "loopA " and "loopA" as
#-- different labels and reject the branch below.
:loopA
s/\(String:[^|;]*\);/\1[:semicolon:]/
s/\(String:[^|+]*\)+/\1[:plus:]/
s/\(String:[^|-]*\)-/\1[:minus:]/
s/\(String:[^ |]*\) /\1[:space:]/
s/\(String:[^{|]*\){/\1[:lbrace:]/
s/\(String:[^}|]*\)}/\1[:rbrace:]/
s/\(String:[^|\\]*\)\\/\1[:backslash:]/
t loopA

#-- Same protection as for String: tokens, applied to Pattern: tokens:
#-- symbol characters before the terminating "|" become [:name:]
#-- placeholders, one occurrence per substitution, repeated until no
#-- substitution matches.
#-- NOTE: neither the label nor the branch may carry trailing whitespace.
#-- Seds that read a label up to the end of the line would treat
#-- "loopB " and "loopB" as different labels and reject the branch.
:loopB
s/\(Pattern:[^|;]*\);/\1[:semicolon:]/
s/\(Pattern:[^|+]*\)+/\1[:plus:]/
s/\(Pattern:[^|-]*\)-/\1[:minus:]/
s/\(Pattern:[^ |]*\) /\1[:space:]/
s/\(Pattern:[^{|]*\){/\1[:lbrace:]/
s/\(Pattern:[^}|]*\)}/\1[:rbrace:]/
s/\(Pattern:[^|\\]*\)\\/\1[:backslash:]/
t loopB

#-- An identifier is end-delimited by a space, so a space is inserted in
#-- front of braces, digit runs and semicolons to keep them from running
#-- into whatever precedes them.
s/{/ {/g
s/}/ }/g
s/\([0-9][0-9]*\)/ \1/g
s/;/ ;/g

#-- Replace each symbol with its token name; every token ends in "|".
#-- The minus rule runs first: the token names introduced afterwards
#-- contain "-" themselves and must not be rescanned by it.
s/-/Minus-Sign|/g
s/+/Plus-Sign|/g
s/\([0-9][0-9]*\)/Digit:\1|/g
s/{/Left-Brace:|/g
s/}/Right-Brace:|/g
s/;/Semi-Colon:|/g

#-- Keywords. A generic rule for arbitrary words is disabled:
#s/\([a-zA-Z][a-zA-Z]*\)/Word:\1
#-- NOTE(review): these match anywhere in the line, including inside a
#-- longer word (e.g. "put" within "input").
s/clear/Word:clear|/g
s/print/Word:print|/g
s/push/Word:push|/g
s/pop/Word:pop|/g
s/add/Word:add|/g
s/get/Word:get|/g
s/put/Word:put|/g
#-- Drop every remaining space. Spaces that belong to a String: or
#-- Pattern: token are still hidden as [:space:] at this point.
s/ //g

#-- Turn the placeholders inside String:/Pattern: tokens back into the
#-- characters they stand for.
s/\[:space:\]/ /g
s/\[:lbrace:\]/{/g
s/\[:rbrace:\]/}/g
s/\[:backslash:\]/\\/g
s/\[:semicolon:\]/;/g
s/\[:plus:\]/+/g
s/\[:minus:\]/-/g

#-- Convert the token-terminating bars to newline characters and thus
#-- put each token on a line of its own. This technique works with
#-- GNU sed version 4.0.8 and version 3.0.2.
s/|/\
/g

#-- Only now restore the literal bars, so that they are not mistaken for
#-- token terminators by the split above.
s/\[:bar:\]/|/g

#-- Print the tokens for this input line.
p