#*

ABOUT 

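  A [nom] script that is meant to pretty print a *nom* script listing 
  in [latex]. It was started from the html pretty-printer, so the 
  lexing blocks still emit html <span> markup.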

  But we might not even need lexing code, because the code just needs 
  to be placed in a "lstlisting" box in a [latex] document and 
  then we are finished. So all we need is a nomsyn://begin block
  and an (eof) block.

  Probably just use the listings package and lstlisting word
  with a similar language or make a new formatter. See the overleaf
  page for a good explanation. "bash" is probably the closest 
  language for pretty printing [nom] that is supported by the 
  listings package.

  * a code listing with caption
  -----
    \begin{lstlisting}[language=Python, caption=Python example]
     ....
    \end{lstlisting}
  ,,,,

  * make a list of listing with the above caption
  >> \lstlistoflistings

  This uses <span> tags to colourise the different components 
  of the script such as nomsyn://quotes and nomsyn://comments etc.
  I used this technique because this seems to be what other 
  [html] "pretty-printers" urbandict://pretty.print such as 
  highlight.js do.

  We can just 'lex' the script and colorise it.

STATUS

  20 march 2025
    starting:

TODO
NOTES
TOKENS 
  There are no tokens used in this script because no parsing 
  is done

HISTORY
  20 mar 2025 
    starting based on nom.tohtml.pss

*#

  # The begin block runs once, before any input is read. It builds the
  # LaTeX preamble in the workspace, prints it and clears the workspace.
  #
  # Fixes relative to the earlier version of this preamble:
  #  - xcolor is loaded exactly once, with the [table] option. Loading
  #    it first without options and later with [table] makes LaTeX stop
  #    with an "Option clash for package xcolor" error.
  #  - the listings frame is given as the documented value 'tb'
  #    (top and bottom rules) instead of 'frame=top,frame=bottom',
  #    where the second setting replaces the first and neither 'top'
  #    nor 'bottom' is a documented value of the key.
  #
  # NOTE(review): the "generated by" banner still names mark.latex.pss,
  # and the lexing blocks further down emit html <span> markup after
  # \begin{document} -- confirm how the body is meant to be wrapped.
  begin {

      # make a valid LaTeX document
      add "
  %% -------------------------------------------
  %%  latex generated by: mark.latex.pss 
  %%   from source file : 
  %%                  on: 
  %% -------------------------------------------

  \\documentclass[a4paper,12pt]{article}
  \\usepackage[margin=4pt,noheadfoot]{geometry}
  \\usepackage[table]{xcolor}           %% colours, and alternate row colour tables
  \\usepackage{multicol}                %% for multiple columns
  \\usepackage{keystroke}               %% for keyboard key images
  \\usepackage[toc]{multitoc}           %% for multi column table of contents
  \\usepackage{tocloft}                 %% to customize the table of contents
  \\setcounter{tocdepth}{2}             %% only display 2 levels in the contents
  \\setlength{\\cftbeforesecskip}{0cm}   %% make the toc more compact
  \\usepackage{listings}                %% for nice code listings
  \\usepackage{caption}                 %% 

  %% see overleaf page for better liststyle
  \\lstset{
    captionpos=t,
    language=bash,
    basicstyle=\\ttfamily,          %% fixed pitch font
    xleftmargin=0pt,                %% margin on the left outside the frames
    framexleftmargin=0pt,
    framexrightmargin=0pt,
    framexbottommargin=5pt,
    framextopmargin=5pt,
    breaklines=true,                %% break long code lines
    breakatwhitespace=false,        %% break long code lines anywhere
    breakindent=10pt,               %% reduce the indent from 20pt to 10
    postbreak=\\mbox{{\\color{blue}\\small$\\Rightarrow$\\space}},  %% mark with arrow
    showstringspaces=false,            %% dont show spaces within strings
    framerule=2pt,                     %% thickness of the frames
    frame=tb,                          %% rules at the top and bottom only
    rulecolor=\\color{lightgrey}, 
    % frame=l
    % define special comment delimiters '##(' and ')'
    % moredelim=[s][\\color{grey}\\itshape\\footnotesize\\ttfamily]{~(}{)},
  }   %% source code settings
  \\usepackage{graphicx}                %% to include images
  \\usepackage{fancybox}                %% boxes with rounded corners
  \\usepackage{wrapfig}                 %% flow text around tables, images
  \\usepackage{tabularx}                %% change width of tables
  \\usepackage{booktabs}                %% for heavier rules in tables
  \\usepackage[small,compact]{titlesec} %% sections more compact, less space
  \\usepackage{enumitem}                %% more compact and better lists
  \\setlist{noitemsep}                  %% reduce list item spacing

  \\usepackage{hyperref}     %% make urls into hyperlinks ?? no
  \\hypersetup{              %% add pdftex if only pdf output is required
     colorlinks=false,       %% set up the colours for the hyperlinks
     linkcolor=black,        %% internal document links black
     urlcolor=black,        %% url links black
     frenchlinks=true,
     bookmarks=true, pdfpagemode=UseOutlines}

  \\geometry{ left=1.0in,right=1.0in,top=1.0in,bottom=1.0in }
  %% define some colours to use
  \\definecolor{lightgrey}{gray}{0.70}
  \\definecolor{grey}{gray}{0.30}

  \\title{document.title}
  \\author{document.author}
  \\date{\\today}
  \\setlength{\\parindent}{0pt}

  %% label lists with stars
  \\renewcommand{\\labelitemi}{$\\star$}

  \\parindent=0pt
  \\parskip=6pt
  \\begin{document}

  ";
    print; clear;
  }

  # When the input is exhausted, close the LaTeX document that the
  # begin block opened and stop. Otherwise read the next character 
  # of the script that is being listed.
  (eof) { add "\n\\end{document} \n"; print; quit; }

  read;

  # a newline resets the character counter, so that character 
  # numbers are relative to the current line
  [\n] { nochars; }

  # a run of whitespace is echoed unchanged
  [:space:] { 
    while [:space:]; 
    print; clear; 
    # no silent exit on EOF: at end of input jump to parse>, 
    # otherwise begin the next lexing cycle
    (eof) { .reparse } 
    .restart 
  }

  # Literal (punctuation) tokens, for readability maybe 'dot*' and 'comma*'.
  #
  # 'B' and 'E' are in this class because they are the begins-with and
  # ends-with test modifiers (as in B"[:" and E":]" used by the class 
  # block below). But this block runs before the command/word block, so a
  # word that merely starts with one of those letters would lose its 
  # first letter here: the 'E' of "EOF" was printed as punctuation and
  # the "EOF" test in the word block could never succeed. A modifier is
  # never followed directly by a letter, so when letters follow we read 
  # the whole word and mark it up as the word block would have done.
  [<{}(!BE,.;)>] { 

    "B","E" {
      while [:alpha:];
      # the upper case spelling of the end-of-file test word
      "EOF" {
        put; clear; add "<span class='nom-word'>"; get; add "</span>"; 
        print; clear; .restart 
      }
      # any other word starting with B or E is not a nom word
      !"B".!"E" {
        put; clear; add "<span class='nom-unknown'>"; get; add "</span>"; 
        print; clear; .restart 
      }
      # a lone B or E falls through and is marked as punctuation
    }
    
    put; clear; add "<span class='nom-punct'>"; get; add "</span>"; 
    print; clear; .restart 
  }

  # Command names and other words. Some tricks are needed to parse 
  # ++ -- a+ etc here, because [:alpha:],[+-] etc is not a union set
  # and 'while' cannot do "while [:alpha:],[+-]".
  #
  # The class is written [-+^0=] with the '-' first on purpose: 
  # [+-^0=] would be parsed as a range like [a-z].
  #
  # The word is read into the workspace, a one letter abbreviation is
  # expanded to the full command name, and the result is saved on the
  # tape with 'put' so that each branch can wrap it with 'get'.
  #
  # Changes relative to the earlier version of this block:
  #  - the "lower; <list of commands> { ... }" re-check was removed. It
  #    emitted exactly the same 'nom-unknown' markup (of the same saved
  #    text) as the final fall-through, so it had no effect on output.
  #  - "zero" and "escape" were listed twice in the command list.
  [:alpha:],[-+^0=] {

    "0" { clear; add "zero"; }
    "^" { clear; add "escape"; }
    "+" { while [+]; }
    "-" { while [-]; }
    "=" { while [=]; }
    while [:alpha:]; 

    # parse a+ or a- for the accumulator
    "a" { 
      # while [+-] is a bug because compile.pss thinks it is a range 
      # class, not a list class
      while [-+]; 
      "a+","a-" { put; }
      # a lone 'a' is the abbreviation of 'add'
      "a" { clear; add "add"; }
    }

    # one letter command abbreviations: surround the word with '#' so
    # that only a whole-word match is replaced

    put; clear; add "#"; get; add "#";
    replace "#k#" "#clip#"; replace "#K#" "#clop#";
    replace "#D#" "#replace#"; replace "#d#" "#clear#"; 
    replace "#t#" "#print#"; replace "#p#" "#pop#"; replace "#P#" "#push#"; 
    replace "#u#" "#unstack#"; replace "#U#" "#stack#"; replace "#G#" "#put#"; 
    replace "#g#" "#get#"; replace "#x#" "#swap#"; replace "#m#" "#mark#"; 
    replace "#M#" "#go#"; replace "#r#" "#read#"; replace "#R#" "#until#"; 
    replace "#w#" "#while#"; replace "#W#" "#whilenot#"; replace "#n#" "#count#"; 
    replace "#c#" "#chars#"; replace "#C#" "#nochars#"; replace "#l#" "#lines#"; 
    replace "#L#" "#nolines#"; replace "#v#" "#unescape#"; 
    replace "#z#" "#delim#"; 
    replace "#S#" "#state#"; replace "#q#" "#quit#"; replace "#s#" "#write#"; 
    replace "#o#" "#nop#"; replace "#rs#" "#restart#"; replace "#rp#" "#reparse#"; 

    # remove trailing and leading '#' char and save the (expanded) word
    clip; clop; put;

    # writefile is also a command?

    # commands parsed above
    "a+","a-","zero","escape","++","--",
    "add","clip","clop","replace","upper","lower","cap","clear",
    "print","state","pop","push","unstack","stack","put","get","swap",
    "mark","go","read","until","while","whilenot",
    "count","chars","lines","nochars","nolines",
    "unescape","delim","quit","write",
    "reparse","restart","nop" {

      clear; 
      add "<span class='nom-command'>"; get; add "</span>"; 
      print; clear; .restart
    }

    # words not commands
    "parse","eof","EOF","==" {
      clear; 
      add "<span class='nom-word'>"; get; add "</span>"; 
      print; clear; .restart
    }

    "begin" { 
      clear; 
      add "<span class='nom-keyword'>"; get; add "</span>"; 
      print; clear; .restart
    }

    # Anything else, including a command name written in the wrong
    # case, is marked up as unknown (shown in red as an error).
    clear; 
    add "<span class='nom-unknown'>"; get; add "</span>"; 
    print; clear; .restart
  }

  # Comments: a single line comment runs from '#' to the end of the 
  # line, a multiline comment is delimited by hash-star and star-hash.
  #
  # Fixes relative to the earlier version of this block:
  #  - the two short-cut branches printed the workspace and restarted 
  #    without clearing it. The text was therefore still in the 
  #    workspace on the next cycle and was printed a second time 
  #    (or ended up in the error fallback at the end of the script).
  #  - the class test [#\n] also matches "##", so a comment starting 
  #    with two hashes was not treated as a comment. Only an empty 
  #    comment ('#' directly followed by a newline) takes the short-cut
  #    now; "##..." falls through to the single line comment code.
  '#' {
    # a lone '#' as the very last character of the input
    (eof) { print; clear; .restart }
    read; 
    # empty comment: echoed as-is. It must not reach 'whilenot [\n]' 
    # below because the workspace already ends with the newline.
    [#\n].!"##" { print; clear; .restart }
    # multiline comments
    "#*" {
      until "*#"; put;
      # input ended before the closing delimiter was seen
      !E"*#" { 
        clear; add "<span class='nom-error'>"; get; add "</span>"; 
        print; clear; .restart
      }
      clear; add "<span class='nom-comment'>"; get; add "</span>"; 
      print; clear; .restart
    }
    # single line comment: read up to, but not including, the newline
    whilenot [\n]; put;
    clear; add "<span class='nom-comment'>"; get; add "</span>"; 
    print; clear; .restart
  }

  # Text in double quotes. There is no real error checking here 
  # (see nom.syntax.reference.pss for that); the only check is 
  # whether the closing quote was found before the input ended.
  '"' {
    until '"'; put; 
    # properly terminated: mark up as a string
    E'"' { 
      clear; add "<span class='nom-string'>"; get; add "</span>"; 
      print; clear; .restart
    }
    # otherwise the quote was never closed
    clear; add "<span class='nom-error'>"; get; add "</span>"; 
    print; clear; .restart
  }

  # Text in single quotes, handled like double quoted text
  "'" {
    until "'"; put; 

    # closing quote found: mark up as a string
    E"'" { 
      clear; add "<span class='nom-string'>"; get; add "</span>"; 
      print; clear; .restart
    }

    # input ended inside the quoted text
    clear; add "<span class='nom-error'>"; get; add "</span>"; 
    print; clear; .restart
  }

  # Character classes such as [:space:] or [abc] or [a-z]. They are 
  # used as tests and as the argument of while/whilenot.
  # The *until* command will read past 'escaped' end characters eg \]

  "[" {
    until "]"; put;

    # named classes of the form [:name:]. The degenerate forms [::] 
    # and [:] are excluded because they have no name to strip out.
    B"[:".E":]".!"[::]".!"[:]" { 
      # strip the leading '[:' and the trailing ':]' and save the name
      clop; clop; clip; clip; put;
      # the known class names. The character classes also have one 
      # letter abbreviations in nom (which may be silly but anyway) 
      "alnum","N","alpha","A","ascii","I","word","W","blank","B",
      "cntrl","C","digit","D","graph","G","lower","L","print","P",
      "punct","T","space","S","upper","U","xdigit","X" {
        clear; 
        add "<span class='nom-class'>[:"; get; add ":]</span>"; 
        print; clear; .restart
      }
      # a name that is not in the list above
      clear; 
      add "<span class='nom-unknown'>[:"; get; add ":]</span>"; 
      print; clear; .restart
    }

    # list and range classes like [abc] or [a-z]. I will not permit 
    # [\n-\t] silly. todo check this 
    E"]" { 
      clear; add "<span class='nom-class'>"; get; add "</span>"; 
      print; clear; .restart
    }

    # the closing bracket was never found
    clear; add "<span class='nom-error'>"; get; add "</span>"; 
    print; clear; .restart
  }

  # Whatever was not recognised by any block above is marked up as 
  # an error, followed by ' ??'
  !"" {
    put; clear; 
    add "<span class='nom-error'>"; 
    get; 
    add " ??</span>"; 
    print; clear; 
    .restart
  }

 # the parse label is required but nothing follows it, because this 
 # script only lexes and does no parsing.
 parse>