#*

ABOUT 

  A [nom] script that pretty prints an ascii *nom* script listing in 
  LaTeX, using the 'listings' package.

  The listings package doesn't support unicode, so it's not much 
  good for a language-parsing language. See eg/nom.tohtml.pss

  Use the listings package and lstlisting word and define a 
  new language 'nom' with a style. The problem as always is pdflatex
  can't handle unicode chars. need to use 'minted' with xelatex

  * a code listing with caption
  -----
    \begin{lstlisting}[language=Python, caption=Python example]
     ....
    \end{lstlisting}
  ,,,,

  * make a list of listing with the above caption
  >> \lstlistoflistings

  This uses the 'listings' package to highlight or colourise the different
  components of the script such as nomsyn://quotes and nomsyn://comments etc.

TESTING
 
  * compile a script to pdf and view
  ----
    pep -f eg/nom.tohtml.notunicode.pss script.pss > test.tex; pdflatex test.tex
    open test.pdf
  ,,,,

  But unfortunately this will not work if you have any unicode chars
  in your scripts. See eg/nom.tohtml.pss for something hopefully better.


NOTES

  This script like /eg/nom.tohtml.pss and /eg/nom.snippet.tohtml.pss
  actually expands nom abbreviation commands to their full name. I 
  am not sure if this is a good idea.

  Also, the listings [latex] package will also break long lines
  but it may be better for me to do this here.

  I don't even need lexing code because I can just put the
  code in a "lstlisting" box in a [latex] document and 
  then we are finished. So all we need is a nomsyn://begin block
  and a (eof) block. I have left the lexing code because it gives 
  flexibility to do things like break lines where I want to break
  them and fix simple errors like un-terminated quotes.

  * page geometry in latex

  >> \\geometry{ left=1.0in,right=1.0in,top=1.0in,bottom=1.0in }
  * latex font sizes
  -----
    \tiny \scriptsize \footnotesize \small \normalsize 
    \large \Large \LARGE \huge \Huge
  ,,,,

STATUS

  20 march 2025
    working but with unicode and symbol character problems

TODO

TOKENS 

  There are no tokens used in this script because no parsing 
  is done

HISTORY
  20 mar 2025 
    starting based on nom.tohtml.pss

*#

  # The begin block writes the opening part of the LaTeX document: the
  # preamble, the definition of a 'nom' language and a style for the
  # listings package, and the opening of the lstlisting environment
  # which will hold the script text printed by the blocks below.
  begin {

    # Build the whole preamble in the workspace as one piece of text.
    # Within the quoted text a doubled backslash stands for one LaTeX
    # backslash and \" for a double-quote character.
      add "
  %% -------------------------------------------
  %%  latex generated by: nom.tolatex.pss 
  %%  the geometry package stops big margins.

  \\documentclass{article}
  \\usepackage[margin=40pt,nohead]{geometry}
  \\usepackage{listings}
  \\usepackage{xcolor}

  \\definecolor{codegreen}{rgb}{0,0.6,0}
  \\definecolor{codegray}{rgb}{0.5,0.5,0.5}
  \\definecolor{codepurple}{rgb}{0.58,0,0.82}
  \\definecolor{backcolour}{rgb}{0.95,0.95,0.92}

  \\lstdefinelanguage{nom}{
    morekeywords={begin, a+, a-, zero, escape, ++, --, 
      add, clip, clop, replace, upper, lower, cap, clear, 
      print, state, pop, push, unstack, stack, put, get, swap, 
      mark, go, read, until, while, whilenot, 
      count, zero, chars, lines, nochars, nolines, 
      escape, unescape, delim, quit, write, 
      reparse, restart, nop, parse, eof, EOF, == },
    sensitive=true,          % keywords are case-sensitive
    morecomment=[l]{\\#},        % l is for line comment
    morecomment=[s]{\\#*}{*\\#}, % s is for start and end delimiter
    morestring=[s]{\"}{\"},      % double quoted strings, tests
    morestring=[s]{'}{'},      % double quoted strings, tests
    morestring=[s][\\color{orange}]{\\[}{\\]},  % for classes
  } %

  \\lstdefinestyle{newstyle}{
      xleftmargin=0pt,          %% margin on the left outside the frames
      framexleftmargin=0pt,
      framexrightmargin=0pt,
      framexbottommargin=5pt,
      framextopmargin=5pt,
      backgroundcolor=\\color{backcolour},
      commentstyle=\\color{codegreen},
      keywordstyle=\\color{magenta},
      numberstyle=\\tiny\\color{codegray},
      stringstyle=\\color{codepurple},
      basicstyle=\\ttfamily\\normalsize,
      breakatwhitespace=false,
      breaklines=true,
      captionpos=b,
      keepspaces=true,
      numbers=left,
      numbersep=5pt,
      showspaces=false,
      showstringspaces=false,
      showtabs=false,
      tabsize=2
  }
  \\lstset{style=newstyle}

  \\parindent=0pt
  \\parskip=6pt
  \\title{document.title}
  \\author{document.author}
  \\date{\\today}

  \\begin{document}

  \\begin{lstlisting}[language = nom]

  ";
    # write the preamble out and empty the workspace so that the
    # lexing blocks start with nothing in it
    print; clear;
  }

  # At the end of the input close the code listing and then the
  # LaTeX document, and stop the script.
  (eof) {
    add "\\end{lstlisting} \n"; print; clear;
    add "\\end{document} \n"; print;
    quit;
  }

  read;

  # White space is written out exactly as it was read.
  [:space:] {
    # A newline also resets the character counter, which keeps the
    # character number relative to the line. That could be important
    # for splitting lines properly.
    [\n] { nochars; }
    while [:space:]; print;
    # restarting is safe here because the (eof) block comes before 'read'
    clear; .restart
  }
  # Literal tokens, for readability maybe 'dot*' and 'comma*'.

  # 'B' (begins-with) and 'E' (ends-with) are test prefixes, as in
  # B"ab" or E"ab", and are literal tokens only when they stand alone.
  # The letters that follow are read first so that a word such as EOF
  # is not split into 'E' and 'OF' (which made 'OF' be flagged as an
  # error). A longer word is left in the workspace and drops through
  # to the command-name block below, which accepts multi-letter words.
  "B","E" {
    while [:alpha:];
    "B","E" { print; clear; .restart }
  }

  # every other one-character literal token is written out unchanged
  [<{}(!,.;)>] {
    print; clear; .restart
  }

  # Command names and their abbreviations. Commands made of symbols
  # (++ -- a+ a- and so on) need some tricks because a union such as
  # [:alpha:],[+-] is not a single class and cannot be given to 'while'.

  # Caution: written as [+-^0=] the class would be parsed as a range
  # like [a-z], so the '-' is placed first.
  [:alpha:],[-+^0=] {

    # one-character symbols that stand for a command are spelled out
    "0" { clear; add "zero"; }
    "^" { clear; add "escape"; }
    # collect runs of operator characters: ++ -- ==
    "+" { while [+]; }
    "-" { while [-]; }
    "=" { while [=]; }
    # collect the rest of a word
    while [:alpha:];

    # A lone 'a' either begins the accumulator commands a+ and a- or
    # abbreviates 'add'. The class is [-+] because compile.pss takes
    # [+-] for a range class and not a list class.
    "a" {
      while [-+];
      "a" { clear; add "add"; }
    }

    # Expand the command abbreviations. The word is wrapped in '#'
    # characters so that only a whole word can match.
    put; clear; add "#"; get; add "#";
    replace "#k#" "#clip#";
    replace "#K#" "#clop#";
    replace "#D#" "#replace#";
    replace "#d#" "#clear#";
    replace "#t#" "#print#";
    replace "#p#" "#pop#";
    replace "#P#" "#push#";
    replace "#u#" "#unstack#";
    replace "#U#" "#stack#";
    replace "#G#" "#put#";
    replace "#g#" "#get#";
    replace "#x#" "#swap#";
    replace "#m#" "#mark#";
    replace "#M#" "#go#";
    replace "#r#" "#read#";
    replace "#R#" "#until#";
    replace "#w#" "#while#";
    replace "#W#" "#whilenot#";
    replace "#n#" "#count#";
    replace "#c#" "#chars#";
    replace "#C#" "#nochars#";
    replace "#l#" "#lines#";
    replace "#L#" "#nolines#";
    replace "#v#" "#unescape#";
    replace "#z#" "#delim#";
    replace "#S#" "#state#";
    replace "#q#" "#quit#";
    replace "#s#" "#write#";
    replace "#o#" "#nop#";
    replace "#rs#" "#restart#";
    replace "#rp#" "#reparse#";

    # take the '#' wrapper off again and keep the word on the tape
    clip; clop; put;

    # writefile is also a command?

    # Every command, plus the words that are not commands (parse, eof,
    # EOF, == and begin), is written out as it stands.
    "a+","a-","++","--","==",
    "zero","escape","unescape","delim",
    "add","clip","clop","replace","upper","lower","cap","clear",
    "print","state","pop","push","unstack","stack","put","get","swap",
    "mark","go","read","until","while","whilenot",
    "count","chars","lines","nochars","nolines",
    "quit","write","nop",
    "reparse","restart","parse","eof","EOF","begin" {
      print; clear; .restart
    }

    # Anything else gets an error marker in front of the word as it was
    # saved on the tape. A command name written in the wrong case is
    # marked in exactly the same way as any other unknown word.
    clear; add "#* ?? *#"; get;
    print; clear; .restart
  }

  # Comments: a '#' begins a single line comment, and a '#' followed
  # by '*' begins a multiline comment.
  '#' {
    # A '#' that is the last character of the input. The workspace has
    # to be cleared before restarting, otherwise the (eof) block prints
    # the '#' a second time in front of the closing LaTeX lines.
    (eof) { print; clear; .restart }
    read; 
    # The class matches both an empty comment (a '#' and a newline) and
    # a doubled '#'. Only the empty comment is complete at this point:
    # it is printed and cleared, so that it is not left in the workspace
    # and printed again with the next character. A doubled '#' drops
    # through and is read to the end of the line like any other comment.
    [#\n] {
      !"##" { print; clear; .restart }
    }
    # multiline comments
    "#*" {
      until "*#"; put;
      # the input ended before the comment was closed: mark and close it
      !E"*#" { 
        add "?? *#"; print; clear; .restart
      }
      print; clear; .restart
    }
    # an ordinary single line comment runs up to, but does not
    # include, the newline
    whilenot [\n]; put;
    print; clear; .restart
  }

  # Text in double quotes. The only check made is for a missing closing
  # quote, in which case a marker and a closing quote are appended.
  # See nom.syntax.reference.pss for real error checking.
  '"' {
    until '"';
    !E'"' { add '?? "'; }
    print; clear; .restart
  }

  # Text in single quotes. If the closing quote is missing a marker
  # and a closing quote are appended.
  "'" {
    until "'";
    !E"'" { add "?? '"; }
    print; clear; .restart
  }

  # Character classes, in square brackets.
  "[" {
    until "]";
    # no closing bracket was found: mark the class and close it
    !E"]" {
      add "?? ]"; print; clear; .restart
    }

    # named classes of the form [:name:]
    B"[:".E":]".!"[::]".!"[:]" {
      # keep the complete class on the tape and strip the workspace
      # down to the bare name
      put; clip; clip; clop; clop;
      # The known class names. The character classes also have
      # abbreviations in nom (which may be silly but anyway).
      "alnum","N","alpha","A","ascii","I","word","W","blank","B",
      "cntrl","C","digit","D","graph","G","lower","L","print","P",
      "punct","T","space","S","upper","U","xdigit","X" {
        # a known name: write the class out as it was read
        clear; get;
        print; clear; .restart
      }
      # an unknown name is written out between '?' marks
      put; clear;
      add "[?"; get; add "?]";
      print; clear; .restart
    }
    # All other classes, such as [a-z], are written out unchanged.
    # I will not permit [\n-\t] silly
    # todo check this
    print; clear; .restart
  }

  # Whatever has not been recognised by a block above is written out
  # followed by an error marker.
  !"" {
    add " #* char ?? *#"; print;
    clear; .restart
  }

 # The parse label is not used because this script does no parsing.
 parse>