#*

ABOUT 

  A [nom] script that pretty-prints a *nom* script listing in
  LaTeX. It doesn't really work yet: the output compiles with 'minted'
  under xelatex, but minted has no lexer for a language close to nom.

  This uses the 'minted' package to highlight or colourise the different
  components of the script such as nomsyn://quotes and nomsyn://comments etc.
  This should be run with xelatex or lualatex not pdflatex.

TESTING 

  This was hard to get to work. Also, I don't know how to define a new
  language with minted, so the result is not that useful.

  install xetex xelatex. 

  * install Pygments, needed by 'minted'
  >> pip3 install Pygments

  * I had to put cache=false for some unknown reason
  >> \\usepackage[cache=false]{minted}

  * compile the doc with weird options
  ------
   pep -f nom.tolatex.pss script.pss > test.tex;
   xelatex -synctex=1 -interaction=nonstopmode --shell-escape test.tex
  ,,,

NOTES

  I may have to write my own latex listing code.

  It was hard to get this compiling, and the 'minted' package
  doesn't really work for [nom], especially because nom has a multiline
  comment syntax (hash-star) that doesn't exist in other languages.

  This script like /eg/nom.tohtml.pss and /eg/nom.snippet.tohtml.pss
  actually expands nom abbreviation commands to their full name. I 
  am not sure if this is a good idea.

  I don't even need lexing code because I can just put
  the code in a "lstlisting" box in a [latex] document and
  then we are finished. So all we need is a nomsyn://begin block
  and an (eof) block. I have left the lexing code in because it gives
  the flexibility to do things like break lines where I want to break
  them and fix simple errors like unterminated quotes.

  * page geometry in latex

  >> \\geometry{ left=1.0in,right=1.0in,top=1.0in,bottom=1.0in }
  * latex font sizes
  -----
    \tiny \scriptsize \footnotesize \small \normalsize 
    \large \Large \LARGE \huge \Huge
  ,,,,

STATUS

  20 march 2025
    It compiles but is not very useful because there is no simple way
    to add a new language to minted or Pygments (a Python program).
    And I can't use the listings package because it doesn't do
    unicode. So we are a bit stuck, as usual with [latex] because
    it is a steaming pile of ....

TODO

TOKENS 

  There are no tokens used in this script because no parsing 
  is done

HISTORY

  20 mar 2025 
    starting based on nom.tohtml.pss

*#

  # Runs once, before any input is read: builds the LaTeX preamble in
  # the workspace, prints it and empties the workspace again so that the
  # rules below start from a clean state.
  begin {

    # make a valid LaTeX document
    # The text below is one multi-line string literal, so its indentation
    # and blank lines are part of the generated .tex file. It ends by
    # opening a 'minted' listing (with the "java" lexer) that is closed
    # again by the (eof) block.
    # "document.title" and "document.author" are emitted literally as
    # placeholders; the preamble contains no \maketitle.
      add "
  %% -------------------------------------------
  %%  latex generated by: nom.tolatex.pss 
  %%  the geometry package stops big margins.

  \\documentclass{article}
  \\usepackage[margin=40pt,nohead]{geometry}
  \\usepackage[cache=false]{minted}
  \\usepackage{xcolor}

  \\definecolor{codegreen}{rgb}{0,0.6,0}
  \\definecolor{codegray}{rgb}{0.5,0.5,0.5}
  \\definecolor{codepurple}{rgb}{0.58,0,0.82}
  \\definecolor{backcolour}{rgb}{0.95,0.95,0.92}

  \\parindent=0pt
  \\parskip=6pt
  \\title{document.title}
  \\author{document.author}
  \\date{\\today}

  \\begin{document}

  \\begin{minted}{java}

  ";
    # emit the preamble now and leave the workspace empty
    print; clear;
  }

  # End of input: close the 'minted' listing and the LaTeX document,
  # print the closing lines and stop the machine.
  (eof) {
    # The leading newline makes sure \end{minted} starts on its own line
    # even when the input script has no final newline. Otherwise the
    # closing tag is glued to the last line of code, and the verbatim
    # scanner that minted builds on (fancyvrb) does not accept text in
    # front of the closing tag on the same line.
    add "\n\\end{minted} \n";
    add "\\end{document} \n";
    print; quit;
  }

  # Fetch the next input character into the workspace. Every rule below
  # ends with 'clear; .restart', so each cycle starts here with just
  # that one character.
  read;

  # line-relative character numbers. could be important for splitting
  # lines properly.
  # This rule has no '.restart', so the newline continues to the rules
  # below (presumably it is then echoed by the [:space:] rule).
  [\n] { nochars; }
  # Whitespace: gather the whole run of whitespace characters and echo
  # it unchanged.
  [:space:] {
    while [:space:];
    print;
    clear;
    # restarting is safe here: the (eof) test sits above the 'read' at
    # the top of the script, so the end of input is still noticed
    .restart
  }
  # Literal one-character tokens (braces, brackets, negation, the B and E
  # test modifiers, comma, dot, semicolon): echo the character as-is.
  # For readability these could be named, e.g. 'dot*' and 'comma*'.
  #
  # The former 'put; clear; get;' round trip was removed: it copied the
  # workspace to the tape and straight back, leaving the workspace
  # unchanged, and no later rule reads the tape without writing it first.
  #
  # NOTE(review): 'B' and 'E' are matched here one character at a time,
  # so a word starting with either letter (for example EOF) looks like
  # it is split before reaching the command-name rule. Confirm whether
  # that is intended.
  [<{}(!BE,.;)>] { 
    print; clear; .restart 
  }

  # command names, need to do some tricks to parse ++ -- a+ etc
  # here. This is because [:alpha:],[+-] etc is not a union set
  # and while cannot do "while [:alpha:],[+-] etc
  #
  # Overview of this rule: read a whole word (or a run of + - = signs),
  # expand one- and two-letter command abbreviations to their full names,
  # echo recognised commands and keywords unchanged, and prefix anything
  # else with an error marker.

  # subtle bug, [+-^0=] parses as a range!!! [a-z]
  # (which is why the '-' is written first in the class below)
  [:alpha:],[-+^0=] {

    # symbol commands: '0' and '^' are replaced by their long names,
    # repeated signs such as ++ -- == are gathered into one token
    "0" { clear; add "zero"; }
    "^" { clear; add "escape"; }
    "+" { while [+]; }
    "-" { while [-]; }
    "=" { while [=]; }
    # read the rest of an alphabetic word
    while [:alpha:]; 

    # parse a+ or a- for the accumulator
    # a lone 'a' with no sign after it is the abbreviation of 'add'
    "a" { 
      # while [+-] is bug because compile.pss thinks its a range class
      # not a list class
      while [-+]; 
      "a+","a-" { put; }
      "a" { clear; add "add"; }
    }

    # one letter command abbreviations
    # The word is wrapped in '#' delimiters first so that each replace
    # can only match the complete word, never part of a longer one.

    put; clear; add "#"; get; add "#";
    replace "#k#" "#clip#"; replace "#K#" "#clop#";
    replace "#D#" "#replace#"; replace "#d#" "#clear#"; 
    replace "#t#" "#print#"; replace "#p#" "#pop#"; replace "#P#" "#push#"; 
    replace "#u#" "#unstack#"; replace "#U#" "#stack#"; replace "#G#" "#put#"; 
    replace "#g#" "#get#"; replace "#x#" "#swap#"; replace "#m#" "#mark#"; 
    replace "#M#" "#go#"; replace "#r#" "#read#"; replace "#R#" "#until#"; 
    replace "#w#" "#while#"; replace "#W#" "#whilenot#"; replace "#n#" "#count#"; 
    replace "#c#" "#chars#"; replace "#C#" "#nochars#"; replace "#l#" "#lines#"; 
    replace "#L#" "#nolines#"; replace "#v#" "#unescape#"; 
    replace "#z#" "#delim#"; 
    replace "#S#" "#state#"; replace "#q#" "#quit#"; replace "#s#" "#write#"; 
    replace "#o#" "#nop#"; replace "#rs#" "#restart#"; replace "#rp#" "#reparse#"; 

    # remove trailing and leading '#' char
    # and save the expanded word on the tape; the error branches below
    # fetch it from there with 'get'
    clip; clop; put;

    # writefile is also a command?

    # commands parsed above
    # a valid command name: echo it (in its expanded form)
    "a+","a-","zero","escape","++","--",
    "add","clip","clop","replace","upper","lower","cap","clear",
    "print","state","pop","push","unstack","stack","put","get","swap",
    "mark","go","read","until","while","whilenot",
    "count","zero","chars","lines","nochars","nolines",
    "escape","unescape","delim","quit","write",
    "reparse","restart","nop" {
      print; clear; .restart
    }

    # words not commands
    "parse","eof","EOF","==" {
      print; clear; .restart
    }

    "begin" { 
      print; clear; .restart
    }

    # lower case and check for command with error
    # i.e. a known name written with the wrong letter case. The tape
    # still holds the word as it was written, because the 'put' above
    # ran before 'lower'.
    lower; 
    "add","clip","clop","replace","upper","lower","cap","clear",
    "print","state","pop","push","unstack","stack","put","get","swap",
    "mark","go","read","until","while","whilenot",
    "count","zero","chars","lines","nochars","nolines",
    "escape","unescape","delim","quit", "write",
    "zero","++","--","a+","a-","nop",
    "begin","parse","reparse","restart" {
      # add an error marker?
      clear; add "#* ?? *#"; get; 
      print; clear; .restart
    }

    # add an error marker
    # unknown word: currently the same output as the wrong-case branch
    # above (marker followed by the word from the tape)
    clear; add "#* ?? *#"; get; 
    print; clear; .restart
  }

  # Comments. A hash starts a single-line comment that runs to the end
  # of the line; hash-star opens a multiline comment that runs to the
  # matching star-hash. Both kinds are echoed unchanged, except that an
  # unterminated multiline comment gets a '??' marker and a closing tag.
  #
  # Every branch clears the workspace before restarting. Without the
  # 'clear' the already printed text stays in the workspace and is
  # printed a second time on the next cycle (and a lone hash at the end
  # of the input would end up in front of the closing minted tag).
  '#' {
    # a lone hash as the very last character of the input
    (eof) { print; clear; .restart }
    read; 
    # an empty comment (hash followed by newline) or a double hash
    [#\n] { print; clear; .restart }
    # multiline comments
    "#*" {
      until "*#"; put;
      # input ended before the comment was closed
      !E"*#" { 
        add "?? *#"; print; clear; .restart
      }
      print; clear; .restart
    }
    # ordinary single-line comment: take the rest of the line, leaving
    # the newline itself for the next cycle
    whilenot [\n]; put;
    print; clear; .restart
  }

  # Double-quoted text: read up to the closing quote and echo the whole
  # string. No real error checking is done here (see
  # nom.syntax.reference.pss for that); the only repair is for a string
  # that is still open when the input ends.
  '"' {
    until '"';
    put; 
    # unterminated: append a marker that also closes the quote
    !E'"' { add '?? "'; }
    print;
    clear;
    .restart
  }

  # Single-quoted text: read up to the closing quote and echo the whole
  # string, repairing a quote that is still open when the input ends.
  "'" {
    until "'";
    put; 
    # unterminated: append a marker that also closes the quote
    !E"'" { add "?? '"; }
    print;
    clear;
    .restart
  }

  # classes
  # Character classes in square brackets: either a named class such as
  # [:space:] or a list/range such as [a-z]. The class is read up to the
  # closing bracket and echoed; unknown class names are flagged.
  "[" {
    until "]"; put;
    # input ended before the class was closed: add a marker and a
    # closing bracket
    !E"]" { 
      add "?? ]"; print; clear; .restart
    }
    
    # named class: starts with "[:" and ends with ":]", but is not one
    # of the degenerate forms "[::]" or "[:]"
    B"[:".E":]".!"[::]".!"[:]" { 
      # strip the two delimiter characters at each end and keep the bare
      # class name on the tape
      clip; clip; clop; clop; put;
      # list of [:class:] classes here. The character classes also
      # abbreviations in nom (which may be silly but anyway) 
      "alnum","N","alpha","A","ascii","I","word","W","blank","B",
      "cntrl","C","digit","D","graph","G","lower","L","print","P",
      "punct","T","space","S","upper","U","xdigit","X" {
        # known name: rebuild the class exactly as it was written
        clear; add "[:"; get; add ":]"; 
        print; clear; .restart
      }
      # unknown name: echo it between "[?" and "?]" as an error marker
      clear; 
      add "[?"; get; add "?]"; 
      print; clear; .restart
    }
    # now [a-z] classes. I will not permit [\n-\t] silly
    # todo check this 
    # (list and range classes are currently echoed without any checks)
    print; clear; .restart
  }

  # Fallback: whatever is still in the workspace here was not matched by
  # any rule above. Echo it followed by an error marker.
  !"" {
    add " #* char ?? *#";
    print;
    clear;
    .restart
  }

 # The parse label is required by the script layout but nothing follows
 # it: this script only lexes, it never pushes or reduces tokens.
 parse>