#*

ABOUT

  A [nom] script that pretty prints an ascii *nom* script listing
  as a LaTeX document. The listings package doesn't support unicode,
  so it's not much good for a language-parsing language.
  See eg/nom.tohtml.pss

  Use the listings package and the lstlisting environment, and define
  a new language 'nom' with a style. The problem, as always, is that
  pdflatex can't handle unicode chars. We need to use 'minted' with
  xelatex instead.

  * a code listing with caption
  -----
    \begin{lstlisting}[language=Python, caption=Python example]
    ....
    \end{lstlisting}
  ,,,,

  * make a list of listings with the above caption
  >> \lstlistoflistings

  This uses the 'listings' package to highlight or colourise the
  different components of the script such as nomsyn://quotes and
  nomsyn://comments etc.

TESTING

  * compile a script to pdf and view
  ----
    pep -f eg/nom.tohtml.notunicode.pss script.pss > test.tex;
    pdflatex test.tex
    open test.pdf
  ,,,,

  But unfortunately this will not work if you have any unicode chars
  in your scripts. See eg/nom.tohtml.pss for something hopefully better.

NOTES

  This script, like /eg/nom.tohtml.pss and /eg/nom.snippet.tohtml.pss,
  actually expands nom abbreviation commands to their full name. I am
  not sure if this is a good idea.

  Also, the listings [latex] package will break long lines, but it
  may be better for me to do this here.

  I don't even need lexing code because I can just put the code in a
  "lstlisting" box in a [latex] document and then we are finished.
  So all we need is a nomsyn://begin block and a (eof) block.

  I have left the lexing code because it gives flexibility to do
  things like break lines where I want to break them and fix simple
  errors like un-terminated quotes.

* page geometry in latex >> \\geometry{ left=1.0in,right=1.0in,top=1.0in,bottom=1.0in } * latex font sizes ----- \tiny \scriptsize \footnotesize \small \normalsize \large \Large \LARGE \huge \Huge ,,,, STATUS 20 march 2025 working but with unicode and symbol character problems TODO TOKENS There are no tokens used in this script because no parsing is done HISTORY 20 mar 2025 starting based on nom.tohtml.pss *# begin { # make a valid LaTeX document add " %% ------------------------------------------- %% latex generated by: nom.tolatex.pss %% the geometry package stops big margins. \\documentclass{article} \\usepackage[margin=40pt,nohead]{geometry} \\usepackage{listings} \\usepackage{xcolor} \\definecolor{codegreen}{rgb}{0,0.6,0} \\definecolor{codegray}{rgb}{0.5,0.5,0.5} \\definecolor{codepurple}{rgb}{0.58,0,0.82} \\definecolor{backcolour}{rgb}{0.95,0.95,0.92} \\lstdefinelanguage{nom}{ morekeywords={begin, a+, a-, zero, escape, ++, --, add, clip, clop, replace, upper, lower, cap, clear, print, state, pop, push, unstack, stack, put, get, swap, mark, go, read, until, while, whilenot, count, zero, chars, lines, nochars, nolines, escape, unescape, delim, quit, write, reparse, restart, nop, parse, eof, EOF, == }, sensitive=true, % keywords are case-sensitive morecomment=[l]{\\#}, % l is for line comment morecomment=[s]{\\#*}{*\\#}, % s is for start and end delimiter morestring=[s]{\"}{\"}, % double quoted strings, tests morestring=[s]{'}{'}, % double quoted strings, tests morestring=[s][\\color{orange}]{\\[}{\\]}, % for classes } % \\lstdefinestyle{newstyle}{ xleftmargin=0pt, %% margin on the left outside the frames framexleftmargin=0pt, framexrightmargin=0pt, framexbottommargin=5pt, framextopmargin=5pt, backgroundcolor=\\color{backcolour}, commentstyle=\\color{codegreen}, keywordstyle=\\color{magenta}, numberstyle=\\tiny\\color{codegray}, stringstyle=\\color{codepurple}, basicstyle=\\ttfamily\\normalsize, breakatwhitespace=false, breaklines=true, captionpos=b, 
keepspaces=true, numbers=left, numbersep=5pt, showspaces=false, showstringspaces=false, showtabs=false, tabsize=2 } \\lstset{style=newstyle} \\parindent=0pt \\parskip=6pt \\title{document.title} \\author{document.author} \\date{\\today} \\begin{document} \\begin{lstlisting}[language = nom] "; print; clear; } # end the latex code listing and document (eof) { add "\\end{lstlisting} \n"; add "\\end{document} \n"; print; quit; } read; # line-relative character numbers. could be important for spliting # lines properly. [\n] { nochars; } # just print space as-is [:space:] { while [:space:]; print; # can restart because there is an eof block above read clear; .restart } # literal tokens, for readability maybe 'dot*' and 'comma*' [<{}(!BE,.;)>] { # this put/clear/get/ code is superfluous put; clear; get; print; clear; .restart } # command names, need to do some tricks to parse ++ -- a+ etc # here. This is because [:alpha:],[+-] etc is not a union set # and while cannot do "while [:alpha:],[+-] etc # subtle bug, [+-^0=] parses as a range!!! 
[a-z] [:alpha:],[-+^0=] { "0" { clear; add "zero"; } "^" { clear; add "escape"; } "+" { while [+]; } "-" { while [-]; } "=" { while [=]; } while [:alpha:]; # parse a+ or a- for the accumulator "a" { # while [+-] is bug because compile.pss thinks its a range class # not a list class while [-+]; "a+","a-" { put; } "a" { clear; add "add"; } } # one letter command abbreviations put; clear; add "#"; get; add "#"; replace "#k#" "#clip#"; replace "#K#" "#clop#"; replace "#D#" "#replace#"; replace "#d#" "#clear#"; replace "#t#" "#print#"; replace "#p#" "#pop#"; replace "#P#" "#push#"; replace "#u#" "#unstack#"; replace "#U#" "#stack#"; replace "#G#" "#put#"; replace "#g#" "#get#"; replace "#x#" "#swap#"; replace "#m#" "#mark#"; replace "#M#" "#go#"; replace "#r#" "#read#"; replace "#R#" "#until#"; replace "#w#" "#while#"; replace "#W#" "#whilenot#"; replace "#n#" "#count#"; replace "#c#" "#chars#"; replace "#C#" "#nochars#"; replace "#l#" "#lines#"; replace "#L#" "#nolines#"; replace "#v#" "#unescape#"; replace "#z#" "#delim#"; replace "#S#" "#state#"; replace "#q#" "#quit#"; replace "#s#" "#write#"; replace "#o#" "#nop#"; replace "#rs#" "#restart#"; replace "#rp#" "#reparse#"; # remove trailing and leading '#' char clip; clop; put; # writefile is also a command? 
# commands parsed above "a+","a-","zero","escape","++","--", "add","clip","clop","replace","upper","lower","cap","clear", "print","state","pop","push","unstack","stack","put","get","swap", "mark","go","read","until","while","whilenot", "count","zero","chars","lines","nochars","nolines", "escape","unescape","delim","quit","write", "reparse","restart","nop" { print; clear; .restart } # words not commands "parse","eof","EOF","==" { print; clear; .restart } "begin" { print; clear; .restart } # lower case and check for command with error lower; "add","clip","clop","replace","upper","lower","cap","clear", "print","state","pop","push","unstack","stack","put","get","swap", "mark","go","read","until","while","whilenot", "count","zero","chars","lines","nochars","nolines", "escape","unescape","delim","quit", "write", "zero","++","--","a+","a-","nop", "begin","parse","reparse","restart" { # add an error marker? clear; add "#* ?? *#"; get; print; clear; .restart } # add an error marker clear; add "#* ?? *#"; get; print; clear; .restart } # single line comments '#' { (eof) { print; .restart } read; [#\n] { print; .restart } # multiline comments "#*" { until "*#"; put; !E"*#" { add "?? *#"; print; clear; .restart } print; clear; .restart } whilenot [\n]; put; print; clear; .restart } # double quoted text '"' { # no error checking. # see nom.syntax.reference.pss for error checking until '"'; put; !E'"' { add '?? "'; print; clear; .restart } print; clear; .restart } # single quotes "'" { until "'"; put; !E"'" { add "?? '"; print; clear; .restart } print; clear; .restart } # classes "[" { until "]"; put; !E"]" { add "?? ]"; print; clear; .restart } B"[:".E":]".!"[::]".!"[:]" { clip; clip; clop; clop; put; # list of [:class:] classes here. 
The character classes also # abbreviations in nom (which may be silly but anyway) "alnum","N","alpha","A","ascii","I","word","W","blank","B", "cntrl","C","digit","D","graph","G","lower","L","print","P", "punct","T","space","S","upper","U","xdigit","X" { clear; add "[:"; get; add ":]"; print; clear; .restart } clear; add "[?"; get; add "?]"; print; clear; .restart } # now [a-z] classes. I will not permit [\n-\t] silly # todo check this print; clear; .restart } !"" { add " #* char ?? *#"; print; clear; .restart } # not used, no parsing. parse>