#*

ABOUT

  A [nom] script that pretty prints a *nom* script listing as a
  [latex] document.

  No lexing code is needed: the script text only has to be placed,
  unchanged, in a "lstlisting" box in the [latex] document. So all
  this script contains is a nomsyn://begin block (which writes the
  preamble and opens the listing), a one-line copy loop, and an (eof)
  block (which closes the listing and the document).

  The listings package does the formatting. "bash" is probably the
  closest language for pretty printing [nom] that is supported by the
  listings package, so that is what the \lstset below selects. A new
  listings language definition (a new formatter) for nom could be
  made later. See the overleaf page about listings for a good
  explanation.

  * a code listing with caption
  -----
    \begin{lstlisting}[language=Python, caption=Python example]
    ....
    \end{lstlisting}
  ,,,,

  * make a list of listings with the above caption
  >> \lstlistoflistings

  The script this one was based on (nom.tohtml.pss) 'lexes' the
  input and uses <span> tags to colourise the different components
  of the script such as nomsyn://quotes and nomsyn://comments etc,
  because this seems to be what other [html] "pretty-printers"
  urbandict://pretty.print such as highlight.js do. That technique
  is not used here: <span> tags and unescaped characters like
  '#', '{', '}' and backslash are not valid in the body of a LaTeX
  document, so the lexing rules were removed and the text is
  printed verbatim inside the listing instead.

LIMITATIONS

  The input is copied verbatim, so an input script that itself
  contains the text \end{lstlisting} will presumably end the
  listing early (TODO confirm and, if so, handle it).

STATUS

  20 march 2025 starting:

TODO

NOTES

TOKENS

  There are no tokens used in this script because no parsing is done

HISTORY

  20 mar 2025
    starting based on nom.tohtml.pss

*#

begin {
  # Write a complete LaTeX preamble, start the document and open the
  # lstlisting box that will hold the nom source code. Everything
  # printed after this block, until the (eof) block, is typeset
  # verbatim by the listings package.
  #
  # xcolor is loaded only once, with the [table] option. Loading it
  # first without options and later with [table] makes LaTeX stop
  # with an "Option clash for package xcolor" error.
  add "

 %% -------------------------------------------
 %% latex generated by: mark.latex.pss
 %% from source file  :
 %%                on :
 %% -------------------------------------------

 \\documentclass[a4paper,12pt]{article}
 \\usepackage[margin=4pt,noheadfoot]{geometry}
 \\usepackage[table]{xcolor}           %% colours, and alternate row colour tables
 \\usepackage{multicol}                %% for multiple columns
 \\usepackage{keystroke}               %% for keyboard key images
 \\usepackage[toc]{multitoc}           %% for multi column table of contents
 \\usepackage{tocloft}                 %% to customize the table of contents
 \\setcounter{tocdepth}{2}             %% only display 2 levels in the contents
 \\setlength{\\cftbeforesecskip}{0cm}   %% make the toc more compact
 \\usepackage{listings}                %% for nice code listings
 \\usepackage{caption}                 %%
 %% see overleaf page for better liststyle
 \\lstset{
   captionpos=t,
   language=bash,
   basicstyle=\\ttfamily,           %% fixed pitch font
   xleftmargin=0pt,                %% margin on the left outside the frames
   framexleftmargin=0pt,
   framexrightmargin=0pt,
   framexbottommargin=5pt,
   framextopmargin=5pt,
   breaklines=true,                %% break long code lines
   breakatwhitespace=false,        %% break long code lines anywhere
   breakindent=10pt,               %% reduce the indent from 20pt to 10
   postbreak=\\mbox{{\\color{blue}\\small$\\Rightarrow$\\space}},  %% mark with arrow
   showstringspaces=false,         %% dont show spaces within strings
   framerule=2pt,                  %% thickness of the frames
   frame=top,frame=bottom,
   rulecolor=\\color{lightgrey},
   % frame=l
   % define special comment delimiters '##(' and ')'
   % moredelim=[s][\\color{grey}\\itshape\\footnotesize\\ttfamily]{~(}{)},
 } %% source code settings

 \\usepackage{graphicx}                %% to include images
 \\usepackage{fancybox}                %% boxes with rounded corners
 \\usepackage{wrapfig}                 %% flow text around tables, images
 \\usepackage{tabularx}                %% change width of tables
 \\usepackage{booktabs}                %% for heavier rules in tables
 \\usepackage[small,compact]{titlesec} %% sections more compact, less space
 \\usepackage{enumitem}                %% more compact and better lists
 \\setlist{noitemsep}                  %% reduce list item spacing
 \\usepackage{hyperref}                %% make urls into hyperlinks ?? no
 \\hypersetup{                         %% add pdftex if only pdf output is required
    colorlinks=false,                 %% set up the colours for the hyperlinks
    linkcolor=black,                  %% internal document links black
    urlcolor=black,                   %% url links black
    frenchlinks=true,
    bookmarks=true, pdfpagemode=UseOutlines}
 \\geometry{ left=1.0in,right=1.0in,top=1.0in,bottom=1.0in }
 %% define some colours to use
 \\definecolor{lightgrey}{gray}{0.70}
 \\definecolor{grey}{gray}{0.30}

 \\title{document.title}
 \\author{document.author}
 \\date{\\today}
 \\setlength{\\parindent}{0pt}
 %% label lists with stars
 \\renewcommand{\\labelitemi}{$\\star$}
 \\parindent=0pt
 \\parskip=6pt
 \\begin{document}
\\begin{lstlisting}
";
  print; clear;
}

# At end of input: close the listing box and then the LaTeX document.
# This test sits before 'read' so that the closing text is printed
# once the last character has been copied, instead of 'read' being
# attempted at end-of-file. The leading newline puts \end{lstlisting}
# at the start of a line even when the input has no final newline.
(eof) {
  add "\n\\end{lstlisting}\n\\end{document} \n";
  print; quit;
}

# Copy the nom source to the output unchanged, one character per
# cycle. Nothing is escaped or marked up because the text is inside
# the lstlisting box opened by the begin block.
read; print; clear;

# not used, no parsing.
parse>