#*

ABOUT

  A [nom] script that pretty prints a *nom* script listing in latex.
  Starting to work. This should be run with xelatex or lualatex, not
  pdflatex, for unicode chars, but I need to find the right font etc.

  I was going to do inside/outside parsing on multiline comments
  but I don't think I will bother. Also because eventually I will
  try to format multiline comments with /eg/text.tohtml.pss

LATEX

  Get the linux libertine font for greek characters and others.

  * extended colour names
  >> \usepackage[dvipsnames]{xcolor}

  * a background color box
  >> \colorbox{BurntOrange}{orange background}

  * only colour the given text, not all following text
  >> \textcolor{red}{easily}
  >> {\color{RubineRed} \rule{\linewidth}{0.5mm}}  % the same

  * create a new command with a parameter
  >> \newcommand{\bb}[1]{\mathbb{#1}}

  \newline or \\ inserts a newline
  \hspace*{2em} inserts 2 letter space at the start of the line.
  \ttfamily changes font to fixed pitch.

  * escapes, these escapes are circular, see the trick in the script.
  ------
    #  \#
    $  \$
    %  \%
    &  \&
    \  \textbackslash{}
    ^  \textasciicircum{}
    ~  \textasciitilde{}
    _  \_
    {  \{
    }  \}
  ,,,,

BUGS

  Multiline comments and multiline strings are not indented to match
  the code around them (they get a fixed 3em indent on continuation
  lines).

TESTING

  install xetex xelatex.

  * compile the doc with weird options
  ------
    pep -f nom.tolatex.pss script.pss > test.tex;
    xelatex -synctex=1 -interaction=nonstopmode test.tex
  ,,,

NOTES

  This script uses an interesting token throwing technique to
  markup the text into latex. I am writing my own latex listing
  code.

  This script, like /eg/nom.tohtml.pss and /eg/nom.snippet.tohtml.pss,
  actually expands nom abbreviation commands to their full name. I am
  not sure if this is a good idea.

  * page geometry in latex
  >> \\geometry{ left=1.0in,right=1.0in,top=1.0in,bottom=1.0in }

  * latex font sizes
  -----
    \tiny \scriptsize \footnotesize \small \normalsize
    \large \Large \LARGE \huge \Huge
  ,,,,

STATUS

  20 march 2025
    working. I don't indent multiline comments or multiline strings.

DONE

  line numbers, colours, strings classes, make new commands for latex.
  the indents were too big: changed to 1ex not 1 em

  The 'B' and 'E' test prefixes are now recognised in the word block
  (after the whole word has been read) so that the keyword 'EOF' is
  no longer split into a literal 'E' and an unknown word 'OF'.

  A comment that starts with two hash characters is now styled as a
  normal comment, and an empty comment (a hash immediately followed
  by a newline) now starts the next listing line properly.

TODO

  - the tcolorbox does strange things across pages.
  - the colours are not great
  - also, try to get fonts that will display chess pieces etc.

TOKENS

  The tokens below are not really true grammar parse tokens. They are
  a way to markup the text. It is necessary to 'escape' special [latex]
  characters before we add styling to the text. The replace* parse
  token does this.

  Every block that wants styled output stores its text in the tape
  cell one to the right, then pushes the pair  <style>*replace*  and
  jumps to the parse> label, where the text is escaped and wrapped.

  replace*
    this is just a convenience token used to replace [latex]
    special characters.
  bold*         makes the attribute text bold
  italic*       makes text italic
  commentstyle* marks text up in the comment style (\nomcomment)
  stringstyle*  marks text up in the string style (\nomstring)
  classstyle*   marks text up in the class style (\nomclass)
  commandstyle* marks text up in the command style (\nomcommand)
  keywordstyle* marks text up in the keyword style (\nomkeyword)
  errorstyle*   marks text up in the error style (\nomerror)

HISTORY

  20 mar 2025
    starting based on nom.tohtml.pss

*#

begin {
  # make a valid LaTeX document: emit the preamble once, before any
  # input is read.
  add "
  %% -------------------------------------------
  %% latex generated by: nom.tolatex.pss
  %% the geometry package stops big margins.
  \\documentclass{article}
  %\\usepackage{libertine}
  %\\usepackage{fontspec}
  % for unusual characters. install font
  %\\setmainfont{Linux Libertine O}
  \\usepackage[margin=30pt,nohead]{geometry}
  % more color names
  \\usepackage[dvipsnames]{xcolor}
  \\definecolor{codegreen}{rgb}{0,0.6,0}
  \\definecolor{codegray}{rgb}{0.5,0.5,0.5}
  \\definecolor{codepurple}{rgb}{0.58,0,0.82}
  \\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
  \\newcommand{\\nomcomment}[1]{{\\color{darkgray} \\textit{#1}}}
  \\newcommand{\\nomstring}[1]{\\textcolor{magenta}{#1}}
  \\newcommand{\\nomclass}[1]{\\textcolor{blue}{#1}}
  \\newcommand{\\nomcommand}[1]{{\\color{blue} \\textbf{#1}}}
  \\newcommand{\\nomkeyword}[1]{\\textcolor{violet}{#1}}
  \\newcommand{\\nomerror}[1]{\\textcolor{magenta}{#1}}
  % black and white
  %\\newcommand{\\nomcomment}[1]{{\\color{darkgray} \\textit{#1}}}
  %\\newcommand{\\nomcommand}[1]{\\textbf{#1}}
  %\\newcommand{\\nomkeyword}[1]{\\textbf{#1}}
  %\\newcommand{\\nomerror}[1]{\\textit{#1}}
  %\\newcommand{\\nomstring}[1]{\\textcolor{black}{#1}}
  %\\newcommand{\\nomclass}[1]{\\textcolor{black}{#1}}
  \\parindent=0pt
  \\parskip=6pt
  \\title{document.title}
  \\author{document.author}
  \\date{\\today}
  \\begin{document}
  \\ttfamily
  \\large
  ";
  print; clear;
}

# end the latex code listing and document
(eof) {
  add "\\end{document} \n"; print; quit;
}

read;

# line-relative character numbers. could be important for splitting
# lines properly.
[\n] {
  nochars;
  # line numbers
  clear; add " \\newline\n{\\tiny "; lines; add ":}\\hspace*{1em}"; print;
  # a cool trick to count leading space: reset the character counter,
  # read all the leading blanks, and use the counter as the indent.
  clear; nochars; while [ \t]; put;
  # 1ex = size of 1 'x', 1em = size of 1 'm'
  clear; add "\\hspace*{"; chars; add "ex}"; print; clear;
  .restart
}

# just print space as-is
[:space:] { print; clear; .restart }

# literal tokens, for readability maybe 'dot*' and 'comma*'
# The test prefixes 'B' and 'E' are NOT in this class: if they were,
# the first letter of any word starting with B or E (eg the keyword
# EOF) would be consumed here before the word block could read the
# whole word. A lone B or E is handled in the word block below.
[<{}(!,.;)>] {
  # this put/clear/get/ code is superfluous
  replace "{" "\\{"; replace "}" "\\}";
  put; clear;
  add "\\textcolor{darkgray}{"; get; add "}";
  print; clear;
  .restart
}

# command names, need to do some tricks to parse ++ -- a+ etc
# here. This is because [:alpha:],[+-] etc is not a union set
# and while cannot do "while [:alpha:],[+-] etc
# subtle bug, [+-^0=] parses as a range!!! [a-z]
[:alpha:],[-+^0=] {
  "0" { clear; add "zero"; }
  "^" { clear; add "escape"; }
  "+" { while [+]; }
  "-" { while [-]; }
  "=" { while [=]; }
  while [:alpha:];

  # a B or E standing alone is the begins-with / ends-with test
  # prefix (the next character is a quote, so the while above read
  # nothing more). Print it exactly as the literal tokens are printed.
  "B","E" {
    put; clear;
    add "\\textcolor{darkgray}{"; get; add "}";
    print; clear;
    .restart
  }

  # parse a+ or a- for the accumulator
  "a" {
    # while [+-] is bug because compile.pss thinks its a range class
    # not a list class
    while [-+];
    "a+","a-" { put; }
    "a" { clear; add "add"; }
  }

  # one letter command abbreviations. The word is wrapped in '#'
  # so that only a whole-word match can be replaced.
  put; clear; add "#"; get; add "#";
  replace "#k#" "#clip#";
  replace "#K#" "#clop#";
  replace "#D#" "#replace#";
  replace "#d#" "#clear#";
  replace "#t#" "#print#";
  replace "#p#" "#pop#";
  replace "#P#" "#push#";
  replace "#u#" "#unstack#";
  replace "#U#" "#stack#";
  replace "#G#" "#put#";
  replace "#g#" "#get#";
  replace "#x#" "#swap#";
  replace "#m#" "#mark#";
  replace "#M#" "#go#";
  replace "#r#" "#read#";
  replace "#R#" "#until#";
  replace "#w#" "#while#";
  replace "#W#" "#whilenot#";
  replace "#n#" "#count#";
  replace "#c#" "#chars#";
  replace "#C#" "#nochars#";
  replace "#l#" "#lines#";
  replace "#L#" "#nolines#";
  replace "#v#" "#unescape#";
  replace "#z#" "#delim#";
  replace "#S#" "#state#";
  replace "#q#" "#quit#";
  replace "#s#" "#write#";
  replace "#o#" "#nop#";
  replace "#rs#" "#restart#";
  replace "#rp#" "#reparse#";
  # remove trailing and leading '#' char
  clip; clop; put;

  # writefile is also a command?
  # commands parsed above
  "a+","a-","++","--",
  "add","clip","clop","replace","upper","lower","cap","clear",
  "print","state","pop","push","unstack","stack","put","get","swap",
  "mark","go","read","until","while","whilenot",
  "count","zero","chars","lines","nochars","nolines",
  "escape","unescape","delim","quit","write",
  "reparse","restart","nop" {
    # put the text in the replace* token
    ++; put; --;
    clear; add "commandstyle*replace*"; push;push;
    .reparse
  }

  # words not commands
  "begin","parse","eof","EOF","==" {
    ++; put; --;
    clear; add "keywordstyle*replace*"; push;push;
    .reparse
  }

  # Anything else is an error. A wrongly capitalised command (eg
  # 'Add') used to be tested for separately here, but that branch
  # produced exactly the same output as this one, so the two are
  # merged. The text comes from the tape, so its case is preserved.
  clear; add "#* ?? *#"; get;
  ++; put; --;
  clear; add "errorstyle*replace*"; push;push;
  .reparse
}

# single line comments and multiline comments
'#' {
  # a '#' as the very last character of the input
  (eof) { clear; add "\\#"; print; clear; .restart }
  read;

  # An empty comment: a '#' immediately followed by a newline. The
  # newline has already been consumed by the read above, so the
  # line-start markup of the [\n] block must be produced here,
  # otherwise the next line would lose its \newline, line number and
  # indent. The !"##" guard is needed because the class test also
  # matches two hash characters, which must fall through and be
  # treated as an ordinary comment.
  !"##" {
    [#\n] {
      clear; add "\\#"; print;
      nochars;
      clear; add " \\newline\n{\\tiny "; lines; add ":}\\hspace*{1em}"; print;
      clear; nochars; while [ \t]; put;
      clear; add "\\hspace*{"; chars; add "ex}"; print; clear;
      .restart
    }
  }

  # multiline comments
  "#*" {
    until "*#"; put;
    # the input ended before the comment was closed
    !E"*#" {
      add "?? *#";
      ++; put; --;
      clear; add "errorstyle*replace*"; push;push;
      .reparse
    }
    ++; put; --;
    clear; add "commentstyle*replace*"; push;push;
    .reparse
  }

  # an ordinary comment runs to the end of the line
  whilenot [\n];
  ++; put; --;
  clear; add "commentstyle*replace*"; push;push;
  .reparse
}

# double quoted text
'"' {
  # no error checking.
  # see nom.syntax.reference.pss for error checking
  until '"'; put;
  # unterminated string
  !E'"' {
    add '?? "';
    ++; put; --;
    clear; add "errorstyle*replace*"; push;push;
    .reparse
  }
  ++; put; --;
  clear; add "stringstyle*replace*"; push;push;
  .reparse
}

# single quotes
"'" {
  until "'"; put;
  # unterminated string
  !E"'" {
    add "?? '";
    ++; put; --;
    clear; add "errorstyle*replace*"; push;push;
    .reparse
  }
  ++; put; --;
  clear; add "stringstyle*replace*"; push;push;
  .reparse
}

# classes
"[" {
  until "]"; put;
  # unterminated class
  !E"]" {
    add "?? ]";
    ++; put; --;
    clear; add "errorstyle*replace*"; push;push;
    .reparse
  }

  # named classes like [:space:]
  B"[:".E":]".!"[::]".!"[:]" {
    # strip the leading '[:' and the trailing ':]' to get the name
    clip; clip; clop; clop; put;
    # list of [:class:] classes here. The character classes also have
    # abbreviations in nom (which may be silly but anyway)
    "alnum","N","alpha","A","ascii","I","word","W","blank","B",
    "cntrl","C","digit","D","graph","G","lower","L","print","P",
    "punct","T","space","S","upper","U","xdigit","X" {
      clear; add "[:"; get; add ":]";
      ++; put; --;
      clear; add "classstyle*replace*"; push;push;
      .reparse
    }
    # not a known class name
    clear; add "[?"; get; add "?]";
    ++; put; --;
    clear; add "errorstyle*replace*"; push;push;
    .reparse
  }

  # now [a-z] classes. I will not permit [\n-\t] silly
  # todo check this
  ++; put; --;
  clear; add "classstyle*replace*"; push;push;
  .reparse
}

# any character that no block above recognised
!"" {
  add " \\#* char ?? *\\#"; print; clear;
  .restart
}

parse>

# use to replace latex special characters
pop;

"replace*" {
  # the text to mark up is in the tape cell of this token
  clear; get;
  replace "\\" "\\textbackslash{}";
  replace "{" "\\{";
  replace "}" "\\}";
  # undo the circular escape: the two brace replacements above also
  # escaped the braces of the \textbackslash{} that was just inserted
  replace "textbackslash\\" "textbackslash";
  replace "textbackslash{\\" "textbackslash{";
  replace "#" "\\#";
  replace "$" "\\$";
  replace "%" "\\%";
  # the textup stops the nice but strange 'et' latin etc.
  replace "&" "\\textup{\\&}";
  replace "_" "\\_";
  replace "^" "\\textasciicircum{}";
  replace "~" "\\textasciitilde{}";
  # continuation lines of multiline comments and strings
  replace "\n" "\\newline\\hspace*{3em} \n";
  put; clear;

  # now the style token, which sits one cell to the left of the text
  pop;
  "italic*" {
    clear; add "\\textit{"; ++; get; --; add "}"; put; clear;
  }
  "bold*" {
    clear; add "\\textbf{"; ++; get; --; add "}"; put; clear;
  }
  # markup text as a comment
  "commentstyle*" {
    clear; add "\\nomcomment{"; ++; get; --; add "}"; put; clear;
  }
  # markup text as a string
  "stringstyle*" {
    clear; add "\\nomstring{"; ++; get; --; add "}"; put; clear;
  }
  # markup text as a class
  "classstyle*" {
    clear; add "\\nomclass{"; ++; get; --; add "}"; put; clear;
  }
  # markup text as a command
  "commandstyle*" {
    clear; add "\\nomcommand{"; ++; get; --; add "}"; put; clear;
  }
  # markup text as a keyword like eof etc
  "keywordstyle*" {
    clear; add "\\nomkeyword{"; ++; get; --; add "}"; put; clear;
  }
  # markup text as an error
  "errorstyle*" {
    clear; add "\\nomerror{"; ++; get; --; add "}"; put; clear;
  }
  get; print; clear;
}

push;