# Assembled with the script 'compile.pss'
# program entry label
start:
#
#   pars/compile.pss
#
#   This is a parse-script which compiles parse-scripts (!).
#
#   What is more, it can compile itself... so we can do
#   >> pep -f compile.pss compile.pss
#   This is useful because the resulting 'assembler' program (in sav.pp)
#   and printed to stdout, can be used as a replacement for 'asm.pp'
#   which is the default parse-script language compiler. The advantage
#   is that it is easier to maintain and add new syntax to compile.pss
#   than it is to 'asm.pp'.
#
#   This script uses the virtual machine and engine implemented at
#   http://bumble.sf.net/books/pars/object/ It implements a script language
#   with a syntax reminiscent of sed and awk (much simpler than awk, but
#   more complex than sed).
#
#   This code was created in a straightforward manner by adapting the
#   "assembled" code in 'asm.pp'. Some extra error checks were added.
#   Also, the EOF test was placed at the end of the script to remove
#   the 'last character' bug. It was evident that using the script
#   language is much more comfortable than hand-coding parse machine
#   assembler programs.
#
#REPLACING ASMPP
#
#   We can use this script as a replacement for "asm.pp" or
#   "asm.handcode.pp" which is a script assembler written by hand in
#   the parse machine assembly format (1 command per line, labels, jumps,
#   tests, etc).
#
#   * replace asm.pp with compile.pss
#   -----
#     # generate the new script assembler
#     cp asm.pp asm.old; pep -f compile.pss compile.pss > asm.new.pp
#     cp asm.new.pp asm.pp
#
#     # test the new assembler (the script "r;t;t;t;d;" will be compiled
#     # by the new asm.pp which we have just created.
# pep -e "r;t;t;t;d;" -i "abcd" # # output: aaabbbcccddd # ,,, # # The advantage of all this, is that it is much easier to maintain and add # new syntax to "pars/compile.pss" than it is directly, to "pars/asm.pp" # # For example asm.handcode.pp still uses "rabbit hops" to compile "quoteset" # tokens (an old version of the "ortestset" token), which is very inefficient # but compile.pss uses the new look-ahead technique. Also, there are negated # tests implemented in compile.pss but not implemented in asm.handcode.pp # # I will no longer continue to maintain asm.handcode.pp because its real # purpose was to "bootstrap" the current script. I will maintain working # copies of asm.pp as generated by this script in case of future errors. # # #NOTES # # The accumulator register was being used to generate true-jump # targets for testsets, but no longer # # This script can be used as the basis for many others which transform # scripts in some way. # # For example, to 'pretty-print' scripts, or to generate compilable c code # for a script using the functions in machine.methods.c. So, instead of # compiling to the "assembler" format for the machine (which is then # interpreted by the code in gh.c) we can compile to a series of c function # calls. This is c source code which can be compiled with gcc, producing an # executable version of the target script. # # This is an interesting idea, because we can transform a script into # compilable or executable code in a different language with a different # 'Machine' object. So, for example, we could write a Machine object in Ruby # or Java or Python or x86 assembler and then generate compilable or # executable code for that target environment. The compilable code would # consist of a series of method calls for the given object and test and # jumps. # # It will also be interesting to see if there is a significant # performance advantage in running executed, rather than interpreted # scripts. 
# see compilable.c.pss for creating executable parse programs # from scripts # #GRAMMAR NOTES # # The machine cannot directly implement the ebnf structures of repetition # "{...}", optionality "[...]" or grouping "(...)", so we need to express all # grammar rules only in terms of alternation |. Quotesets are a handy way to # express this in a script, eg # # * bnf rule: alpha ::= a | b | c ; # >> 'a','b','c' { clear; add "alpha*"; push; } # # It is sometimes straightforward to factor out the above ebnf structures, # but the result is a greater number of rules. # #SEE ALSO # # At http://bumble.sf.net/books/pars/ # object/gh.c # the current implementation of the machine interpreter and debugger. # object/*.c # the virtual machine and components # compile.java.pss # attempts to compile scripts to java using a machine object in # object.java/ # compile.js.pss # attempts to compile scripts to javascript using a machine object in # object.javascript/ and nodejs # compilable.c.pss # compiles a script to c code (but is out of date as of 13 mar 2020) # asm.handcode.pp # a handcoded "assembly" compiler of the parse script language for # a previous version of the script language. # object/machine.methods.c # a set of functions to perform instructions on the parse machine. # #USAGE # # This script can be used to replace the hand-coded assembler file # "asm.handcode.pp" since it is much easier to maintain and add new syntax # for the parse-script language. I would like to preserve comments in the # output. # # We can also do the strange operation # >> pep -f compile.pss compile.pss # which actually creates an 'assembler' version of itself in 'sav.pp' # which should then be suitable for use as an 'asm.pp' substitute. # This is quite tricky to think about since it is so self-referential. # # But this is analogous to the equally strange operation # >> pep -f compilable.c.pss compilable.c.pss # which generates a compilable c language program of the compilable # script.
# # It may be possible to compile this script into a stand-alone # executable with: (untested) # ---- # pep -f compilable.c.pss compile.pss > compile.c # gcc -o compile.exec -Lobject -lmachine -Iobject # ,,,, # # Also, it may be more useful for this script to print out its # output rather than writing it to file. # #TESTING # # * view how this script compiles an inline script # >> pep -f compile.pss -i "[aeiou] {a '(vowel)'; } t;d;" # # The result will also be saved in "sav.pp" # # * see how the compiled script runs # >> pep -a sav.pp -i "abcde" # output: a(vowel)bcde(vowel) # # * test "test chaining" compilation # >> pep -f compile.pss -i "r;'a','b','c'{t;}t;d;" # >> pep -a sav.pp -i "axbxcx" # output should be: aaxbbxccx # # * view/debug how compile.pss compiles test chains (or something else) # >> pep -If compile.pss -i "r;'a','b','c'{t;}t;d;" # # This provides interactive debugging of the compilation process. # #FIXED BUGS # # I was getting segmentation faults because of one-off errors etc # >> pep -f compile.pss compile.pss # Mainly fixed with "valgrind", but still a bug in "until" (in # object/machine.interp.c execute()... need to implement endsWith() function. # And one other bug. # * didnt need 2 jumps after "tests", just 1 jumpfalse or jumptrue # used "replace" to remove the unnecessary jump # #BUGS # # * doesnt catch B[abc] or E[a-z] type errors in scripts. Also # doesnt catch "r;r;d" type errors. # # compile.pss should not write the compiled script to stdout # because then asm.pp will do the same thing. easy enough to fix # in asm.pp as well (comment out final 2 "print" commands). # # comments not parsing correctly. # # Comments and multiline comments should not jump back to read # after deleting the comment, because there could be no more # input, and read will throw an error. They should jump to # the EOF end-of-file check. Or they could just call ".reparse" # which is safe but not very efficient. 
# #TODO # # We could allow single argument "replace" command eg: # >> replace "x"; # which is equivalent to # >> replace "x" ""; # # Need to catch multiline quote errors when used with the # "until" command. # # Allow multiline arguments with the "add" command. # # Separate error checking into a new script, and make pep load # an assembled version of this error checker. This will allow # the same error checker to be used with the scripts # compile.java.pss compile.tcl.pss etc. # # #HISTORY # # 15 june 2021 # # Adding the commands "upper" "lower" and "cap" # "nochars" "nolines" # # 13 march 2020 # # Added compilation for multiline arguments for the "add" # command. Appears to be working. # # 15 sept 2019 # # realised that I can have an eof error check block at # the end of the script just before all the tokens are # pushed back on the stack. See the 1 and 2 token eof error # check in this script. # # 13 september 2019 # # Adding "mark" and "go" commands here. # Improved unterminated quote '/" error messages. In general # it is much more helpful to catch the error when it happens # and print an informative message (with line-number etc). # # 5 september 2019 # # Added a "stack" and "unstack" command to the machine and # to compile.pss # # 29 august 2019 # # Improved some error checks. Could make the error check code # more succinct. # # Changed the way testeof and testtape are parsed to include # them with other tests. This also allows to negate them with # !(==) and !(eof) and also to concatenate with other tests # eg: (eof),B"abc" {} # added extra syntax <eof> and <==> for these tests. # # 25 august 2019 # # Realised that I dont need 2 jumps for OR test concatenation (with ',') # That will greatly improve script interpretation efficiency. # # Added AND concatenation logic to tests so now we can do # # * test if workspace begins with 'a' AND ends with 'z' # >> B"a".E"z" {} # # Changed the way .reparse and .restart are parsed and compiled. 
# These are now parsed as 2 tokens ".*word*". This allows me to # use '.' for AND logic concatenation in tests. It also allows # me to provide special semantic meaning to commands beginning with # a dot, which seems like a good thing. # # added "delim" command here and in machine.c and machine.interp.c, # to change the stack delimiter. # # 24 august 2019 # # The "state*" token should be separated into "testeof*" and # "testtape*" and then the 2 tests can be elided.(done) # # The conversion to a "test*{*" rule and ellision of # multiple tests will make this script much more compact and hopefully # just as readable. Also, as a side effect, negation of all # tests will be available soon. Also, it is possible to chain together # different types of tests. # # Converted quoteset to "ortestset*" and "andtestset*". # I will introduce a new notation namely: # # * check if workspace begins with "abc" AND ends with "xyz" # >> B"abc" . E"xyz" { commands } # # so the dot will become an "AND" (&&) concatenator of tests # and "," will remain as the "OR" (||) concatenator of tests # In these || and && test lists any type of test can be # included for example # # * check if workspace starts with "a", only contains chars a|b|c # * and ends with the letter "z" (using "." AND concatenator) # >> B"abc" . [abc] . E"z" { ... } # # experimenting with the new technique to create negated tokens # classes. # # * test negated tokens for "testis" # >> /usr/local/bin/pp -f compile.pss -i 'r;!"b",!"a"{nop;}' # # 23 august 2019 # # adding begintests and endtests to the quoteset logic. But # need to juggle the combinations. Also could add classes # and notclasses. more or less working. But should actually # changing parsing to make quotesets more flexible, see the # section of the script for details. # # The new quoteset compilation seems to be working. # Needs more testing. We can now use compile.pss as a replacement # for asm.pp. 
#
#   Converting to a new quoteset (eg: 'n','m' {...} ) lookahead compiling
#   technique. Also we can compile comments with rules for
#   "comment*command*" and "command*comment*" and "comment*comment*" ->
#   "comment*". Instead of the current shenanigans.
#
# 14 august 2019
#
#   trying to preserve comments here but cant reduce comments
#   with tokens like {* }* !* etc because we never retrieve
#   the attributes for those tokens. more thought required.
#
#   added a !"text" {...} syntax. very simple to add here.
#   did the same in compilable.c.pss
#
#   added a "begin" block to this (for start configurations of scripts).
#   Also need to improve the compilation of quotesets tokens which produce
#   nifty but very poor code. need 'tapereplace' command for this.
#
# 6 august 2019
#
#   would be handy to have multiline quotes. should be
#   easy to include. In fact they probably should already work, dont
#   know why not...
#
# 30 july 2019
#
#   Fixed the last character bug by putting the EOF test at the very end of
#   the file. The translation is complete and the script appears to be
#   working but no doubt will contain bugs. Initially translated from
#   asm.pp.
#
#
# LEXING PHASE: each pass reads one character and turns it (plus any
# following text) into a "name*" token that is pushed, then jumps to
# the 'parse' label.
# NOTE(review): the "jumptrue N" offsets below appear to count
# instructions (each test/jump pair is 2), so instructions must not be
# inserted inside a chain without renumbering - confirm against the
# machine documentation. Comment lines are safe to add.
read
#--------------
# whitespace between tokens is discarded
testclass [:space:]
jumpfalse block.end.12954
clear
jump parse
block.end.12954:
#---------------
# We can elide all these single character tests, because
# the stack token is just the character itself with a *
# Braces {} are used for blocks, ',' and '.' for concatenating
# tests with OR or AND logic. 'B' and 'E' for begin and end
# tests.
testis "{"
jumptrue 16
testis "}"
jumptrue 14
testis ";"
jumptrue 12
testis ","
jumptrue 10
testis "."
jumptrue 8
testis "!"
jumptrue 6
testis "B"
jumptrue 4
testis "E"
jumptrue 2
jump block.end.13316
# every jumptrue above lands here: the character itself is the
# attribute and <character>* is the token
put
add "*"
push
jump parse
block.end.13316:
#---------------
# format: "text"
testis "\""
jumpfalse block.end.13669
# save the line number in case there is no terminating
# quote.
# (continuation of the "text" rule: the opening double quote has just
# been matched)
clear
ll
put
clear
add "\""
until "\""
testends "\""
jumptrue block.end.13611
clear
add "Unterminated quote (\") starting at line "
get
add " !\n"
print
quit
block.end.13611:
put
clear
add "quote*"
push
jump parse
block.end.13669:
#---------------
# format: 'text', single quotes are converted to double quotes
# but we must escape embedded double quotes.
testis "'"
jumpfalse block.end.14169
# save the line number in case there is no terminating
# quote.
clear
ll
put
clear
until "'"
testends "'"
jumptrue block.end.14044
clear
# this branch handles the single quote, so name that character in
# the message
add "Unterminated quote (') starting at line "
get
add "!\n"
print
quit
block.end.14044:
# drop the closing ', escape embedded double quotes and re-wrap the
# text in double quotes
clip
escape "\""
put
clear
add "\""
get
add "\""
put
clear
add "quote*"
push
jump parse
block.end.14169:
#---------------
# formats: [:space:] [a-z] [abcd] [:alpha:] etc
testis "["
jumpfalse block.end.14317
until "]"
put
clear
add "class*"
push
jump parse
block.end.14317:
#---------------
# formats: (eof) (==) etc. I may change this syntax to just
# EOF and ==
testis "("
jumpfalse block.end.14826
clear
until ")"
clip
put
testis "eof"
jumptrue 4
testis "EOF"
jumptrue 2
jump block.end.14511
clear
add "eof*"
push
jump parse
block.end.14511:
testis "=="
jumpfalse block.end.14564
clear
add "tapetest*"
push
jump parse
block.end.14564:
# anything else between brackets is an error; the message is appended
# to the offending text still in the workspace
add " << unknown test near line "
ll
add " of script.\n"
add " bracket () tests are \n"
add " (eof) test if end of stream reached. \n"
add " (==) test if workspace is same as current tape cell \n"
print
clear
quit
block.end.14826:
#---------------
# multiline and single line comments, eg #... and #* ... *#
testis "#"
jumpfalse block.end.15875
clear
read
# an empty comment ("#" followed directly by a newline) is dropped
testis "\n"
jumpfalse block.end.14962
clear
jump parse
block.end.14962:
# checking for multiline comments of the form "#* \n\n\n *#"
# these are just ignored at the moment (deleted)
testis "*"
jumpfalse block.end.15721
# save the line number for possible error message later
clear
ll
put
clear
until "*#"
testends "*#"
jumpfalse block.end.15466
# convert to # single-line comments
clip
clip
#put; clear; add "#*"; get; add "*#";
replace "\n" "\n#"
# create a "comment" parse token
put
clear
add "comment*"
push
jump parse
block.end.15466:
# make an unterminated multiline comment an error
# to ease debugging of scripts.
clear
add "unterminated multiline comment #* ... *# \n"
add "starting at line number "
get
add "\n"
print
clear
quit
block.end.15721:
# single line comments. some will get lost.
put
clear
add "#"
get
until "\n"
clip
put
clear
add "comment*"
push
jump parse
block.end.15875:
#----------------------------------
# parse command words (and abbreviations)
# legal characters for keywords (commands)
# D, M, Q and = are listed so that the abbreviations "D" (replace),
# "M" (go), "Q" (bail) and the "<==>" test, all of which are tested
# for further down, can actually reach those tests. Both classes in
# this section are kept identical.
testclass [abcdefghijklmnopqrstuvwxyzBDEOFKGMPQRUWS=+-<>0^]
jumptrue block.end.16256
# error message about a misplaced character
put
clear
add "!! Misplaced character '"
get
add "' in script near line "
ll
add " (character "
cc
add ") \n"
print
clear
bail
block.end.16256:
# my testclass implementation cannot handle complex lists
# eg [a-z+-] this is why I have to write out the whole alphabet
while [abcdefghijklmnopqrstuvwxyzBDEOFKGMPQRUWS=+-<>0^]
#----------------------------------
# KEYWORDS
# here we can test for all the keywords (command words) and their
# abbreviated one letter versions (eg: clip k, clop K etc). Then
# we can print an error message and abort if the word is not a
# legal keyword for the parse-edit language
# make ll an alias for "lines" and cc an alias for chars
testis "lines"
jumpfalse block.end.16840
clear
add "ll"
block.end.16840:
testis "chars"
jumpfalse block.end.16872
clear
add "cc"
block.end.16872:
# one letter command abbreviations
testis "a"
jumpfalse block.end.16939
clear
add "add"
block.end.16939:
testis "k"
jumpfalse block.end.16969
clear
add "clip"
block.end.16969:
testis "K"
jumpfalse block.end.16999
clear
add "clop"
block.end.16999:
testis "D"
jumpfalse block.end.17032
clear
add "replace"
block.end.17032:
testis "d"
jumpfalse block.end.17063
clear
add "clear"
block.end.17063:
testis "t"
jumpfalse block.end.17094
clear
add "print"
block.end.17094:
testis "p"
jumpfalse block.end.17123
clear
add "pop"
block.end.17123:
testis "P"
jumpfalse block.end.17153
clear
add "push"
block.end.17153:
testis "u"
jumpfalse block.end.17186
clear
add "unstack"
block.end.17186:
testis "U"
jumpfalse block.end.17217
clear
add "stack"
block.end.17217:
testis "G"
jumpfalse block.end.17246
clear
add "put"
block.end.17246:
testis "g"
jumpfalse block.end.17275
clear
add "get"
block.end.17275:
testis "x"
jumpfalse block.end.17305
clear
add "swap"
block.end.17305:
testis ">"
jumpfalse block.end.17333
clear
add "++"
block.end.17333:
testis "<"
jumpfalse block.end.17361
clear
add "--"
block.end.17361:
testis "m"
jumpfalse block.end.17391
clear
add "mark"
block.end.17391:
testis "M"
jumpfalse block.end.17419
clear
add "go"
block.end.17419:
testis "r"
jumpfalse block.end.17449
clear
add "read"
block.end.17449:
testis "R"
jumpfalse block.end.17480
clear
add "until"
block.end.17480:
testis "w"
jumpfalse block.end.17511
clear
add "while"
block.end.17511:
testis "W"
jumpfalse block.end.17545
clear
add "whilenot"
block.end.17545:
# we can probably omit tests and jumps since they are not
# designed to be used in scripts (only assembled parse programs).
#  "b" { clear; add "jump"; }
#  "j" { clear; add "jumptrue"; }
#  "J" { clear; add "jumpfalse"; }
#  "=" { clear; add "testis"; }
#  "?" { clear; add "testclass"; }
#  "b" { clear; add "testbegins"; }
#  "B" { clear; add "testends"; }
#  "E" { clear; add "testeof"; }
#  "*" { clear; add "testtape"; }
#
# Remaining abbreviations: each test rewrites the workspace to the full
# command name and falls through to the next test.
testis "n"
jumpfalse block.end.18023
clear
add "count"
block.end.18023:
testis "+"
jumpfalse block.end.18051
clear
add "a+"
block.end.18051:
testis "-"
jumpfalse block.end.18079
clear
add "a-"
block.end.18079:
testis "0"
jumpfalse block.end.18109
clear
add "zero"
block.end.18109:
testis "c"
jumpfalse block.end.18141
clear
add "cc"
block.end.18141:
testis "chars"
jumpfalse block.end.18173
clear
add "cc"
block.end.18173:
testis "l"
jumpfalse block.end.18205
clear
add "ll"
block.end.18205:
testis "lines"
jumpfalse block.end.18237
clear
add "ll"
block.end.18237:
testis "^"
jumpfalse block.end.18269
clear
add "escape"
block.end.18269:
testis "v"
jumpfalse block.end.18303
clear
add "unescape"
block.end.18303:
testis "z"
jumpfalse block.end.18334
clear
add "delim"
block.end.18334:
testis "S"
jumpfalse block.end.18365
clear
add "state"
block.end.18365:
testis "q"
jumpfalse block.end.18395
clear
add "quit"
block.end.18395:
testis "Q"
jumpfalse block.end.18425
clear
add "bail"
block.end.18425:
testis "s"
jumpfalse block.end.18456
clear
add "write"
block.end.18456:
testis "o"
jumpfalse block.end.18485
clear
add "nop"
block.end.18485:
testis "rs"
jumpfalse block.end.18519
clear
add "restart"
block.end.18519:
testis "rp"
jumpfalse block.end.18553
clear
add "reparse"
block.end.18553:
# some extra syntax for testeof and testtape
# The two literals below had been reduced to empty strings, which can
# never equal a workspace that holds at least one character. They are
# restored to <eof> / <EOF>, the only spellings consistent with the
# "eof*" token pushed here and with the O and F characters accepted by
# the keyword character class.
testis "<eof>"
jumptrue 4
testis "<EOF>"
jumptrue 2
jump block.end.18664
put
clear
add "eof*"
push
jump parse
block.end.18664:
testis "<==>"
jumpfalse block.end.18722
put
clear
add "tapetest*"
push
jump parse
block.end.18722:
# "nochars", "nolines" {
#   put; clear;
#   add "The command '"; get; add "' (near line "; ll; add ")\n";
#   add "has not been implemented, but needs to be. \n";
#   print; clear; quit;
# }
#
# Any legal command name becomes a "word*" token. There are 51 tests
# and each jumptrue lands on the 'put' that follows the final 'jump'.
testis "add"
jumptrue 102
testis "clip"
jumptrue 100
testis "clop"
jumptrue 98
testis "replace"
jumptrue 96
testis "clear"
jumptrue 94
testis "upper"
jumptrue 92
testis "lower"
jumptrue 90
testis "cap"
jumptrue 88
testis "print"
jumptrue 86
testis "pop"
jumptrue 84
testis "push"
jumptrue 82
testis "unstack"
jumptrue 80
testis "stack"
jumptrue 78
testis "put"
jumptrue 76
testis "get"
jumptrue 74
testis "swap"
jumptrue 72
testis "++"
jumptrue 70
testis "--"
jumptrue 68
testis "mark"
jumptrue 66
testis "go"
jumptrue 64
testis "read"
jumptrue 62
testis "until"
jumptrue 60
testis "while"
jumptrue 58
testis "whilenot"
jumptrue 56
testis "jump"
jumptrue 54
testis "jumptrue"
jumptrue 52
testis "jumpfalse"
jumptrue 50
testis "testis"
jumptrue 48
testis "testclass"
jumptrue 46
testis "testbegins"
jumptrue 44
testis "testends"
jumptrue 42
testis "testeof"
jumptrue 40
testis "testtape"
jumptrue 38
testis "count"
jumptrue 36
testis "a+"
jumptrue 34
testis "a-"
jumptrue 32
testis "zero"
jumptrue 30
testis "cc"
jumptrue 28
testis "ll"
jumptrue 26
testis "nochars"
jumptrue 24
testis "nolines"
jumptrue 22
testis "escape"
jumptrue 20
testis "unescape"
jumptrue 18
testis "delim"
jumptrue 16
testis "state"
jumptrue 14
testis "quit"
jumptrue 12
testis "bail"
jumptrue 10
testis "write"
jumptrue 8
testis "nop"
jumptrue 6
testis "reparse"
jumptrue 4
testis "restart"
jumptrue 2
jump block.end.19450
put
clear
add "word*"
push
jump parse
block.end.19450:
#------------
# the .reparse command and "parse label" is a simple way to
# make sure that all shift-reductions occur. It should be used inside
# a block test, so as not to create an infinite loop.
# "parse>" in a script becomes the literal label line "parse:" in the
# compiled output
testis "parse>"
jumpfalse block.end.19766
clear
add "parse:"
put
clear
add "command*"
push
jump parse
block.end.19766:
# --------------------
# try to implement begin-blocks, which are only executed
# once, at the beginning of the script (similar to awk's BEGIN {} rules)
testis "begin"
jumpfalse block.end.19982
put
add "*"
push
jump parse
block.end.19982:
# Nothing above recognised the word: report it and stop.
# The workspace is cleared after saving the word so that the message
# is not prefixed with a second copy of it.
put
clear
add "Pep syntax error: unknown command '"
get
add "' \n"
add "on line "
ll
add " (or character "
cc
add ") "
add "of input (file or stream). \n"
print
clear
quit
# ----------------------------------
# PARSING PHASE:
# the lexing phase finishes here, and below is the
# parse/compile phase of the script. Here we pop tokens
# off the stack and check for sequences of tokens eg word*semicolon*
# If we find a valid series of tokens, we "shift-reduce" or "resolve"
# the token series eg word*semicolon* --> command*
# At the same time, we manipulate (transform) the attributes on the
# tape, as required. So Tape=|pop|;| becomes |\npop| where the
# bars | indicate tape cells. (2 tapes cells are merged into 1).
# Each time the stack is reduced, the tape must also be reduced
#
parse:
#-------------------------------------
# 2 tokens
#-------------------------------------
pop
pop
# All of the below are currently errors, but may not
# be in the future if we expand the syntax of the parse
# language. Also consider:
#   begintext* endtext* quoteset* notclass*, !* ,* ;* B* E*
# It is nice to trap the errors here because we can emit some
# hopefully not-very-cryptic error messages with a line number.
# Otherwise the script writer has to debug with
#   pep -a asm.pp scriptfile -I
#
# Two-token error checks and reductions. Each error block pushes the
# two tokens back before building its message (the original notes this
# makes debugging easier), then prints and quits.
testis "word*word*"
jumptrue 50
testis "word*}*"
jumptrue 48
testis "word*begintext*"
jumptrue 46
testis "word*endtext*"
jumptrue 44
testis "word*!*"
jumptrue 42
testis "word*,*"
jumptrue 40
testis "quote*word*"
jumptrue 38
testis "quote*class*"
jumptrue 36
testis "quote*state*"
jumptrue 34
testis "quote*}*"
jumptrue 32
testis "quote*begintext*"
jumptrue 30
testis "quote*endtext*"
jumptrue 28
testis "class*word*"
jumptrue 26
testis "class*quote*"
jumptrue 24
testis "class*class*"
jumptrue 22
testis "class*state*"
jumptrue 20
testis "class*}*"
jumptrue 18
testis "class*begintext*"
jumptrue 16
testis "class*endtext*"
jumptrue 14
testis "class*!*"
jumptrue 12
testis "notclass*word*"
jumptrue 10
testis "notclass*quote*"
jumptrue 8
testis "notclass*class*"
jumptrue 6
testis "notclass*state*"
jumptrue 4
testis "notclass*}*"
jumptrue 2
jump block.end.21909
push
push
add "error near line "
ll
add " (char "
cc
add ")"
add " of script (missing semicolon?) \n"
print
clear
quit
block.end.21909:
testis "{*;*"
jumptrue 6
testis ";*;*"
jumptrue 4
testis "}*;*"
jumptrue 2
jump block.end.22098
push
push
add "error near line "
ll
add " (char "
cc
add ")"
add " of script: misplaced semi-colon? ; \n"
print
clear
quit
block.end.22098:
# comma errors.
# ",*{*" is deliberately not part of this chain: it has its own, more
# specific message in the block that follows, which was unreachable
# while this chain caught it first.
testis ",*;*"
jumptrue 4
testis ",*}*"
jumptrue 2
jump block.end.22298
push
push
add "error near line "
ll
add " (char "
cc
add ")"
add " of script: misplaced comma? ; \n"
print
clear
quit
block.end.22298:
testis ",*{*"
jumpfalse block.end.22471
push
push
add "Pep: error near line "
ll
add " (char "
cc
add ")"
add " of script: extra comma in list? \n"
print
clear
quit
block.end.22471:
testis "command*;*"
jumptrue 4
testis "commandset*;*"
jumptrue 2
jump block.end.22663
push
push
add "Pep: error near line "
ll
add " (char "
cc
add ")"
add " of script: extra semi-colon? \n"
print
clear
quit
block.end.22663:
testis "!*!*"
jumpfalse block.end.22929
push
push
add "Pep: error near line "
ll
add " (char "
cc
add ")"
add " of script: \n double negation '!!' is not implemented \n"
add " and probably won't be, because what would be the point? \n"
print
clear
quit
block.end.22929:
testis "!*{*"
jumptrue 4
testis "!*;*"
jumptrue 2
jump block.end.23243
push
push
add "Pep: error near line "
ll
add " (char "
cc
add ")"
add " of script: misplaced negation operator (!)? \n"
add " The negation operator precedes tests, for example: \n"
add " !B'abc'{ ... } or !(eof),!'abc'{ ... } \n"
print
clear
quit
block.end.23243:
testis ",*command*"
jumpfalse block.end.23413
push
push
add "error near line "
ll
add " (char "
cc
add ")"
add " of script: misplaced comma? \n"
print
clear
quit
block.end.23413:
testis "!*command*"
jumpfalse block.end.23612
push
push
add "error near line "
ll
add " (at char "
cc
add ") \n"
add " The negation operator (!) cannot precede a command \n"
print
clear
quit
block.end.23612:
testis ";*{*"
jumptrue 6
testis "command*{*"
jumptrue 4
testis "commandset*{*"
jumptrue 2
jump block.end.23815
push
push
add "error near line "
ll
add " (char "
cc
add ")"
add " of script: no test for brace block? \n"
print
clear
quit
block.end.23815:
testis "{*}*"
jumpfalse block.end.23946
push
push
add "error near line "
ll
add " of script: empty braces {}. \n"
print
clear
quit
block.end.23946:
testis "B*class*"
jumptrue 4
testis "E*class*"
jumptrue 2
jump block.end.24174
push
push
add "error near line "
ll
add " of script:\n classes ([a-z], [:space:] etc). \n"
add " cannot use the 'begin' or 'end' modifiers (B/E) \n"
print
clear
quit
block.end.24174:
testis "}*command*"
jumpfalse block.end.24321
push
push
add "error near line "
ll
add " of script: extra closing brace '}' ?. \n"
print
clear
quit
block.end.24321:
testis "comment*{*"
jumpfalse block.end.24510
push
push
add "error near line "
ll
add " of script: comments cannot occur between \n"
add " a test and a brace ({). \n"
print
clear
quit
block.end.24510:
#------------
# the .restart command just jumps to the start: label
# (which is usually followed by a "read" command)
# but '.' is also the AND concatenator, which seems ambiguous,
# but the parsing works.
testis ".*word*"
jumpfalse block.end.25204
# look at the attribute of the second token (the word after the dot)
clear
++
get
--
testis "restart"
jumpfalse block.end.24882
clear
add "jump start"
put
clear
add "command*"
push
jump parse
block.end.24882:
testis "reparse"
jumpfalse block.end.24997
clear
add "jump parse"
put
clear
add "command*"
push
jump parse
block.end.24997:
push
push
add "error near line "
ll
add " (char "
cc
add ")"
add " of script: \n"
add " misplaced dot '.' (use for AND logic or in .reparse/.restart \n"
print
clear
quit
block.end.25204:
#-----------------------------------------
# compiling comments so as to transfer them to the compiled
# file.
# implement these rules to conserve comments
testis "comment*command*"
jumptrue 6
testis "command*comment*"
jumptrue 4
testis "commandset*comment*"
jumptrue 2
jump block.end.25526
# join the two attributes with a newline and keep a single command*
clear
get
add "\n"
++
get
--
put
clear
add "command*"
push
jump parse
block.end.25526:
testis "comment*comment*"
jumpfalse block.end.25640
clear
get
add "\n"
++
get
--
put
clear
add "comment*"
push
jump parse
block.end.25640:
# -----------------------
# negated tokens.
# This is a new more elegant way to negate a whole set of
# tests (tokens) where the negation logic is stored on the
# stack, not in the current tape cell. We just add "not" to
# the stack token.
# eg: ![:alpha:] ![a-z] ![abcd] !"abc" !B"abc" !E"xyz"
# This format is used to indicate a negative test for
# a brace block. eg: ![aeiou] { add "< not a vowel"; print; clear; }
testis "!*quote*"
jumptrue 12
testis "!*class*"
jumptrue 10
testis "!*begintext*"
jumptrue 8
testis "!*endtext*"
jumptrue 6
testis "!*eof*"
jumptrue 4
testis "!*tapetest*"
jumptrue 2
jump block.end.26383
# a simplification: just replace the token name with its
# negative.
replace "!*" "not"
push
# now get the token-value
# added an extra ++ here.
get
--
put
++
clear
jump parse
block.end.26383:
#-----------------------------------------
# format: E"text" or E'text'
# This format is used to indicate a "workspace-ends-with" text before
# a brace block.
testis "E*quote*"
jumpfalse block.end.26655
clear
add "endtext*"
push
get
--
put
++
clear
jump parse
block.end.26655:
#-----------------------------------------
# format: B"sometext" or B'sometext'
# A 'B' preceding some quoted text is used to indicate a
# 'workspace-begins-with' test, before a brace block.
testis "B*quote*"
jumpfalse block.end.26966
clear
add "begintext*"
push
get
--
put
++
clear
jump parse
block.end.26966:
#--------------------------------------------
# ebnf: command := word, ';' ;
# formats: "pop; push; clear; print; " etc
# all commands need to end with a semi-colon except for
# .reparse and .restart
testis "word*;*"
jumpfalse block.end.27664
clear
# check if command requires parameter
get
testis "add"
jumptrue 20
testis "until"
jumptrue 18
testis "while"
jumptrue 16
testis "whilenot"
jumptrue 14
testis "mark"
jumptrue 12
testis "go"
jumptrue 10
testis "escape"
jumptrue 8
testis "unescape"
jumptrue 6
testis "delim"
jumptrue 4
testis "replace"
jumptrue 2
jump block.end.27534
put
clear
add "Pep: '"
get
add "'"
add " << command needs an argument, on line "
ll
add " of script.\n"
print
clear
quit
block.end.27534:
clear
add "command*"
# no need to format tape cells because current cell contains word
push
jump parse
block.end.27664:
#-----------------------------------------
# ebnf: commandset := command , command ;
testis "command*command*"
jumptrue 4
# (second test of the "commandset := command , command" rule; the first
# test, for "command*command*", sits immediately before this line)
testis "commandset*command*"
jumptrue 2
jump block.end.27988
clear
add "commandset*"
push
# format the tape attributes. Add the next command on a newline
--
get
add "\n"
++
get
--
put
++
clear
jump parse
block.end.27988:
#-------------------
# here we begin to parse "test*" and "ortestset*" and "andtestset*"
#
#-------------------
# eg: B"abc" {} or E"xyz" {}
testis "begintext*{*"
jumptrue 12
testis "endtext*{*"
jumptrue 10
testis "quote*{*"
jumptrue 8
testis "class*{*"
jumptrue 6
testis "eof*{*"
jumptrue 4
testis "tapetest*{*"
jumptrue 2
jump block.end.29011
# set accumulator == 0
zero
# swap the token name in the workspace for the matching machine test
# instruction; only one of the six prefixes can match
testbegins "begin"
jumpfalse block.end.28304
clear
add "testbegins "
block.end.28304:
testbegins "end"
jumpfalse block.end.28343
clear
add "testends "
block.end.28343:
testbegins "quote"
jumpfalse block.end.28382
clear
add "testis "
block.end.28382:
testbegins "class"
jumpfalse block.end.28424
clear
add "testclass "
block.end.28424:
# clear the tapecell for testeof and testtape because
# they take no arguments.
testbegins "eof"
jumpfalse block.end.28556
clear
put
add "testeof "
block.end.28556:
testbegins "tapetest"
jumpfalse block.end.28605
clear
put
add "testtape "
block.end.28605:
# append the test argument (empty for testeof/testtape) and the jump
# scaffold of the compiled test
get
add "\n"
add "jumptrue 2 \n"
# this extra jump has utility when we parse ortestsets and
# andtestsets.
add "jump block.end."
# Tail of the positive "test*{*" rule (its opening instructions come
# directly before this point).
# The closing jump target is appended later, when one of
#   "test*{*commandset*}*"
#   "ortestset*{*commandset*}*"
#   "andtestset*{*commandset*}*"
# is parsed.
  put
  a+
  a+
  a+
  a+
  clear
  add "test*{*"
  push
  push
  jump parse
block.end.29011:
# ======================================================================
# A single negated test followed by an opening brace.
# eg: !B"xyz" {}
#     !E"xyz" {}
#     !"abc" {}
#     ![a-z] {}
# ======================================================================
testis "notbegintext*{*"
jumptrue 12
testis "notendtext*{*"
jumptrue 10
testis "notquote*{*"
jumptrue 8
testis "notclass*{*"
jumptrue 6
testis "noteof*{*"
jumptrue 4
testis "nottapetest*{*"
jumptrue 2
jump block.end.29980
  # set accumulator == 0
  zero
  # Pick the machine instruction that corresponds to the token type.
  testbegins "notbegin"
  jumpfalse block.end.29310
    clear
    add "testbegins "
block.end.29310:
  testbegins "notend"
  jumpfalse block.end.29352
    clear
    add "testends "
block.end.29352:
  testbegins "notquote"
  jumpfalse block.end.29394
    clear
    add "testis "
block.end.29394:
  testbegins "notclass"
  jumpfalse block.end.29439
    clear
    add "testclass "
block.end.29439:
  # testeof and testtape take no arguments, so the tape cell is
  # emptied (clear, put) before the instruction name is added.
  testbegins "noteof"
  jumpfalse block.end.29574
    clear
    put
    add "testeof "
block.end.29574:
  testbegins "nottapetest"
  jumpfalse block.end.29626
    clear
    put
    add "testtape "
block.end.29626:
  # Same pattern as the positive rule, with the jump sense inverted.
  get
  add "\n"
  add "jumpfalse 2 \n"
  # The extra jump is useful later, when ortestsets and andtestsets
  # are parsed.
  add "jump block.end."
  # The final target is appended later; the accumulator stores the
  # incremented jump target.
  put
  a+
  a+
  a+
  a+
  clear
  add "test*{*"
  push
  push
  jump parse
block.end.29980:
# ======================================================================
# 3 tokens
# ======================================================================
pop
# ----------------------------------------------------------------------
# Some 3 token errors. Many more are possible; only these are checked.
# ----------------------------------------------------------------------
testis "{*quote*;*"
jumptrue 8
testis "{*begintext*;*"
jumptrue 6
testis "{*endtext*;*"
jumptrue 4
testis "{*class*;*"
jumptrue 2
jump block.end.30421
  push
  push
  push
  add "error near line "
  ll
  add " (char "
  cc
  add ")"
  add " of script (misplaced semicolon?) \n"
  print
  clear
  quit
block.end.30421:
# ----------------------------------------------------------------------
# To simplify the later tests a single command inside braces is
# turned into a commandset (multiple commands).
# ----------------------------------------------------------------------
testis "{*command*}*"
jumpfalse block.end.30617
  clear
  add "{*commandset*}*"
  push
  push
  push
  jump parse
block.end.30617:
# ----------------------------------------------------------------------
# rule
#   ',' ortestset  ::= ',' test '{'
#   '.' andtestset ::= '.' test '{'
# A test preceded by ',' becomes an ortestset token.
# ----------------------------------------------------------------------
testis ",*test*{*"
jumpfalse block.end.30854
  clear
  add ",*ortestset*{*"
  push
  push
  push
  jump parse
block.end.30854:
# A test preceded by '.' becomes an andtestset token (decided by
# looking backwards in the stack).
testis ".*test*{*"
jumpfalse block.end.31091
  # the jump counter is 1 too high for AND tests
  a-
  clear
  add ".*andtestset*{*"
  push
  push
  push
  jump parse
block.end.31091:
# ----------------------------------------------------------------------
# Error: AND and OR concatenation mixed in one test list.
# ----------------------------------------------------------------------
testis ",*andtestset*{*"
jumptrue 4
testis ".*ortestset*{*"
jumptrue 2
jump block.end.31423
  # The tokens are pushed back to make debugging easier.
  push
  push
  push
  add " error: mixing AND (.) and OR (,) concatenation in \n"
  add " in script near line "
  ll
  add " (character "
  cc
  add ") \n"
  print
  clear
  quit
block.end.31423:
# ======================================================================
# ebnf:   command := keyword , quoted-text , ";" ;
# format: add "text";
# ======================================================================
testis "word*quote*;*"
jumpfalse block.end.33089
  clear
  get
  testis "replace"
  jumpfalse block.end.31763
    # error: replace was given a single quoted argument
    add "< command requires 2 parameters, not 1 \n"
    add "near line "
    ll
    add " of script. \n"
    print
    clear
    quit
block.end.31763:
  # Commands that accept one quoted argument; anything else falls
  # through to the error at block.end.32901.
  testis "add"
  jumptrue 18
  testis "until"
  jumptrue 16
  testis "while"
  jumptrue 14
  testis "whilenot"
  jumptrue 12
  testis "escape"
  jumptrue 10
  testis "mark"
  jumptrue 8
  testis "go"
  jumptrue 6
  testis "unescape"
  jumptrue 4
  testis "delim"
  jumptrue 2
  jump block.end.32901
  # TODO: check here or in error.pss for multiline quoted arguments
  # to "mark", "go", "until" etc because they are not allowed.
  # (The body of this rule follows directly after this point.)
# Body of the "word*quote*;*" rule for a command that accepts one
# quoted argument (the pattern test and the list of accepted command
# words come directly before this point).
  clear
  add "command*"
  push
  # a command plus argument, eg add "this"
  --
  get
  # Multiline text is allowed in (only) the add command. A multiline
  # "add" is turned into a sequence of single line "add" commands,
  # because that is what the assembler format allows. A plain
  #   replace "\n" "\\n";
  # should also work but would be much less readable in the assembled
  # file.
  testis "add"
  jumpfalse block.end.32603
    add " "
    ++
    get
    replace "\n" "\\n\"\nadd \""
    --
    put
    ++
    clear
    jump parse
block.end.32603:
  # "until" keeps its argument on one line: newlines are escaped.
  # (It might be useful to allow multiline here as well.)
  testis "until"
  jumpfalse block.end.32826
    add " "
    ++
    get
    replace "\n" "\\n"
    --
    put
    ++
    clear
    jump parse
block.end.32826:
  # Every other accepted command: "<word> <argument>" unchanged.
  add " "
  ++
  get
  --
  put
  ++
  clear
  jump parse
block.end.32901:
  # error, superfluous argument (the workspace still holds the word)
  add ": command does not take an argument \n"
  add "near line "
  ll
  add " of script. \n"
  print
  clear
  #state
  quit
block.end.33089:
# ======================================================================
# format: "while [:alpha:] ;" or whilenot [a-z] ;
# ======================================================================
testis "word*class*;*"
jumpfalse block.end.33597
  clear
  get
  testis "while"
  jumptrue 4
  testis "whilenot"
  jumptrue 2
  jump block.end.33447
    clear
    add "command*"
    push
    # a command plus argument, eg while [a-z]
    --
    get
    add " "
    ++
    get
    --
    put
    ++
    clear
    jump parse
block.end.33447:
  # error: only while/whilenot may take a class argument
  add " < command cannot have a class argument \n"
  add "line "
  ll
  add ": error in script \n"
  print
  clear
  quit
block.end.33597:
# ======================================================================
# 4 tokens
# ======================================================================
pop
# ----------------------------------------------------------------------
# ebnf:    command := replace , quote , quote , ";" ;
# example: replace "and" "AND" ;
# ----------------------------------------------------------------------
testis "word*quote*quote*;*"
jumpfalse block.end.34258
  clear
  get
  testis "replace"
  jumpfalse block.end.34138
    clear
    add "command*"
    push
    # a command plus 2 arguments, eg replace "this" "that"
    --
    get
    add " "
    ++
    get
    add " "
    ++
    get
    --
    --
    put
    ++
    clear
    jump parse
block.end.34138:
  # error: some command other than replace was given two quoted
  # arguments (the workspace still holds the command word).
  add " << command does not take 2 quoted arguments. \n"
  add " on line "
  ll
  add " of script.\n"
  # FIX: the message used to be built and then discarded, because
  # "quit" ran without a preceding "print". It is now printed and the
  # workspace cleared, as in every other error path of this script.
  print
  clear
  quit
block.end.34258:
# ======================================================================
# format: begin { #* commands *# }
# "begin" blocks are executed only once (they are assembled before
# the "start:" label). They must come before all other commands.
# Only the commandset form is matched here ("begin*{*command*}*" is
# not tested).
# ======================================================================
testis "begin*{*commandset*}*"
jumpfalse block.end.34642
  clear
  ++
  ++
  get
  --
  --
  put
  clear
  add "beginblock*"
  push
  jump parse
block.end.34642:
# ======================================================================
# Concatenated OR tests: one more positive test is prepended to an
# existing ortestset.
# eg: 'a',B'b',E'c',[def],[:space:],[g-k] { ...
# ======================================================================
testis "begintext*,*ortestset*{*"
jumptrue 12
testis "endtext*,*ortestset*{*"
jumptrue 10
testis "quote*,*ortestset*{*"
jumptrue 8
testis "class*,*ortestset*{*"
jumptrue 6
testis "eof*,*ortestset*{*"
jumptrue 4
testis "tapetest*,*ortestset*{*"
jumptrue 2
jump block.end.35566
  testbegins "begin"
  jumpfalse block.end.34971
    clear
    add "testbegins "
block.end.34971:
  testbegins "end"
  jumpfalse block.end.35011
    clear
    add "testends "
block.end.35011:
  testbegins "quote"
  jumpfalse block.end.35051
    clear
    add "testis "
block.end.35051:
  testbegins "class"
  jumpfalse block.end.35094
    clear
    add "testclass "
block.end.35094:
  # testeof and testtape take no arguments, so the tape cell is
  # emptied (clear, put) before the instruction name is added.
  testbegins "eof"
  jumpfalse block.end.35229
    clear
    put
    add "testeof "
block.end.35229:
  testbegins "tapetest"
  jumpfalse block.end.35279
    clear
    put
    add "testtape "
block.end.35279:
  # Emit "<test> <arg>", then a jumptrue whose offset is the current
  # accumulator value, then the code already compiled for the rest of
  # the set (two cells along).
  get
  add "\n"
  add "jumptrue "
  count
  add "\n"
  ++
  ++
  get
  --
  --
  put
  clear
  # This works as long as AND and OR concatenations are not mixed.
  # The token used to be "test*{*"; it is now "ortestset*{*".
  add "ortestset*{*"
  push
  push
  a+
  a+
  jump parse
block.end.35566:
# A collection of negated tests.
# ======================================================================
# Concatenated OR tests, negated member: a negated test is prepended
# to an existing ortestset.
# ======================================================================
testis "notbegintext*,*ortestset*{*"
jumptrue 12
testis "notendtext*,*ortestset*{*"
jumptrue 10
testis "notquote*,*ortestset*{*"
jumptrue 8
testis "notclass*,*ortestset*{*"
jumptrue 6
testis "noteof*,*ortestset*{*"
jumptrue 4
testis "nottapetest*,*ortestset*{*"
jumptrue 2
jump block.end.36363
  # Pick the machine instruction that corresponds to the token type.
  testbegins "notbegin"
  jumpfalse block.end.35838
    clear
    add "testbegins "
block.end.35838:
  testbegins "notend"
  jumpfalse block.end.35881
    clear
    add "testends "
block.end.35881:
  testbegins "notquote"
  jumpfalse block.end.35924
    clear
    add "testis "
block.end.35924:
  testbegins "notclass"
  jumpfalse block.end.35970
    clear
    add "testclass "
block.end.35970:
  # As in the other test rules, the eof and tapetest cases also
  # empty the tape cell (clear, put).
  testbegins "noteof"
  jumpfalse block.end.36017
    clear
    put
    add "testeof "
block.end.36017:
  testbegins "nottapetest"
  jumpfalse block.end.36070
    clear
    put
    add "testtape "
block.end.36070:
  # Emit "<test> <arg>", then a jumpfalse (the test is negated) whose
  # offset is the current accumulator value, then the code already
  # compiled for the rest of the set (two cells along).
  get
  add "\n"
  add "jumpfalse "
  count
  add "\n"
  ++
  ++
  get
  --
  --
  put
  clear
  # This works as long as AND and OR concatenations are not mixed.
  add "ortestset*{*"
  push
  push
  a+
  a+
  jump parse
block.end.36363:
# ======================================================================
# AND logic
# Parses and compiles concatenated AND tests, i.e. tests joined with
# '.', eg: 'a'.B'b'.E'c' { ...
# This only works as long as AND and OR concatenations are not mixed.
# The positive rule (next) could be merged with the negated rule for
# compactness, but readability might suffer.
# ======================================================================
# Concatenated AND tests: one more positive test is prepended to an
# existing andtestset.
testis "begintext*.*andtestset*{*"
jumptrue 12
testis "endtext*.*andtestset*{*"
jumptrue 10
testis "quote*.*andtestset*{*"
jumptrue 8
testis "class*.*andtestset*{*"
jumptrue 6
testis "eof*.*andtestset*{*"
jumptrue 4
testis "tapetest*.*andtestset*{*"
jumptrue 2
jump block.end.37293
  # Pick the machine instruction that corresponds to the token type.
  testbegins "begin"
  jumpfalse block.end.36907
    clear
    add "testbegins "
block.end.36907:
  testbegins "end"
  jumpfalse block.end.36947
    clear
    add "testends "
block.end.36947:
  testbegins "quote"
  jumpfalse block.end.36987
    clear
    add "testis "
block.end.36987:
  testbegins "class"
  jumpfalse block.end.37030
    clear
    add "testclass "
block.end.37030:
  testbegins "eof"
  jumpfalse block.end.37074
    clear
    put
    add "testeof "
block.end.37074:
  testbegins "tapetest"
  jumpfalse block.end.37124
    clear
    put
    add "testtape "
block.end.37124:
  # Emit "<test> <arg>", then a jumpfalse whose offset is the current
  # accumulator value, then the code already compiled for the rest of
  # the set (two cells along).
  get
  add "\n"
  add "jumpfalse "
  count
  add "\n"
  ++
  ++
  get
  --
  --
  put
  clear
  add "andtestset*{*"
  push
  push
  a+
  a+
  jump parse
block.end.37293:
# ======================================================================
# Negated tests concatenated with AND logic (.). Negated tests can
# be chained with non negated tests.
# eg: B'http' . !E'.txt' { ... }
# ======================================================================
testis "notbegintext*.*andtestset*{*"
jumptrue 12
testis "notendtext*.*andtestset*{*"
jumptrue 10
testis "notquote*.*andtestset*{*"
jumptrue 8
testis "notclass*.*andtestset*{*"
jumptrue 6
testis "noteof*.*andtestset*{*"
jumptrue 4
testis "nottapetest*.*andtestset*{*"
jumptrue 2
jump block.end.38094
  testbegins "notbegin"
  jumpfalse block.end.37694
    clear
    add "testbegins "
block.end.37694:
  testbegins "notend"
  jumpfalse block.end.37737
    clear
    add "testends "
block.end.37737:
  testbegins "notquote"
  jumpfalse block.end.37780
    clear
    add "testis "
block.end.37780:
  testbegins "notclass"
  jumpfalse block.end.37826
    clear
    add "testclass "
block.end.37826:
  testbegins "noteof"
  jumpfalse block.end.37873
    clear
    put
    add "testeof "
block.end.37873:
  testbegins "nottapetest"
  jumpfalse block.end.37926
    clear
    put
    add "testtape "
block.end.37926:
  # Same as the positive AND rule, with the jump sense inverted.
  get
  add "\n"
  add "jumptrue "
  count
  add "\n"
  ++
  ++
  get
  --
  --
  put
  clear
  add "andtestset*{*"
  push
  push
  a+
  a+
  jump parse
block.end.38094:
# ======================================================================
# A complete brace block: test(s), '{', commandset, '}'.
# The {*command*}* pattern needs no check here because it has
# already been transformed to {*commandset*}*.
# ======================================================================
testis "test*{*commandset*}*"
jumptrue 6
testis "andtestset*{*commandset*}*"
jumptrue 4
testis "ortestset*{*commandset*}*"
jumptrue 2
jump block.end.39165
  testbegins "test*{*"
  jumpfalse block.end.38715
    clear
    # Only in the single "test" case: collapse the two-instruction
    # jump pattern into one conditional jump.
    get
    # positive tests (eg [a-z] {...})
    replace "jumptrue 2 \njump" "jumpfalse"
    put
    # negative tests (eg ![a-z] {...})
    replace "jumpfalse 2 \njump" "jumptrue"
    put
block.end.38715:
  # Indent the assembled commandset (two cells along) for
  # readability.
  clear
  ++
  ++
  add " "
  get
  replace "\n" "\n "
  put
  --
  --
  clear
  get
  # The final jump (to the closing brace) was already coded by the
  # "test*{*" rule or the other test rules; only the label number is
  # still missing, and "cc" supplies it.
  cc
  add "\n"
  ++
  ++
  get
  # The matching label is started here; the rule carries on with the
  # instructions that directly follow this point.
  add "\nblock.end."
# Tail of the brace-block rule (its opening instructions come directly
# before this point): finish the "block.end.<cc>:" label, store the
# compiled block and reduce everything to one command token.
  cc
  add ":"
  --
  --
  put
  clear
  add "command*"
  push
  # always reparse/compile
  jump parse
block.end.39165:
# ======================================================================
# Multi-token end-of-stream errors.
# Not a comprehensive list of errors...
# ======================================================================
testeof
jumpfalse block.end.39941
  # A test token is the last thing in the script.
  testends "begintext*"
  jumptrue 10
  testends "endtext*"
  jumptrue 8
  testends "test*"
  jumptrue 6
  testends "ortestset*"
  jumptrue 4
  testends "andtestset*"
  jumptrue 2
  jump block.end.39472
    add " Error near end of script at line "
    ll
    add ". Test with no brace block? \n"
    print
    clear
    quit
block.end.39472:
  # An unterminated command or argument is the last thing in the
  # script; the parse stack is shown in the message.
  testends "quote*"
  jumptrue 6
  testends "class*"
  jumptrue 4
  testends "word*"
  jumptrue 2
  jump block.end.39684
    put
    clear
    add "Error end of script! (line "
    ll
    add ") missing semi-colon? \n"
    add "Parse stack: "
    get
    add "\n"
    print
    clear
    quit
block.end.39684:
  # A punctuation or prefix token is the last thing in the script.
  testends "{*"
  jumptrue 16
  testends "}*"
  jumptrue 14
  testends ";*"
  jumptrue 12
  testends ",*"
  jumptrue 10
  testends ".*"
  jumptrue 8
  testends "!*"
  jumptrue 6
  testends "B*"
  jumptrue 4
  testends "E*"
  jumptrue 2
  jump block.end.39937
    put
    clear
    add "Error: misplaced terminal character at end of script! (line "
    ll
    add "). \n"
    add "Parse stack: "
    get
    add "\n"
    print
    clear
    quit
block.end.39937:
block.end.39941:
# ======================================================================
# No rule matched: put the 4 (or less) tokens back on the stack.
# ======================================================================
push
push
push
push
testeof
jumpfalse block.end.41982
  #add "end of script!! \n"
  # NOTE(review): print/clear are read here as live instructions
  # following the commented-out debug "add"; confirm against the
  # uncollapsed file. After the four pushes the workspace is
  # presumably empty, so they should have no visible effect.
  print
  clear
  # --------------------------------------------------------------------
  # Check that the script parsed correctly: only one token should be
  # on the stack, namely "commandset*" or "command*".
  # --------------------------------------------------------------------
  pop
  pop
  testis "commandset*"
  jumptrue 4
  testis "command*"
  jumptrue 2
  jump block.end.40834
    push
    --
    add "# Assembled with the script 'compile.pss' \n"
    add "start:\n"
    get
    # an extra space because of a bug in compile()
    add "\njump start \n"
    # Keep a copy of the final compilation in the tape cell so it
    # can be inspected interactively.
    put
    # Remove this print from asm.pp after generating a new asm.pp
    # with pep -f compile.pss compile.pss > asm.new.pp; cp asm.new.pp asm.pp
    # print
    # remove!
# save the compiled script to 'sav.pp'
# (end of the "commandset*" / "command*" success path, which starts
# directly before this point)
    write
    clear
    quit
block.end.40834:
  # --------------------------------------------------------------------
  # Success with a begin block: the stack holds a beginblock token
  # followed by the compiled script. The begin-block code is emitted
  # first, then the "start:" label and the main code.
  # --------------------------------------------------------------------
  testis "beginblock*commandset*"
  jumptrue 4
  testis "beginblock*command*"
  jumptrue 2
  jump block.end.41474
    clear
    add "# Assembled with the script 'compile.pss' \n"
    get
    add "\n"
    ++
    add "start:\n"
    get
    # an extra space because of a bug in compile()
    add "\njump start \n"
    # Keep a copy of the final compilation in the tape cell so it
    # can be inspected interactively.
    put
    # Remove this 'print' from asm.pp after generating a new asm.pp
    # with pep -f compile.pss compile.pss > asm.new.pp; cp asm.new.pp asm.pp
    # print
    # remove!
    # also save the compiled script to 'sav.pp'
    write
    clear
    quit
block.end.41474:
  # --------------------------------------------------------------------
  # Anything else on the stack at EOF is a parse error in the input
  # script: print some debugging hints and exit with an error.
  # --------------------------------------------------------------------
  push
  push
  # state
  clear
  add "After compiling with 'compile.pss' (at EOF): \n "
  add " parse error in input script, check syntax: \n "
  add " To debug script try the -I switch with \n "
  add " >> pep -If script -i 'some input' \n "
  add " or to debug the compilation process try: \n "
  # FIX: the suggested command used to end in a stray, unbalanced
  # single quote (script'), which breaks it when pasted into a shell.
  add " >> pep -Ia asm.pp script \n "
  print
  clear
  # clear sav.pp because script could not be compiled
  write
  # bail means exit with error
  bail
block.end.41982:
# not eof
# there is an implicit .restart command here (jump start)
jump start