# Assembled with the script 'compile.pss' start: # translate.tcl.pss # # This is a parse-script which translates parse-scripts into tcl # code, using the 'pep' tool. The script creates a standalone # tcl program # # The virtual machine and engine is implemented in plain c at # http://bumble.sf.net/books/pars/pep.c. This implements a script # language with a syntax reminiscent of sed and awk (much simpler than # awk, but more complex than sed). # # This code was originally created by adapting the code in # 'translate.java.pss' which compiles scripts to java # #STATUS # # 7 sept 2022 # Most short tests working. # #NOTES # # Tcl doesn't require a semicolon ; at the end of a 1 line # statement, but I will include it, in case I want to put more # than one statement on a line. also for comments # # In other translation scripts, we use labelled loops and # break/continue to implement the parse> label and .reparse .restart # commands. But tcl does not have these, nor a "goto" statement. # This is resolved using "run once" blocks and break and continue # statements: eg while true { read; break; }. The run once technique # is a better general solution and could be used in all the other # translation scripts (because it doesnt require goto or labelled # blocks) # #TODO # # writefile; writefile "name"; readfile; readfile "name"; # The version without a parameter gets the filename from the # current tape cell. # # Consolidate error checking after parse> label # Add script* token on succesfull parse at EOF # # Add "until;"? (no-argument until). Add the "write ;" # and "append ;" commands. # # Convert to a parse method, which will also allow this to # act as an immediate interpreter of pep/nom scripts. # See tr/translate.perl.pss for a good example of a parse # method, that can act as an interpreter. # # Convert the grammar to the simpler version used in translate.perl.pss # (which also allows test-expressions like # >> (B"xx",B"yy").!E"zz" { ... } # #SEE ALSO # # At http://bumble.sf.net/books/pars/ # # d/- eg/: some example pep/nom scripts # - tr/: # the folder contains other translation scripts. The scripts # for python/ruby/java/go/javascript/tcl etc are quite complete. # - translate.py.pss # A python translator in working order (june 2021). # - compile.pss # compiles a script into an "assembly" format that can be loaded # and run on the parse-machine with the -a switch. This performs # the same function as "asm.pp" # #TESTING # # Comprehensive 1st and 2nd generation tests can be done with # >> pep.tt tcl # # This script tests every command in the pep/nom language with a # simple script and tests its translation into TCL. # # Also, use the helper functions in the peprc helpers.pars.sh such as # pep.ts pep.tss pep.tf and pep.tff # # Remember "man 3tcl command" for documentation about tcl/tk on # Unix-like systems. I would like to make a similar doc system for # pep. Could split the pars-book.txt file into "man" pages for # each pep command. # # * a simple test procedure, working # --------- # pep -f translate.tcl.pss -i "r;t;t;d;" > test.tcl # chmod a+x test.tcl # echo "abc" | tclsh test.tcl # or # echo "abc" | ./test.tcl # # should print 'aabbcc' # ,,, # # Check multiline text with 'add' and 'until' # # * one comprehensive test is to run the script on itself # >> pep -f translate.tcl.pss translate.tcl.pss > tran.tcl # # This is the "shangrilah" of pep scripts. # # test # ----- # pep -f translate.tcl.pss translate.tcl.pss > eg/tcl/translate.tcl.tcl # chmod a+x eg/tcl/translate.tcl.tcl # echo "nop;r;t;t;d;" | eg/tcl/translate.tcl.tcl # ,,,, # # test eg/natural.language.pss # # * translate the doc formatter into tcl # ---- # pep -f translate.tcl.pss eg/mark.latex.pss > eg/tcl/mark.latex.tcl # cat pars-book.txt | eg/tcl/mark.latex.tcl # ,,,, # # * translate the translator into tcl # ----- # pep -f translate.tcl.pss translate.tcl.pss > test.tcl # cat eg/exp.tolisp.pss | ./test.tcl > exp.tolisp.tr.tcl # echo "(a+2)*3+4" | ./exp.tolisp.tr.tcl # ,,, # # This is fairly complex. The script translates itself into # tcl, and then that translator is used to translate # another script into tcl, which is then executed.... # # self referentiality cubed, not working yet # ----- # pep -f translate.tcl.pss translate.tcl.pss > 1.tr.tcl.pss # chmod a+x 1.tr.tcl.pss # cat translate.tcl.pss | 1.tr.tcl.pss > 2.tr.tcl.pss # chmod a+x 2.tr.tcl.pss # cat eg/exp.tolisp.pss | 2.tr.tcl.pss > test.tcl # chmod a+x test.tcl # echo "(a+2)*3+4" | ./test.tcl # ,,, # #BUGS # # image format not working in eg/mark.latex.pss eg [[ ... ]] # # In second gen scripts \\ causes problems # # No-argument until not implemented here. # # unescape needs to walk the string, not just do \c -> c # # check until code for multiple escapes \\\\\\ # parse> label cannot be at beginning or end of script. # #SOLVED BUGS TO WATCH FOR # # Need to escape *[]\? in begin and ends tests because this # uses string match. # # In quotes interpolation will occur which is not good. We need to # stop it by escaping certain chars in the quote string. # # With these scripting languages, arrays elements dont exist # unless created. so "++; put; " fails with a list index out # of range error. # # Need to double \\ backslash sometimes to get a single one in the output # # found and fixed a bug in java whilenot/while. The code exits if the # character is not found, which is not correct. # # "until" bug where the code did not read at least one character. # # Read must exit if at end of stream, but while/whilenot/until, no. # #TASKS # #HISTORY # # 18 feb 2022 # Added go; syntax # 13 aug 2022 # One year later, fixing some remaining bugs. Quoting of special # chars in "string match" and regex is not easy in tcl. # When running eg/mark.latex.pss for some reason # "bl*uutext*4dots*nl*" does not reduce... # 20 aug 2021 # fixing escape code. Unescape still doesnt seem to work. # 18 august 2021 # seem to have fixed multiple escape bug. # 18 june 2021 # fixing the .restart bug before the parse label. just making .restart # do 'break;' instead of continue (which results in an infinite loop) # Added upper,lower and cap commands to this translator script. # # 7 june 2021 # Reexamining. Escaped { and } in classes. Made an eg/tcl/ folder # where generated code can go. Made the pep.tcs bash function # better for testing # # 7 august 2020 # the eg/exp.tolisp.pss seems to be working under translation # to tcl. # # testing and debugging. Init procedure. # reconfigured .reparse .restart as unlabelled run-once # blocks. This is useful for any language that has no "goto" # and no labelled blocks (loops etc). Script is nearing a # useable stage. # # 5 august 2020 # # tested print/clear/clop/clip/push/pop/stack/unstack # put/get/swap/ ... # # First most basic script is working: "r;t;t;d;" # # 2 august 2020 # # Began to adapt this script from translate.py.pss, the # python translator # # read #-------------- testclass [:space:] jumpfalse block.end.7134 clear jump parse block.end.7134: #--------------- # We can ellide all these single character tests, because # the stack token is just the character itself with a * # Braces {} are used for blocks of commands, ',' and '.' for concatenating # tests with OR or AND logic. 'B' and 'E' for begin and end # tests, '!' is used for negation, ';' is used to terminate a # command. testis "{" jumptrue 16 testis "}" jumptrue 14 testis ";" jumptrue 12 testis "," jumptrue 10 testis "." jumptrue 8 testis "!" jumptrue 6 testis "B" jumptrue 4 testis "E" jumptrue 2 jump block.end.7570 put add "*" push jump parse block.end.7570: #--------------- # format: "text" testis "\"" jumpfalse block.end.8271 # save the start line number (for error messages) in case # there is no terminating quote character. clear add "line " ll add " (character " cc add ") " # safer to use braces for quotes?? put clear add "\"" until "\"" testends "\"" jumptrue block.end.8004 clear add "Unterminated quote character (\") starting at " get add " !\n" print quit block.end.8004: # just escape [ and $ because they cause string interpolation # in tcl clip escape "[" escape "]" escape "$" escape "{" escape "}" escape "*" escape "^" escape "?" add "\"" put clear add "quote*" push jump parse block.end.8271: #--------------- # format: 'text', single quotes are converted to double quotes # but we must escape embedded double quotes. testis "'" jumpfalse block.end.9077 # save the start line number (for error messages) in case # there is no terminating quote character. clear add "line " ll add " (character " cc add ") " put clear until "'" testends "'" jumptrue block.end.8735 clear add "Unterminated quote (') starting at " get add "!\n" print quit block.end.8735: clip # [ and $ cause interpolation in tcl so must be escaped # escape other special chars. What about "\\"??? escape "[" escape "]" escape "$" escape "{" escape "}" escape "\"" escape "*" escape "^" escape "?" put clear add "\"" get add "\"" put clear add "quote*" push jump parse block.end.9077: #--------------- # formats: [:space:] [a-z] [abcd] [:alpha:] etc # should class tests really be multiline??! testis "[" jumpfalse block.end.12684 # save the start line number (for error messages) in case # there is no terminating bracket character. clear add "line " ll add " (character " cc add ") " put clear add "[" until "]" testis "[]" jumpfalse block.end.9601 clear add "pep script error at line " ll add " (character " cc add "): \n" add " empty character class [] \n" print quit block.end.9601: testends "]" jumptrue block.end.9888 clear add "Unterminated class text ([...]) starting at " get add "\n" add " class text can be used in tests or with the 'while' and \n" add " 'whilenot' commands. For example: \n" add " [:alpha:] { while [:alpha:]; print; clear; }\n" add " " print quit block.end.9888: # need to escape { and } so they dont interfere with the # quote braces used for regexp. Dont need to escape [ and ] because # the script writer has to escape them any way. (but what about in quotes?) escape "}" escape "{" # the caret is not a negation operator in pep scripts escape "^" # the line below also works #replace "^" "\\^"; # save the class on the tape put clop clop testbegins "-" jumptrue block.end.10442 # not a range class, eg [a-z] so need to escape '-' chars. yes clear get escape "-" put block.end.10442: testbegins "-" jumpfalse block.end.10830 # a range class, eg [a-z], check if it is correct clip clip testis "-" jumptrue block.end.10824 clear add "Error in pep script at line " ll add " (character " cc add "): \n" add " Incorrect character range class " get add "\n" add " For example:\n" add " [a-g] # correct\n" add " [f-gh] # error! \n" print clear quit block.end.10824: block.end.10830: clear get # restore class text testbegins "[:" jumpfalse 3 testends ":]" jumpfalse 2 jump block.end.10995 clear add "malformed character class starting at " get add "!\n" print quit block.end.10995: testbegins "[:" jumpfalse 3 testis "[:]" jumpfalse 2 jump block.end.11826 clip clip clop clop # unicode posix character classes in tcl # Also, abbreviations (not implemented in gh.c yet.) # abbreviations are written [:A:] etc testis "alnum" jumptrue 4 testis "N" jumptrue 2 jump block.end.11242 clear add "[[:alnum:]]" block.end.11242: testis "alpha" jumptrue 4 testis "A" jumptrue 2 jump block.end.11290 clear add "[[:alpha:]]" block.end.11290: testis "ascii" jumptrue 4 testis "I" jumptrue 2 jump block.end.11338 clear add "[[:ascii:]]" block.end.11338: testis "blank" jumptrue 4 testis "B" jumptrue 2 jump block.end.11386 clear add "[[:blank:]]" block.end.11386: testis "cntrl" jumptrue 4 testis "C" jumptrue 2 jump block.end.11434 clear add "[[:cntrl:]]" block.end.11434: testis "digit" jumptrue 4 testis "D" jumptrue 2 jump block.end.11482 clear add "[[:digit:]]" block.end.11482: testis "graph" jumptrue 4 testis "G" jumptrue 2 jump block.end.11530 clear add "[[:graph:]]" block.end.11530: testis "lower" jumptrue 4 testis "L" jumptrue 2 jump block.end.11578 clear add "[[:lower:]]" block.end.11578: testis "print" jumptrue 4 testis "P" jumptrue 2 jump block.end.11626 clear add "[[:print:]]" block.end.11626: testis "punct" jumptrue 4 testis "T" jumptrue 2 jump block.end.11674 clear add "[[:punct:]]" block.end.11674: testis "space" jumptrue 4 testis "S" jumptrue 2 jump block.end.11722 clear add "[[:space:]]" block.end.11722: testis "upper" jumptrue 4 testis "U" jumptrue 2 jump block.end.11770 clear add "[[:upper:]]" block.end.11770: testis "xdigit" jumptrue 4 testis "X" jumptrue 2 jump block.end.11820 clear add "[[:xdigit:]]" block.end.11820: block.end.11826: # alnum - alphanumeric like [0-9a-zA-Z] # alpha - alphabetic like [a-zA-Z] # blank - blank chars, space and tab # cntrl - control chars, ascii 000 to 037 and 177 (del) # digit - digits 0-9 # graph - graphical chars same as :alnum: and :punct: # lower - lower case letters [a-z] # print - printable chars ie :graph: + space # punct - punctuation ie !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~. # space - all whitespace, eg \n\r\t vert tab, space, \f # upper - upper case letters [A-Z] # xdigit - hexadecimal digit ie [0-9a-fA-F] # put clear # add quotes around the class and limits around the # class so it can be used with the string.matches() method # (must match the whole string, not just one character) add "^" get add "+$" put clear add "class*" push jump parse block.end.12684: #--------------- # formats: (eof) (EOF) (==) etc. testis "(" jumpfalse block.end.13155 clear until ")" clip put testis "eof" jumptrue 4 testis "EOF" jumptrue 2 jump block.end.12838 clear add "eof*" push jump parse block.end.12838: testis "==" jumpfalse block.end.12891 clear add "tapetest*" push jump parse block.end.12891: add " << unknown test near line " ll add " of script.\n" add " bracket () tests are \n" add " (eof) test if end of stream reached. \n" add " (==) test if workspace is same as current tape cell \n" print clear quit block.end.13155: #--------------- # multiline and single line comments, eg #... and #* ... *# testis "#" jumpfalse block.end.14652 clear read testis "\n" jumpfalse block.end.13291 clear jump parse block.end.13291: # checking for multiline comments of the form "#* \n\n\n *#" # these are just ignored at the moment (deleted) testis "*" jumpfalse block.end.14196 # save the line number for possible error message later clear ll put clear until "*#" testends "*#" jumpfalse block.end.13940 # no, need to convert to multiple single line comments # or a if {0} { } block for tcl clip clip put clear add "if {0} {" get add "}" # create a "comment" parse token put clear # comment-out this line to remove multiline comments from the # compiled code # add "comment*"; push; jump parse block.end.13940: # make an unterminated multiline comment an error # to ease debugging of scripts. clear add "Unterminated multiline comment #* ... *# \n" add "starting at line number " get add "\n" print clear quit block.end.14196: # single line comments. some will get lost. put clear add "#" get until "\n" clip # escape special chars for tcl, since it looks for # these even in comments! (I know, strange but true) escape "[" escape "]" escape "$" escape "{" escape "}" put clear # I am removing comments from translated scripts because # tcl doesnt like unbalanced braces in comments! add "comment*" push jump parse block.end.14652: #---------------------------------- # parse command words (and abbreviations) # legal characters for keywords (commands) testclass [abcdefghijklmnopqrstuvwxyzBEKGPRUWS+-<>0^] jumptrue block.end.15039 # error message about a misplaced character put clear add "!! Misplaced character '" get add "' in script near line " ll add " (character " cc add ") \n" print clear quit block.end.15039: # my testclass implementation cannot handle complex lists # eg [a-z+-] this is why I have to write out the whole alphabet while [abcdefghijklmnopqrstuvwxyzBEOFKGPRUWS+-<>0^] #---------------------------------- # KEYWORDS # here we can test for all the keywords (command words) and their # abbreviated one letter versions (eg: clip k, clop K etc). Then # we can print an error message and abort if the word is not a # legal keyword for the parse-edit language # make ll an alias for "lines" and cc an alias for chars testis "ll" jumpfalse block.end.15623 clear add "lines" block.end.15623: testis "cc" jumpfalse block.end.15655 clear add "chars" block.end.15655: # one letter command abbreviations testis "a" jumpfalse block.end.15722 clear add "add" block.end.15722: testis "k" jumpfalse block.end.15752 clear add "clip" block.end.15752: testis "K" jumpfalse block.end.15782 clear add "clop" block.end.15782: testis "D" jumpfalse block.end.15815 clear add "replace" block.end.15815: testis "d" jumpfalse block.end.15846 clear add "clear" block.end.15846: testis "t" jumpfalse block.end.15877 clear add "print" block.end.15877: testis "p" jumpfalse block.end.15906 clear add "pop" block.end.15906: testis "P" jumpfalse block.end.15936 clear add "push" block.end.15936: testis "u" jumpfalse block.end.15969 clear add "unstack" block.end.15969: testis "U" jumpfalse block.end.16000 clear add "stack" block.end.16000: testis "G" jumpfalse block.end.16029 clear add "put" block.end.16029: testis "g" jumpfalse block.end.16058 clear add "get" block.end.16058: testis "x" jumpfalse block.end.16088 clear add "swap" block.end.16088: testis ">" jumpfalse block.end.16116 clear add "++" block.end.16116: testis "<" jumpfalse block.end.16144 clear add "--" block.end.16144: testis "m" jumpfalse block.end.16174 clear add "mark" block.end.16174: testis "M" jumpfalse block.end.16202 clear add "go" block.end.16202: testis "r" jumpfalse block.end.16232 clear add "read" block.end.16232: testis "R" jumpfalse block.end.16263 clear add "until" block.end.16263: testis "w" jumpfalse block.end.16294 clear add "while" block.end.16294: testis "W" jumpfalse block.end.16328 clear add "whilenot" block.end.16328: testis "n" jumpfalse block.end.16359 clear add "count" block.end.16359: testis "+" jumpfalse block.end.16387 clear add "a+" block.end.16387: testis "-" jumpfalse block.end.16415 clear add "a-" block.end.16415: testis "0" jumpfalse block.end.16445 clear add "zero" block.end.16445: testis "c" jumpfalse block.end.16476 clear add "chars" block.end.16476: testis "l" jumpfalse block.end.16507 clear add "lines" block.end.16507: testis "^" jumpfalse block.end.16539 clear add "escape" block.end.16539: testis "v" jumpfalse block.end.16573 clear add "unescape" block.end.16573: testis "z" jumpfalse block.end.16604 clear add "delim" block.end.16604: testis "S" jumpfalse block.end.16635 clear add "state" block.end.16635: testis "q" jumpfalse block.end.16665 clear add "quit" block.end.16665: testis "s" jumpfalse block.end.16696 clear add "write" block.end.16696: testis "o" jumpfalse block.end.16725 clear add "nop" block.end.16725: testis "rs" jumpfalse block.end.16759 clear add "restart" block.end.16759: testis "rp" jumpfalse block.end.16793 clear add "reparse" block.end.16793: # some extra syntax for testeof and testtape testis "" jumptrue 4 testis "" jumptrue 2 jump block.end.16904 put clear add "eof*" push jump parse block.end.16904: testis "<==>" jumpfalse block.end.16962 put clear add "tapetest*" push jump parse block.end.16962: testis "jump" jumptrue 18 testis "jumptrue" jumptrue 16 testis "jumpfalse" jumptrue 14 testis "testis" jumptrue 12 testis "testclass" jumptrue 10 testis "testbegins" jumptrue 8 testis "testends" jumptrue 6 testis "testeof" jumptrue 4 testis "testtape" jumptrue 2 jump block.end.17290 put clear add "The instruction '" get add "' near line " ll add " (character " cc add ")\n" add "can be used in pep assembly code but not scripts. \n" print clear quit block.end.17290: # show information if these "deprecated" commands are used testis "Q" jumptrue 4 testis "bail" jumptrue 2 jump block.end.17634 put clear add "The instruction '" get add "' near line " ll add " (character " cc add ")\n" add "is no longer part of the pep language (july 2020). \n" add "use 'quit' instead of 'bail'\n" print clear quit block.end.17634: testis "add" jumptrue 82 testis "clip" jumptrue 80 testis "clop" jumptrue 78 testis "replace" jumptrue 76 testis "upper" jumptrue 74 testis "lower" jumptrue 72 testis "cap" jumptrue 70 testis "clear" jumptrue 68 testis "print" jumptrue 66 testis "pop" jumptrue 64 testis "push" jumptrue 62 testis "unstack" jumptrue 60 testis "stack" jumptrue 58 testis "put" jumptrue 56 testis "get" jumptrue 54 testis "swap" jumptrue 52 testis "++" jumptrue 50 testis "--" jumptrue 48 testis "mark" jumptrue 46 testis "go" jumptrue 44 testis "read" jumptrue 42 testis "until" jumptrue 40 testis "while" jumptrue 38 testis "whilenot" jumptrue 36 testis "count" jumptrue 34 testis "a+" jumptrue 32 testis "a-" jumptrue 30 testis "zero" jumptrue 28 testis "chars" jumptrue 26 testis "lines" jumptrue 24 testis "nochars" jumptrue 22 testis "nolines" jumptrue 20 testis "escape" jumptrue 18 testis "unescape" jumptrue 16 testis "delim" jumptrue 14 testis "quit" jumptrue 12 testis "state" jumptrue 10 testis "write" jumptrue 8 testis "nop" jumptrue 6 testis "reparse" jumptrue 4 testis "restart" jumptrue 2 jump block.end.18035 put clear add "word*" push jump parse block.end.18035: #------------ # the .reparse command and "parse label" is a simple way to # make sure that all shift-reductions occur. It should be used inside # a block test, so as not to create an infinite loop. There is # no "goto" in java so we need to use labelled loops to # implement .reparse/parse> testis "parse>" jumpfalse block.end.18688 clear count testis "0" jumptrue block.end.18543 clear add "[error] pep script error:\n" add " extra parse> label at line " ll add ".\n" print quit block.end.18543: clear add "// parse>" put clear add "parse>*" push # use accumulator to indicate after parse> label a+ jump parse block.end.18688: # -------------------- # implement "begin-blocks", which are only executed # once, at the beginning of the script (similar to awk's BEGIN {} rules) testis "begin" jumpfalse block.end.18899 put add "*" push jump parse block.end.18899: add " << unknown command on line " ll add " (char " cc add ")" add " of source file. \n" add " \n" add " Valid commands are:\n" add " add,clip,clop,replace,upper,lower,cap,clear,print,\n" add " pop,push,unstack,stack,put,get,swap,\n" add " ++,--,mark,go,read,until,while,whilenot,\n" add " count,a+,a-,zero,chars,lines,nochars,nolines,\n" add " escape,unescape,delim,quit,state,\n" add " write,nop,.reparse,.restart \n" print clear quit # ---------------------------------- # PARSING PHASE: # Below is the parse/compile phase of the script. Here we pop tokens off the # stack and check for sequences of tokens eg "word*semicolon*". If we find a # valid series of tokens, we "shift-reduce" or "resolve" the token series eg # word*semicolon* --> command* # At the same time, we manipulate (transform) the attributes on the tape, as # required. parse: #------------------------------------- # 2 tokens #------------------------------------- pop pop # All of the patterns below are currently errors, but may not # be in the future if we expand the syntax of the parse # language. Also consider: # begintext* endtext* quoteset* notclass*, !* ,* ;* B* E* # It is nice to trap the errors here because we can emit some # (hopefully not very cryptic) error messages with a line number. # Otherwise the script writer has to debug with # pep -a asm.pp -I scriptfile testis "word*word*" jumptrue 50 testis "word*}*" jumptrue 48 testis "word*begintext*" jumptrue 46 testis "word*endtext*" jumptrue 44 testis "word*!*" jumptrue 42 testis "word*,*" jumptrue 40 testis "quote*word*" jumptrue 38 testis "quote*class*" jumptrue 36 testis "quote*state*" jumptrue 34 testis "quote*}*" jumptrue 32 testis "quote*begintext*" jumptrue 30 testis "quote*endtext*" jumptrue 28 testis "class*word*" jumptrue 26 testis "class*quote*" jumptrue 24 testis "class*class*" jumptrue 22 testis "class*state*" jumptrue 20 testis "class*}*" jumptrue 18 testis "class*begintext*" jumptrue 16 testis "class*endtext*" jumptrue 14 testis "class*!*" jumptrue 12 testis "notclass*word*" jumptrue 10 testis "notclass*quote*" jumptrue 8 testis "notclass*class*" jumptrue 6 testis "notclass*state*" jumptrue 4 testis "notclass*}*" jumptrue 2 jump block.end.20950 add " (Token stack) \nValue: \n" get add "\nValue: \n" ++ get -- add "\n" add "Error near line " ll add " (char " cc add ")" add " of pep script (missing semicolon?) \n" print clear quit block.end.20950: testis "{*;*" jumptrue 6 testis ";*;*" jumptrue 4 testis "}*;*" jumptrue 2 jump block.end.21145 push push add "Error near line " ll add " (char " cc add ")" add " of pep script: misplaced semi-colon? ; \n" print clear quit block.end.21145: testis ",*{*" jumpfalse block.end.21315 push push add "Error near line " ll add " (char " cc add ")" add " of script: extra comma in list? \n" print clear quit block.end.21315: testis "command*;*" jumptrue 4 testis "commandset*;*" jumptrue 2 jump block.end.21504 push push add "Error near line " ll add " (char " cc add ")" add " of script: extra semi-colon? \n" print clear quit block.end.21504: testis "!*!*" jumpfalse block.end.21767 push push add "error near line " ll add " (char " cc add ")" add " of script: \n double negation '!!' is not implemented \n" add " and probably won't be, because what would be the point? \n" print clear quit block.end.21767: testis "!*{*" jumptrue 4 testis "!*;*" jumptrue 2 jump block.end.22082 push push add "error near line " ll add " (char " cc add ")" add " of script: misplaced negation operator (!)? \n" add " The negation operator precedes tests, for example: \n" add " !B'abc'{ ... } or !(eof),!'abc'{ ... } \n" print clear quit block.end.22082: testis ",*command*" jumpfalse block.end.22258 push push add "error near line " ll add " (char " cc add ")" add " of script: misplaced comma? \n" print clear quit block.end.22258: testis "!*command*" jumpfalse block.end.22463 push push add "error near line " ll add " (at char " cc add ") \n" add " The negation operator (!) cannot precede a command \n" print clear quit block.end.22463: testis ";*{*" jumptrue 6 testis "command*{*" jumptrue 4 testis "commandset*{*" jumptrue 2 jump block.end.22672 push push add "error near line " ll add " (char " cc add ")" add " of script: no test for brace block? \n" print clear quit block.end.22672: testis "{*}*" jumpfalse block.end.22806 push push add "error near line " ll add " of script: empty braces {}. \n" print clear quit block.end.22806: testis "B*class*" jumptrue 4 testis "E*class*" jumptrue 2 jump block.end.23037 push push add "error near line " ll add " of script:\n classes ([a-z], [:space:] etc). \n" add " cannot use the 'begin' or 'end' modifiers (B/E) \n" print clear quit block.end.23037: testis "comment*{*" jumpfalse block.end.23229 push push add "error near line " ll add " of script: comments cannot occur between \n" add " a test and a brace ({). \n" print clear quit block.end.23229: testis "}*command*" jumpfalse block.end.23379 push push add "error near line " ll add " of script: extra closing brace '}' ?. \n" print clear quit block.end.23379: # E"begin*".!"begin*" { # push; push; # add "error near line "; lines; # add " of script: Begin blocks must precede code \n"; # print; clear; quit; # } # #------------ # The .restart command jumps to the first instruction after the # begin block (if there is a begin block), or the first instruction # of the script. testis ".*word*" jumpfalse block.end.24777 clear ++ get -- testis "restart" jumpfalse block.end.24178 clear count #"0" { clear; add "continue; # .restart "; } testis "0" jumpfalse block.end.24059 clear # use the comment '# restart' so we can replace # this with 'break' if the parse> label appears later add "set restart true; continue; # restart" block.end.24059: testis "1" jumpfalse block.end.24110 clear add "break; # .restart " block.end.24110: put clear add "command*" push jump parse block.end.24178: testis "reparse" jumpfalse block.end.24564 clear count # no labelled loops in tcl # check accumulator to see if we are in the "lex" block # or the "parse" block and adjust the .reparse compilation # accordingly. testis "0" jumpfalse block.end.24444 clear add "break; # .reparse " block.end.24444: testis "1" jumpfalse block.end.24496 clear add "continue; # .reparse " block.end.24496: put clear add "command*" push jump parse block.end.24564: push push add "error near line " ll add " (char " cc add ")" add " of script: \n" add " misplaced dot '.' (use for AND logic or in .reparse/.restart \n" print clear quit block.end.24777: #--------------------------------- # Compiling comments so as to transfer them to the java testis "comment*command*" jumptrue 6 testis "command*comment*" jumptrue 4 testis "commandset*comment*" jumptrue 2 jump block.end.25028 clear get add "\n" ++ get -- put clear add "command*" push jump parse block.end.25028: testis "comment*comment*" jumpfalse block.end.25142 clear get add "\n" ++ get -- put clear add "comment*" push jump parse block.end.25142: # ----------------------- # negated tokens. # This is a new more elegant way to negate a whole set of # tests (tokens) where the negation logic is stored on the # stack, not in the current tape cell. We just add "not" to # the stack token. # eg: ![:alpha:] ![a-z] ![abcd] !"abc" !B"abc" !E"xyz" # This format is used to indicate a negative test for # a brace block. eg: ![aeiou] { add "< not a vowel"; print; clear; } testis "!*quote*" jumptrue 12 testis "!*class*" jumptrue 10 testis "!*begintext*" jumptrue 8 testis "!*endtext*" jumptrue 6 testis "!*eof*" jumptrue 4 testis "!*tapetest*" jumptrue 2 jump block.end.25940 # a simplification: store the token name "quote*/class*/..." # in the tape cell corresponding to the "!*" token. replace "!*" "not" push # this was a bug?? a missing ++; ?? # now get the token-value get -- put ++ clear jump parse block.end.25940: #----------------------------------------- # format: E"text" or E'text' # This format is used to indicate a "workspace-ends-with" text before # a brace block. testis "E*quote*" jumpfalse block.end.26446 clear add "endtext*" push get testis "\"\"" jumpfalse block.end.26403 # empty argument is an error clear add "pep script error near line " ll add " (character " cc add "): \n" add " empty argument for end-test (E\"\") \n" print quit block.end.26403: -- put ++ clear jump parse block.end.26446: #----------------------------------------- # format: B"sometext" or B'sometext' # A 'B' preceding some quoted text is used to indicate a # 'workspace-begins-with' test, before a brace block. testis "B*quote*" jumpfalse block.end.26993 clear add "begintext*" push get testis "\"\"" jumpfalse block.end.26950 # empty argument is an error clear add "pep script error near line " ll add " (character " cc add "): \n" add " empty argument for begin-test (B\"\") \n" print quit block.end.26950: -- put ++ clear jump parse block.end.26993: #-------------------------------------------- # ebnf: command := word, ';' ; # formats: "pop; push; clear; print; " etc # all commands need to end with a semi-colon except for # .reparse and .restart testis "word*;*" jumpfalse block.end.30863 clear # check if command requires parameter get testis "add" jumptrue 16 testis "while" jumptrue 14 testis "whilenot" jumptrue 12 testis "mark" jumptrue 10 testis "escape" jumptrue 8 testis "unescape" jumptrue 6 testis "delim" jumptrue 4 testis "replace" jumptrue 2 jump block.end.27538 put clear add "'" get add "'" add " << command needs an argument, on line " ll add " of script.\n" print clear quit block.end.27538: # the new until; read until workspace ends with tape-cell text testis "until" jumpfalse block.end.27727 clear add "Until [lindex $mm(tape) $mm(cell)]; # until (tape-cell)" put block.end.27727: # the new go; go to mark named on tapecell testis "go" jumpfalse block.end.27893 clear add "GoToMark [lindex $mm(tape) $mm(cell)]; # go (tape-cell)" put block.end.27893: testis "clip" jumpfalse block.end.28072 clear add "if { $mm(work) ne \"\" } {\n" add " set mm(work) [string range $mm(work) 0 end-1]\n" add "}; # clip" put block.end.28072: testis "clop" jumpfalse block.end.28235 clear add "if { $mm(work) ne \"\" } { \n" add " set mm(work) [string range $mm(work) 1 end] } ; # clop" put block.end.28235: testis "clear" jumpfalse block.end.28302 clear add "set mm(work) \"\"; # clear" put block.end.28302: testis "upper" jumpfalse block.end.28403 clear add "set mm(work) [string toupper $mm(work)]; # upper" put block.end.28403: testis "lower" jumpfalse block.end.28504 clear add "set mm(work) [string tolower $mm(work)]; # lower" put block.end.28504: testis "cap" jumpfalse block.end.28603 clear add "set mm(work) [string totitle $mm(work)]; # cap" put block.end.28603: testis "print" jumpfalse block.end.28808 clear # flush doesnt seem necessary #add "puts -nonewline $mm(work); flush stdout; # print"; add "puts -nonewline $mm(work); # print" put block.end.28808: testis "pop" jumpfalse block.end.28847 clear add "Pop;" put block.end.28847: testis "push" jumpfalse block.end.28888 clear add "Push;" put block.end.28888: testis "unstack" jumpfalse block.end.28981 clear add "while {[Pop]} {}; # unstack " put block.end.28981: testis "stack" jumpfalse block.end.29070 clear add "while {[Push]} {}; # stack " put block.end.29070: testis "put" jumpfalse block.end.29169 clear add "lset mm(tape) $mm(cell) $mm(work); # put " put block.end.29169: testis "get" jumpfalse block.end.29276 clear add "append mm(work) [lindex $mm(tape) $mm(cell)]; # get" put block.end.29276: testis "swap" jumpfalse block.end.29625 clear # other ways to swap 2 vars, but tricky with list # lassign "$a $b $c $d" b a d c # foreach {x y} [list $y $x] {break} add "set s $mm(work); \n" add "set mm(work) [lindex $mm(tape) $mm(cell)]; # swap \n" add "lset mm(tape) $mm(cell) $s; # swap " put block.end.29625: testis "++" jumpfalse block.end.29789 clear add "if { $mm(cell) >= $mm(size) } { MoreTape; }\n" add "incr mm(cell); # ++" put block.end.29789: testis "--" jumpfalse block.end.29892 clear add "if { $mm(cell) > 0 } { incr mm(cell) -1 }; # --" put block.end.29892: testis "read" jumpfalse block.end.29950 clear add "Read; # read" put block.end.29950: testis "count" jumpfalse block.end.30033 clear add "append mm(work) $mm(counter); # count " put block.end.30033: testis "a+" jumpfalse block.end.30091 clear add "incr mm(counter); # a+ " put block.end.30091: testis "a-" jumpfalse block.end.30152 clear add "incr mm(counter) -1; # a- " put block.end.30152: testis "zero" jumpfalse block.end.30214 clear add "set mm(counter) 0; # zero " put block.end.30214: testis "chars" jumpfalse block.end.30299 clear add "append mm(work) $mm(charsRead); # chars " put block.end.30299: testis "lines" jumpfalse block.end.30384 clear add "append mm(work) $mm(linesRead); # lines " put block.end.30384: testis "nochars" jumpfalse block.end.30454 clear add "set mm(charsRead) 0; # nochars " put block.end.30454: testis "nolines" jumpfalse block.end.30524 clear add "set mm(linesRead) 0; # nolines " put block.end.30524: # use a labelled loop to quit script. testis "quit" jumpfalse block.end.30608 clear add "exit;" put block.end.30608: testis "state" jumpfalse block.end.30660 clear add "State; # state" put block.end.30660: testis "write" jumpfalse block.end.30709 clear add "WriteToFile;" put block.end.30709: # just eliminate since it does nothing. testis "nop" jumpfalse block.end.30809 clear add "# nop: no operation " put block.end.30809: clear add "command*" push jump parse block.end.30863: #----------------------------------------- # ebnf: commandset := command , command ; testis "command*command*" jumptrue 4 testis "commandset*command*" jumptrue 2 jump block.end.31187 clear add "commandset*" push # format the tape attributes. Add the next command on a newline -- get add "\n" ++ get -- put ++ clear jump parse block.end.31187: #------------------- # here we begin to parse "test*" and "ortestset*" and "andtestset*" # #------------------- # eg: B"abc" {} or E"xyz" {} # transform and markup the different test types testis "begintext*,*" jumptrue 36 testis "endtext*,*" jumptrue 34 testis "quote*,*" jumptrue 32 testis "class*,*" jumptrue 30 testis "eof*,*" jumptrue 28 testis "tapetest*,*" jumptrue 26 testis "begintext*.*" jumptrue 24 testis "endtext*.*" jumptrue 22 testis "quote*.*" jumptrue 20 testis "class*.*" jumptrue 18 testis "eof*.*" jumptrue 16 testis "tapetest*.*" jumptrue 14 testis "begintext*{*" jumptrue 12 testis "endtext*{*" jumptrue 10 testis "quote*{*" jumptrue 8 testis "class*{*" jumptrue 6 testis "eof*{*" jumptrue 4 testis "tapetest*{*" jumptrue 2 jump block.end.32672 # use glob matching for begin and end tests testbegins "begin" jumpfalse block.end.31948 # remove quotes, add {} clear get clip clop # todo: here escape {}*[]? which have a special meaning # in "string match". No do it at quote stage put clear add "[string match {" get add "*} $mm(work)]" block.end.31948: testbegins "end" jumpfalse block.end.32088 # remove quotes clear get clip clop put clear add "[string match {*" get add "} $mm(work)]" block.end.32088: testbegins "quote" jumpfalse block.end.32138 clear add "$mm(work) eq " get block.end.32138: testbegins "class" jumpfalse block.end.32293 # use tcl brace quotes to stop special character problems in patterns. clear add "[regexp {" get add "} $mm(work)]" block.end.32293: testbegins "eof" jumpfalse block.end.32331 clear add "$mm(eof)" block.end.32331: testbegins "tapetest" jumpfalse block.end.32419 clear add "$mm(work) eq [lindex $mm(tape) $mm(cell)]" block.end.32419: put # # maybe we could ellide the not tests by doing here # B"not" { clear; add "!"; get; put; } # clear add "test*" push # the trick below pushes the right token back on the stack. get add "*" push jump parse block.end.32672: #------------------- # negated tests # eg: !B"xyz {} !(eof) {} !(==) {} # !E"xyz" {} # !"abc" {} # ![a-z] {} testis "notbegintext*,*" jumptrue 36 testis "notendtext*,*" jumptrue 34 testis "notquote*,*" jumptrue 32 testis "notclass*,*" jumptrue 30 testis "noteof*,*" jumptrue 28 testis "nottapetest*,*" jumptrue 26 testis "notbegintext*.*" jumptrue 24 testis "notendtext*.*" jumptrue 22 testis "notquote*.*" jumptrue 20 testis "notclass*.*" jumptrue 18 testis "noteof*.*" jumptrue 16 testis "nottapetest*.*" jumptrue 14 testis "notbegintext*{*" jumptrue 12 testis "notendtext*{*" jumptrue 10 testis "notquote*{*" jumptrue 8 testis "notclass*{*" jumptrue 6 testis "noteof*{*" jumptrue 4 testis "nottapetest*{*" jumptrue 2 jump block.end.33791 testbegins "notbegin" jumpfalse block.end.33247 # remove quotes, add {} clear get clip clop put clear add "![string match " get add "* $mm(work)] " block.end.33247: testbegins "notend" jumpfalse block.end.33398 # remove quotes, add {} clear get clip clop put clear add "![string match *" get add " $mm(work)] " block.end.33398: testbegins "notquote" jumpfalse block.end.33451 clear add "$mm(work) ne " get block.end.33451: testbegins "notclass" jumpfalse block.end.33521 clear add "![regexp {" get add "} $mm(work)]" block.end.33521: testbegins "noteof" jumpfalse block.end.33563 clear add "!$mm(eof)" block.end.33563: testbegins "nottapetest" jumpfalse block.end.33654 clear add "$mm(work) ne [lindex $mm(tape) $mm(cell)]" block.end.33654: put clear add "test*" push # the trick below pushes the right token back on the stack. get add "*" push jump parse block.end.33791: #------------------- # 3 tokens #------------------- pop #----------------------------- # some 3 token errors!!! # not a comprehensive list of 3 token errors testis "{*quote*;*" jumptrue 12 testis "{*begintext*;*" jumptrue 10 testis "{*endtext*;*" jumptrue 8 testis "{*class*;*" jumptrue 6 testis "commandset*quote*;*" jumptrue 4 testis "command*quote*;*" jumptrue 2 jump block.end.34268 push push push add "[pep error]\n invalid syntax near line " ll add " (char " cc add ")" add " of script (misplaced semicolon?) \n" print clear quit block.end.34268: # to simplify subsequent tests, transmogrify a single command # to a commandset (multiple commands). testis "{*command*}*" jumpfalse block.end.34464 clear add "{*commandset*}*" push push push jump parse block.end.34464: # errors! mixing AND and OR concatenation testis ",*andtestset*{*" jumptrue 4 testis ".*ortestset*{*" jumptrue 2 jump block.end.34931 # push the tokens back to make debugging easier push push push add " error: mixing AND (.) and OR (,) concatenation in \n" add " in pep script near line " ll add " (character " cc add ") \n" add " \n" add " For example:\n" add " B\".\".!E\"/\".[abcd./] { print; } # Correct!\n" add " B\".\".!E\"/\",[abcd./] { print; } # Error! \n" print clear quit block.end.34931: #-------------------------------------------- # ebnf: command := keyword , quoted-text , ";" ; # format: add "text"; testis "word*quote*;*" jumpfalse block.end.39119 clear get testis "replace" jumpfalse block.end.35274 # error add "< command requires 2 parameters, not 1 \n" add "near line " ll add " of script. \n" print clear quit block.end.35274: # check whether argument is single character, otherwise # throw and error testis "escape" jumptrue 8 testis "unescape" jumptrue 6 testis "while" jumptrue 4 testis "whilenot" jumptrue 2 jump block.end.36273 # This is trickier than I thought it would be. clear ++ get -- # check that arg not empty, (but an empty quote is ok # for the second arg of 'replace' testis "\"\"" jumpfalse block.end.35825 clear add "[pep error] near line " ll add " (or char " cc add "): \n" add " command '" get add "\' cannot have an empty argument (\"\") \n" print quit block.end.35825: # quoted text has the quotes still around it. # also handle escape characters like \n \r etc clip clop clop clop # B "\\" { clip; } clip testis "" jumptrue block.end.36249 clear add "Pep script error near line " ll add " (character " cc add "): \n" add " command '" get add "' takes only a single character argument. \n" print quit block.end.36249: clear get block.end.36273: testis "mark" jumpfalse block.end.36423 clear add "lset mm(marks) $mm(cell) " ++ get -- add "; # mark" put clear add "command*" push jump parse block.end.36423: testis "go" jumpfalse block.end.36548 clear add "GoToMark " ++ get -- add " " put clear add "command*" push jump parse block.end.36548: testis "delim" jumpfalse block.end.36763 clear # only the first character of the delimiter argument is used. add "set mm(delimiter) " ++ get -- add "; # delim " put clear add "command*" push jump parse block.end.36763: testis "add" jumpfalse block.end.37069 clear add "append mm(work) " ++ get -- # handle multiline text # tcl can handle multiline text but I will leave this # anyway because of indenting issues. replace "\n" "\" \nappend mm(work) \"\\n" put clear add "command*" push jump parse block.end.37069: testis "while" jumpfalse block.end.37294 clear add "# while \n" add "while {$mm(peep) eq " ++ get -- add "} { \n" add " if {$mm(eof)} { break; } Read; } " put clear add "command*" push jump parse block.end.37294: testis "whilenot" jumpfalse block.end.37519 clear add "# whilenot \n" add "while {$mm(peep) ne " ++ get -- add "} { \n" add " if {$mm(eof)} { break; } Read }" put clear add "command*" push jump parse block.end.37519: testis "until" jumpfalse block.end.38176 clear ++ get -- # error until cannot have empty argument testis "\"\"" jumpfalse block.end.37958 clear add "Pep script error near line " ll add " (character " cc add "): \n" add " empty argument for 'until' \n" add " \n" add " For example:\n" add " until '.txt'; until \">\"; # correct \n" add " until ''; until \"\"; # errors! \n" print quit block.end.37958: # remove quotes clip clop put clear add "Until {" get # handle multiline argument replace "\n" "\\n" add "};" put clear add "command*" push jump parse block.end.38176: # but hard code escape here! testis "escape" jumpfalse block.end.38516 clear # use "string map" # remove quotes from escape argument ++ get clip clop put clear add "set mm(work) [string map {\"" get add "\" \"\\\\" get add "\"} $mm(work)]" -- put clear add "command*" push jump parse block.end.38516: # could just use replace instead ? # but unescape should probably 'walk the string' to # work out what is really escaped testis "unescape" jumpfalse block.end.38928 clear # remove quotes from escape argument ++ get clip clop put clear add "set mm(work) [string map {\"\\" get add "\" \"" get add "\"} $mm(work)]" -- put clear add "command*" push jump parse block.end.38928: # error, superfluous argument add ": command does not take an argument \n" add "near line " ll add " of script. \n" print clear #state quit block.end.39119: #---------------------------------- # format: "while [:alpha:] ;" or whilenot [a-z] ; testis "word*class*;*" jumpfalse block.end.39851 clear get testis "while" jumpfalse block.end.39471 clear add "# while \n" add "while {[regexp {" ++ get -- add "} $mm(peep)]} { if {$mm(eof)} { break; } Read }" put clear add "command*" push jump parse block.end.39471: testis "whilenot" jumpfalse block.end.39698 clear add "# whilenot \n" add "while {![regexp {" ++ get -- add "} $mm(peep)]} { if {$mm(eof)} { break; } Read }" put clear add "command*" push jump parse block.end.39698: # error add " < command cannot have a class argument \n" add "line " ll add ": error in script \n" print clear quit block.end.39851: # arrange the parse> label loops testeof jumpfalse block.end.41009 testis "commandset*parse>*commandset*" jumptrue 8 testis "command*parse>*commandset*" jumptrue 6 testis "commandset*parse>*command*" jumptrue 4 testis "command*parse>*command*" jumptrue 2 jump block.end.41005 clear # indent both code blocks add " " get replace "\n" "\n " # change .restart code before parse> label # this make .restart work both before, after and without a # parse> label replace "continue; # restart" "break; # restart" put clear ++ ++ add " " get replace "\n" "\n " put clear -- -- # add a block so that .reparse works before the parse> label. # but no labelled loops in tcl add "\n# lex block \n" add "while true { \n" get add "\n break;\n}\n" ++ ++ add "if {$restart == true} { set restart false; continue; }\n" # indent code block # add " "; get; replace "\n" "\n "; put; clear; # tcl doesnt support labelled loops # add "parse: \n"; add "\n# parse block \n" add "while true { \n" get add "\n break; \n" add "}\n" -- -- put clear add "commandset*" push jump parse block.end.41005: block.end.41009: # ------------------------------- # 4 tokens # ------------------------------- pop #------------------------------------- # bnf: command := replace , quote , quote , ";" ; # example: replace "and" "AND" ; testis "word*quote*quote*;*" jumpfalse block.end.41882 clear get testis "replace" jumpfalse block.end.41713 #--------------------------- # a command plus 2 arguments, eg replace "this" "that" # the empty string test is not really necessary here. # note! clear add "# replace \n" add "if {$mm(work) ne \"\"} { \n" add "set mm(work) [string map {" ++ get add " " ++ get add "} $mm(work)] }\n" -- -- put clear add "command*" push jump parse block.end.41713: add "pep script error on line " ll add " (character " cc add "): \n" add " command does not take 2 quoted arguments. \n" print quit block.end.41882: #------------------------------------- # format: begin { #* commands *# } # "begin" blocks which are only executed once (they # will are assembled before the "start:" label. They must come before # all other commands. # "begin*{*command*}*", testis "begin*{*commandset*}*" jumpfalse block.end.42266 clear ++ ++ get -- -- put clear add "beginblock*" push jump parse block.end.42266: # ------------- # parses and compiles concatenated tests # eg: 'a',B'b',E'c',[def],[:space:],[g-k] { ... # these 2 tests should be all that is necessary testis "test*,*ortestset*{*" jumptrue 4 testis "test*,*test*{*" jumptrue 2 jump block.end.42610 clear get add " || " ++ ++ get -- -- put clear add "ortestset*{*" push push jump parse block.end.42610: # dont mix AND and OR concatenations # ------------- # AND logic # parses and compiles concatenated AND tests # eg: 'a',B'b',E'c',[def],[:space:],[g-k] { ... # it is possible to elide this block with the negated block # for compactness but maybe readability is not as good. # negated tests can be chained with non negated tests. # eg: B'http' . !E'.txt' { ... } testis "test*.*andtestset*{*" jumptrue 4 testis "test*.*test*{*" jumptrue 2 jump block.end.43179 clear get add " && " ++ ++ get -- -- put clear add "andtestset*{*" push push jump parse block.end.43179: #------------------------------------- # we should not have to check for the {*command*}* pattern # because that has already been transformed to {*commandset*}* testis "test*{*commandset*}*" jumptrue 6 testis "andtestset*{*commandset*}*" jumptrue 4 testis "ortestset*{*commandset*}*" jumptrue 2 jump block.end.43742 clear # indent the java code for readability ++ ++ add " " get replace "\n" "\n " put -- -- clear add "if {" get add "} {\n" ++ ++ get add "\n}" -- -- put clear add "command*" push # always reparse/compile jump parse block.end.43742: # ------------- # multi-token end-of-stream errors # not a comprehensive list of errors... testeof jumpfalse block.end.44935 testends "begintext*" jumptrue 10 testends "endtext*" jumptrue 8 testends "test*" jumptrue 6 testends "ortestset*" jumptrue 4 testends "andtestset*" jumptrue 2 jump block.end.44052 add " Error near end of script at line " ll add ". Test with no brace block? \n" print clear quit block.end.44052: testends "quote*" jumptrue 6 testends "class*" jumptrue 4 testends "word*" jumptrue 2 jump block.end.44277 put clear add "Error at end of pep script near line " ll add ": missing semi-colon? \n" add "Parse stack: " get add "\n" print clear quit block.end.44277: # A begin block with no other commands is not really an # error... testis "beginblock*" jumpfalse block.end.44675 put clear add "\n" add " Pep script error: begin block with no other commands.\n" add " Follow the begin block with other script commands. Eg:\n" add " begin { add \"starting script...\"; print; clear; }\n" add " read; print; clear;\n" add " Or use \"nop;\" after the begin block \n" print clear quit block.end.44675: testends "{*" jumptrue 16 testends "}*" jumptrue 14 testends ";*" jumptrue 12 testends ",*" jumptrue 10 testends ".*" jumptrue 8 testends "!*" jumptrue 6 testends "B*" jumptrue 4 testends "E*" jumptrue 2 jump block.end.44931 put clear add "Error: misplaced terminal character at end of script! (line " ll add "). \n" add "Parse stack: " get add "\n" print clear quit block.end.44931: block.end.44935: # put the 4 (or less) tokens back on the stack push push push push testeof jumpfalse block.end.53302 print clear # create the virtual machine object code and save it # somewhere on the tape. add "#!/usr/bin/tclsh\n" add "\n" add "# code generated by \"translate.tcl.pss\" a pep script\n" add "# see bumble.sf.net/books/pars/\n" add "#import sys # \n" add "\n" add " # make a new machine. Standard tcl doesnt have objects\n" add " # so I will use an associative array, instead.\n" add " #array set mm {\n" add " # eof false # end of stream reached?\n" add " # charsRead 0 # how many chars already read\n" add " # linesRead 1 # how many lines already read\n" add " # escape \"\\\\\"\n" add " # delimiter \"*\" # push/pop delimiter (default \"*\")\n" add " # counter 0 # a counter for anything\n" add " # work \"\" # the workspace\n" add " # stack {} # stack for parse tokens \n" add " # cell 0 # current tape cell\n" add " # size 100 # the initial tape/marks list size\n" add " # tape {} # a list of attribute for tokens \n" add " # marks {} # marked tape cells\n" add " # peep [read stdin 1] \n" add " #}\n" add "\n" add " # make a new machine. Standard tcl doesnt have objects\n" add " # so I will use an associative array, instead.\n" add " array set mm {\n" add " eof false \n" add " charsRead 0 \n" add " linesRead 1 \n" add " escape \"\\\\\"\n" add " delimiter \"*\" \n" add " counter 0 \n" add " work \"\" \n" add " stack {} \n" add " cell 0 \n" add " size 0 \n" add " tape {} \n" add " marks {}\n" add " peep {} \n" add " }\n" add "\n" add " # Adds more elements to the tape and marks lists \n" add " proc MoreTape {} { \n" add " global mm\n" add " for {set ii 0} {$ii < 100} {incr ii} { \n" add " lappend mm(tape) \"\"; lappend mm(marks) \"\";\n" add " }\n" add " incr mm(size) 100\n" add " }\n" add "\n" add " # initialises a machine \n" add " proc Init {} { \n" add " global mm\n" add " set mm(peep) [ read stdin 1 ]\n" add " # or Read;\n" add " MoreTape;\n" add " }\n" add "\n" add " # read one character from the input stream and \n" add " # update the machine.\n" add " proc Read {} { \n" add " # use upvar eg\n" add " # upvar $machine mm\n" add " global mm\n" add " if { $mm(eof) } { exit }\n" add " incr mm(charsRead)\n" add " # increment lines\n" add " if { $mm(peep) eq \"\\n\" } { incr mm(linesRead) }\n" add " append mm(work) $mm(peep)\n" add " set mm(peep) [ read stdin 1 ]\n" add " if {[eof stdin]} { set mm(eof) true; set mm(peep) -1 }\n" add " } \n" add "\n" add " # increment tape pointer by one: trivial method? But need\n" add " # to increase tape/marks size if exceeded\n" add " proc Increment {} { global mm; incr mm(cell) } \n" add "\n" add " # remove escape character: trivial method ?\n" add " proc UnescapeChar {c} {\n" add " global mm\n" add " #if { $mm(work) ne \"\" } $mm(work = $mm(work.replace(\"\\\\\"+c, c)\n" add " }\n" add "\n" add " # add escape character : trivial\n" add " proc EscapeChar {c} {\n" add " global mm\n" add " #if { $mm(work) ne \"\" } { $mm(work = $mm(work.replace(c, \"\\\\\"+c) }\n" add " }\n" add "\n" add " # pop the first token from the stack into the workspace */\n" add " proc Pop {} { \n" add " global mm\n" add " if {[llength $mm(stack)] == 0} { return false }\n" add " # prepend last stack item, and delete the item\n" add " set mm(work) \"[lindex $mm(stack) end]$mm(work)\"\n" add " set mm(stack) [lrange $mm(stack) 0 [expr [llength $mm(stack)]-2]] \n" add " if {$mm(cell) > 0} { incr mm(cell) -1 }\n" add " return true\n" add " }\n" add " \n" add " # push the first token from the workspace to the stack \n" add " proc Push {} {\n" add " # lappend list $value\n" add " # dont increment the tape pointer on an empty push\n" add " global mm\n" add " if { $mm(work) eq \"\" } { return false }\n" add " # need to get this from the delimiter.\n" add " set firstdelim [string first $mm(delimiter) $mm(work)]\n" add " if {$firstdelim == -1} {\n" add " lappend mm(stack) $mm(work)\n" add " set mm(work) \"\"\n" add " incr mm(cell) 1\n" add " # a hack because \"stack\" hangs otherwise (never returns false)\n" add " return false\n" add " #return true\n" add " }\n" add " lappend mm(stack) [string range $mm(work) 0 $firstdelim]\n" add " set mm(work) [string range $mm(work) [expr {$firstdelim+1}] end]\n" add " incr mm(cell) 1\n" add " return true\n" add " }\n" add "\n" add " # a helper function\n" add " proc IsEscaped {suffix} {\n" add " global mm\n" add " # remove suffix\n" add " set count 0\n" add " set last [expr {[string last $suffix $mm(work)]-1}]\n" add " set new [string range $mm(work) 0 $last]\n" add " # now count trailing escape chars\n" add " while {[string index $new end] eq $mm(escape)} {\n" add " set last [expr {[string last $mm(escape) $new]-1}]\n" add " set new [string range $new 0 $last]\n" add " incr count\n" add " }\n" add " # puts count=$count\n" add " if { $count == 1 } { return true }\n" add " if {[expr {($count % 2) == 0}]} { return false } else { return true }\n" add " }\n" add "\n" add " # reads the input stream until the workspace end with text \n" add " proc Until {suffix} { \n" add " # read at least one character\n" add " global mm\n" add " if { $mm(eof) } { return }\n" add " Read;\n" add " while true { \n" add " if {$mm(eof)} { return }\n" add " # this must count trailing escapes\n" add " if {[string match *$suffix $mm(work)] && ![IsEscaped $suffix]} { return }\n" add " Read;\n" add " }\n" add " } \n" add "\n" add " # maybe not required \n" add " proc Swap {} { \n" add " global mm\n" add " set s $mm(work)\n" add " set mm(work) $mm(tape)[$mm(cell)]\n" add " # could be a problem if $s has spaces in it. (becomes a list)\n" add " lset mm(tape) $mm(cell) $s\n" add " }\n" add "\n" add " proc GoToMark {mark} { \n" add " # or use tcls lsearch here.\n" add " global mm\n" add " set ii [lsearch -exact $mm(marks) $mark]\n" add " if {$ii >= 0} { set mm(cell) $ii \n" add " } else { puts \"badmark \'$mark\'!\"; exit; }\n" add " }\n" add "\n" add " # todo! add an argument to this.\n" add " proc WriteToFile {} { \n" add " global mm\n" add " set f [open sav.pp w 0600] \n" add " puts $f $mm(work)\n" add " close $f\n" add " }\n" add "\n" add " # useful for debugging, the \"state\" command\n" add " proc State {} { \n" add " global mm\n" add " puts \"---------- Machine State --------------\";\n" add " puts -nonewline \" Stack\\[[join $mm(stack) {}]\\] Work\\[$mm(work)\\] \";\n" add " puts \"Peep\\[$mm(peep)\\]\";\n" add " puts -nonewline \" Acc:$mm(counter) Esc:$mm(escape) \";\n" add " puts -nonewline \"Delim:$mm(delimiter) Chars:$mm(charsRead) \";\n" add " puts \"Lines:$mm(linesRead)\";\n" add " puts \"---------- Tape (size:$mm(size)) --------------\";\n" add " set ii 0\n" add " while { $ii < 7 } {\n" add " puts -nonewline \" $ii\";\n" add " if { $ii == $mm(cell) } { \n" add " puts -nonewline \"> \"\n" add " } else { puts -nonewline \" \" }\n" add " # display marks\n" add " if { [lindex $mm(marks) $ii] ne \"\" } { \n" add " puts -nonewline \"\\\"[lindex $mm(marks) $ii]\\\" \"\n" add " } else { puts -nonewline \". \" }\n" add "\n" add " puts \"\\[[lindex $mm(tape) $ii]\\]\";\n" add " incr ii\n" add " }\n" add " }\n" add " # end of tcl pep Machine \"class\" (array) definition\n" add "\n" add " # a flag var to make .restart work in run-once loops\n" add " set restart false\n" add " # initialise the machine\n" add " Init;\n" add " \n" # save the code in the current tape cell put clear #--------------------- # check if the script correctly parsed (there should only # be one or two tokens on the stack, namely "commandset*" or "command*"). # or beginblock commandset pop pop testis "commandset*" jumptrue 4 testis "command*" jumptrue 2 jump block.end.51897 clear # indent generated code (6 spaces) for readability. add " " get replace "\n" "\n " put clear # restore the java preamble from the tape ++ get -- #add 'script: \n'; add "while !$mm(eof) { \n" get # end block marker add "\n}\n" add "# end of generated code\n" # put a copy of the final compilation into the tapecell # so it can be inspected interactively. put print clear quit block.end.51897: testis "beginblock*commandset*" jumptrue 4 testis "beginblock*command*" jumptrue 2 jump block.end.52616 clear # indentation not needed here #add ""; get; #replace "\n" "\n"; put; clear; # indent main code for readability. ++ add " " get replace "\n" "\n " put clear -- # get tcl preamble from tape ++ ++ get -- -- get add "\n" ++ # a labelled loop for "quit" (but quit can just exit?) #add "script: \n"; add "while !$mm(eof) { \n" get # end block marker required add "\n}\n" add "# end of generated code\n" # put a copy of the final compilation into the tapecell # for interactive debugging. put print clear quit block.end.52616: push push # try to explain some more errors unstack testbegins "parse>" jumpfalse block.end.52887 put clear add "[error] pep syntax error:\n" add " The parse> label cannot be the 1st item \n" add " of a script \n" print quit block.end.52887: put clear add "After compiling with 'translate.tcl.pss' (at EOF): \n " add " parse error in input script. \n " print clear # unstack; put; clear; add "Parse stack: " get add "\n" add " * debug script " add " >> pep -If script -i 'some input' \n " add " * debug compilation. \n " add " >> pep -Ia asm.pp script' \n " print clear quit block.end.53302: # not eof # there is an implicit .restart command here (jump start) jump start