#*

  This script is an attempt to parse and check the syntax of a 'sed'
  (the unix stream editor) script. If this is successful we can
  modify the script to translate to other languages (including
  compilable languages).

  The grammar in sed.tojava.pss is much more complete than this one.
  Include a/c/i and 0~9 etc.

NOTES

  The 0~8 gnu sed syntax (every 8th line) is not parsed at the moment.

  The file 'sed1line.txt' can be used to test this script. This script
  is only recognising a large subset of gnu sed commands at the moment.
  Also, it does not parse the regular expressions.

  Currently there is a difficulty for the pep machine in dealing with
  the sed syntax 's#a#A#p'. That is, where alternative delimiters are
  used for substitutions. This could be solved with a new 'until'
  command that looks to the tapecell for the stop condition (text).
  Initially, I will only allow standard s/a/A/p syntax.

  The strategy for translating sed scripts into other languages will
  be very similar to the strategy for translating pep scripts. A
  simple, text-based virtual machine will be included in the generated
  code, and each sed command will be an instruction or test on that
  virtual machine.

HISTORY

  30 june 2022
    Did negation and $ lines. Only 'a', 'c', and 'i' need to be parsed
    now. Big subset of sed commands recognised. Grammar is slightly
    more permissive than gnu sed (eg: spaces between s and /// )

  29 june 2022
    Basic parsing written, working but not all sed commands
    recognised yet.

*#

read;

# A newline resets the character counter, so that the char numbers
# used in error messages are relative to the current line. Newlines
# can also separate commands in (gnu) sed, so each newline is turned
# into a dummy ';' separator token. This also means that no trailing
# ';' is required.
[\n] {
  nochars;
  put; clear; add ";*"; push;
  .reparse
}

# ignore extraneous white-space?
[:space:] {
  # discard the white-space; at end of input go on to the parse
  # phase, otherwise start again with the next character
  clear;
  (eof) { .reparse }
  .restart
}

# comments,
"#" {
  # read the rest of the line; a comment that is ended by the end
  # of input is given a terminating newline
  until "\n";
  !E"\n" { add "\n"; }
  put; clear;
  # uncomment line below to include comments in output
  # and make new reductions
  # add "comment*"; push;
  .reparse
}

# literal tokens '{' and '}' are used to group commands in
# sed, ';' is used to separate commands and ',' to separate line
# ranges. ! is the postfix negation operator for ranges
",","{","}",";","!" {
  put; add "*"; push; .reparse
}

# various actions: print, delete, swap
# Each single-letter command is replaced by the command followed by
# a short explanatory comment; that text becomes the attribute of
# the 'action*' token. ('e' is not handled here because it takes
# an optional parameter; see the block for "b","e","q"... below.)
"=","p","P","l","d","D","F","g","G","h","H",
"n","N","x","z" {
  "=" { replace "=" "=; # print line-number + \\n"; }
  "d" { replace "d" "d; # delete pattern-space, restart"; }
  "D" { replace "D" "D; # delete pattern-space to 1st \\n, restart"; }
  "F" { replace "F" "F; # print input filename + \\n"; }
  "g" { replace "g" "g; # replace patt-space with hold-space"; }
  "G" { replace "G" "G; # append hold-space to patt-space + \\n"; }
  "h" { replace "h" "h; # replace hold-space with patt-space"; }
  "H" { replace "H" "H; # append patt-space to hold-space + \\n"; }
  "l" { replace "l" "l; # print pattern-space unambiguously"; }
  "n" { replace "n" "n; # print patt-space, get next line into patt-space "; }
  "N" { replace "N" "N; # append next line to patt-space + \\n "; }
  "p" { replace "p" "p; # print pattern-space"; }
  "P" { replace "P" "P; # print pattern-space up to 1st newline"; }
  "x" { replace "x" "x; # swap pattern-space with hold-space"; }
  "z" { replace "z" "z; # delete pattern-space, NO restart"; }
  put; clear; add "action*"; push; .reparse
}

# line numbers are also selectors
[0-9] {
  while [0-9]; put; clear; add "number*"; push; .reparse
}

# $ is the last line of the file
"$" {
  put; clear; add "number*"; push; .reparse
}

# patterns - only execute commands if lines match
"/" {
  # save line/char number for error message
  clear; add "near line "; lines; add ", char "; chars; put; clear;
  until "/";
  !E"/" {
    clear; add "Missing '/' to terminate "; get; add "?\n";
    print; quit;
  }
  # remove the closing '/' and keep only the pattern text
  clip; put; clear;
  # add any delimiter for pattern here, or none
  add "/"; get; add "/"; put; clear;
  add "pattern*"; push; .reparse
}

# read transliteration commands
"y" {
  # save line/char number for error message
  clear; add "near line "; lines; add ", char "; chars; put; clear;
  # allow spaces between 'y' and '/' although gnu sed doesn't
  until "/";
  # error if no '/' was found, or if anything other than spaces
  # came between the 'y' and the '/'
  !E"/",![ /] {
    clear; add "Missing '/' after 'y' transliterate command\n";
    add "Or trailing characters "; get; add "\n";
    print; quit;
  }

  # save line/char number for error message
  clear; add "near line "; lines; add ", char "; chars; put; clear;
  until "/";
  !E"/" {
    clear; add "Missing 2nd '/' after 'y' transliterate command ";
    get; add "\n";
    print; quit;
  }
  "/" {
    clear; add "Sed syntax error? \n";
    add " Empty regex after 'y' transliterate command ";
    get; add "\n";
    print; quit;
  }

  # replace pattern found
  # the current tape cell now holds "y/<source-chars>"
  clip; put; clear; add "y/"; get; put; clear;

  # save line/char number for error message
  # (kept in the next tape cell, because the current cell is in use)
  add "near line "; lines; add ", char "; chars; ++; put; --; clear;
  until "/";
  !E"/" {
    clear; add "Missing 3rd '/' after 'y' transliterate command ";
    # the position was saved in the next cell, so fetch it from there
    ++; get; --; add "\n";
    print; quit;
  }
  # build "y/<source-chars>/<dest-chars>/" in the workspace
  clip; swap; add "/"; get; add "/";
  # y/// does not have modifiers (unlike s///)
  put; clear; add "action*"; push; .reparse
}

# various commands that have an option word parameter
"b","e","q","Q","t","T" {
  # ignore intervening space if any
  put; clear; while [ ]; clear;
  # A bit more permissive than gnu-sed which doesn't allow
  # read to end in ';'.
  whilenot [ ;}];
  # word parameters are optional to these commands
  # just add a space to separate command from parameter
  !"" { swap; add " "; swap;}
  swap; get;
  B"b" { add "; # branch to