#* This script translates 'sed' (the unix stream editor) scripts into java
   source code. This version has all the code in the 'main' function.

STATUS

  3 july 2022
    Syntax is parsing well and a lot of functionality is implemented, but
    all branching commands are still missing. Also missing are the 'w' flag
    after s///, the numeric flag after s/// (for the nth substitution), and
    the a/c/i commands.

BUGS

  The regex patterns are java's, not gnu sed's.
  eg: $1 is used to replace a group, not \1

NOTES

  String.matches and Pattern.matches match against the whole input!
  So we need to add .* to regexes.

  The script will use a similar strategy to tr/translate.java.pss
  Each machine command will probably be a method, except trivial
  commands, for which 'in-line' code can be generated.

  The 0~8 gnu sed syntax (every 8th line) is not parsed at the moment.

  The file 'sed1line.txt' can be used to test this script.

  This script only recognises a (large) subset of gnu sed commands at the
  moment. Also, it does not parse the regular expressions.

  Currently there is a difficulty for the pep machine in dealing with the
  sed syntax 's#a#A#p'. That is, where alternative delimiters are used for
  substitutions. This could be solved with a new 'until' command that
  looks to the tapecell for the stop condition (text). Initially, I will
  only allow the standard s/a/A/p syntax.

  The strategy for translating sed scripts into other languages will be
  very similar to the strategy for translating pep scripts. A simple,
  text-based virtual machine will be included in the generated code, and
  each sed command will be an instruction or test on that virtual machine.

  The grammar is slightly more permissive than gnu sed
  (eg: spaces are allowed between s and /// )

GNU SED COMMANDS

  'D' command
    If pattern space contains newlines, delete text in the pattern space
    up to the first newline, and restart cycle with the resultant pattern
    space, without reading a new line of input. If pattern space contains
    no newline, start a normal new cycle as if the d command was issued.

HISTORY

  3 july 2022
    Implemented lots of commands, but all branching commands are
    not implemented and may not be. Also a/c/i are not either but can be.
    Did y/// command and read file command.
    There are a lot of details to cover with this conversion, including
    seeing how gnu sed behaves. eg s/a/a/p does print the line twice.

  1 july 2022
    A lot of progress, need to do the rRwW commands. A big challenge
    are the branching commands tTbB etc because java has no goto.
    Also need to do the text insert command a/i/c.
    Worked on ranges, which seem to be working now. The grammar is now
    a little different to eg/sed.parse.pss because of the necessity of
    generating java code.

  30 june 2022
    Started to adapt from sed.parse.pss
    Code is compiling with very simple commands. Use the pep.sedjas
    and pep.sedjaf helper functions to test this.

*#

  # ------------------------------------------------------------------
  # LEXING PHASE
  # Each rule below tests the workspace after a character is read,
  # builds a "token*" name in the workspace, stores the token's
  # attribute (usually a fragment of java source) in the tape cell
  # with 'put', pushes the token and jumps to the parse phase with
  # '.reparse'. The generated java refers to a machine object 'mm'
  # (fields patternSpace, holdSpace, linesRead, autoPrint, readNext
  # and methods readLine, swap, transliterate) which is defined in
  # the generated code, not in this part of the script.
  # ------------------------------------------------------------------

  read;

  # make char number relative to line, for error messages
  [\n] { nochars; }

  # newlines can separate commands in (gnu) sed so we will
  # just add a dummy ';' here. Also, no trailing ; is required
  [\n] {
    put; clear; add ";*"; push; .reparse
  }

  # ignore extraneous white-space?
  [:space:] {
    clear;
    (eof) { .reparse }
    .restart
  }

  # comments, convert to java comments
  "#" {
    clear; add "/* "; until "\n";
    # drop the trailing newline (it is absent if the comment ends the file)
    E"\n" { clip; }
    add " */\n";
    put; clear;
    # uncomment line below to include comments in output
    # add "comment*"; push;
    .reparse
  }

  # literal tokens '{' and '}' are used to group commands in
  # sed, ';' is used to separate commands and ',' to separate line
  # ranges. ! is the postfix negation operator for ranges
  ",","{","}",";","!" {
    put; add "*"; push; .reparse
  }

  # various actions: print, delete, swap
  # Each inner test replaces the one-letter sed command in the
  # workspace with the java statement(s) that implement it; the
  # result becomes the attribute of an 'action*' token.
  "=","p","P","l","d","D","F","g","G","h","H",
  "n","N","x","z" {
    "=" {
      clear;
      # print line-number + newline
      add "System.out.println(mm.linesRead); /* '=' */";
    }
    "d" {
      clear;
      # 'd' delete pattern-space, restart
      # the if true trick is necessary to avoid 'unreachable statement'
      # java compile errors (when multiple 'd' commands are given)
      add "if (true) { mm.patternSpace.setLength(0); continue; } /* 'd' */";
    }
    "D" {
      clear;
      # add "/* 'D' delete pattern-space to 1st \\n, restart */";
      # The deletion must include the newline itself (hence the + 1).
      # Otherwise the newline stays at index 0 of the pattern space and
      # every restarted cycle deletes nothing, looping forever.
      add "if (mm.patternSpace.indexOf(\"\\n\") > -1) {\n";
      add " mm.patternSpace.delete(0, mm.patternSpace.indexOf(\"\\n\") + 1);\n";
      add " mm.readNext = false; if (true) continue; \n";
      add "} else { mm.patternSpace.setLength(0); continue; } /* 'D' */";
    }
    "F" {
      # F: print input filename + newline
      # maybe unsupported in java
      clear; add 'System.out.println(""); /* F */';
    }
    "g" {
      # g: replace patt-space with hold-space
      clear;
      add "mm.patternSpace.setLength(0); \n";
      add "mm.patternSpace.append(mm.holdSpace); /* 'g' */";
    }
    "G" {
      # G; append hold-space to patt-space + \\n"
      clear;
      add "mm.patternSpace.append(\"\\n\" + mm.holdSpace); /* 'G' */";
    }
    "h" {
      # h: replace hold-space with patt-space
      clear;
      add "mm.holdSpace.setLength(0); \n";
      add "mm.holdSpace.append(mm.patternSpace); /* 'h' */";
    }
    "H" {
      # H: append patt-space to hold-space + newline
      clear;
      add "mm.holdSpace.append(\"\\n\" + mm.patternSpace); /* 'H' */";
    }
    "l" {
      # print pattern-space unambiguously, synonym for p ?
      clear;
      add "System.out.println(mm.patternSpace); /* 'l' */";
    }
    "n" {
      # n: print patt-space, get next line into patt-space
      clear;
      add "if (mm.autoPrint) { System.out.println(mm.patternSpace); }\n";
      add "mm.patternSpace.setLength(0);\n";
      add "mm.readLine(); /* 'n' */";
    }
    "N" {
      # N: append next line to patt-space + newline
      clear;
      add "mm.patternSpace.append('\\n'); ";
      add "mm.readLine(); /* 'N' */";
    }
    "p" {
      clear;
      add "System.out.println(mm.patternSpace); /* 'p' */";
    }
    "P" {
      # P: print pattern-space up to 1st newline"
      clear;
      add 'if (mm.patternSpace.indexOf("\\n") > -1) {\n';
      add ' System.out.println(\n';
      add ' mm.patternSpace.substring(0, mm.patternSpace.indexOf("\\n")));\n';
      add "} else { System.out.println(mm.patternSpace); }";
    }
    "x" {
      # x:
      # swap pattern-space with hold-space
      clear;
      add "mm.swap(); /* x */";
    }
    "z" {
      # z: delete pattern-space, NO restart
      # (the java method is setLength; 'setLenth' did not compile)
      clear;
      add "mm.patternSpace.setLength(0); /* z */";
    }
    put; clear; add "action*"; push; .reparse
  }

  # line numbers are also selectors
  [0-9] {
    while [0-9];
    put; clear; add "number*"; push; .reparse
  }

  # $ is the last line of the file
  "$" {
    put; clear; add "number*"; push; .reparse
  }

  # patterns - only execute commands if lines match
  "/" {
    # save line/char number for error message
    clear; add "near line/char "; lines; add ":"; chars; put; clear;
    until "/";
    !E"/" {
      clear; add "Missing '/' to terminate "; get; add "?\n";
      print; quit;
    }
    clip;
    # java .matches method matches whole string not substring
    # so we need to add .* at beginning and end, but not if regex
    # begins with ^ or ends with $. complicated hey
    !E"$" { add ".*$"; }
    !B"^" { put; clear; add "^.*"; get; }
    put; clear;
    # add any delimiter for pattern here, or none
    add '"'; get; add '"';
    put; clear; add "pattern*"; push; .reparse
  }

  # read transliteration commands
  "y" {
    # save line/char number for error message
    clear; add "near line "; lines; add ", char "; chars; put; clear;
    # allow spaces between 'y' and '/' although gnu sed doesn't
    until "/";
    !E"/",![ /] {
      clear;
      add "Missing '/' after 'y' transliterate command\n";
      add "Or trailing characters "; get; add "\n";
      print; quit;
    }
    # save line/char number for error message
    clear; add "near line "; lines; add ", char "; chars; put; clear;
    until "/";
    !E"/" {
      clear;
      add "Missing 2nd '/' after 'y' transliterate command "; get; add "\n";
      print; quit;
    }
    "/" {
      clear;
      add "Sed syntax error? \n";
      add " Empty regex after 'y' transliterate command "; get; add "\n";
      print; quit;
    }
    # replace pattern found
    clip; put; clear;
    add 'mm.transliterate("'; get; add '", "'; put; clear;
    # save line/char number for error message
    # (stored in the next tape cell so that the partial java call
    # built above is kept in the current cell)
    add "near line "; lines; add ", char "; chars; ++; put; --; clear;
    until "/";
    !E"/" {
      clear;
      add "Missing 3rd '/' after 'y' transliterate command "; get; add "\n";
      print; quit;
    }
    clip; swap; get; add '"); /* y */ ';
    # y/// does not have modifiers (unlike s///)
    put; clear; add "action*"; push; .reparse
  }

  # various commands that have an option word parameter
  # e has two variants
  # "e" { replace "e" "e; # exec patt-space command and replace"; }
  "b","e","q","Q","t","T" {
    # ignore intervening space if any
    put; clear; while [ ]; clear;
    # A bit more permissive that gnu-sed which doesn't allow
    # read to end in ';'.
    whilenot [ ;}];
    # word parameters are optional to these commands
    # just add a space to separate command from parameter
    !"" { swap; add " "; swap;}
    swap; get;
    # hard to implement because java has no goto ?
    B"b" {
      clear; # todo: 'b' branch to