/* Java code generated by "translate.java.pss" */ import java.io.*; import java.util.regex.*; import java.util.*; // contains stack public class sedparse { // using int instead of char so that all unicode code points are // available instead of just utf16. (emojis cant fit into utf16) private int accumulator; // counter for anything private int peep; // next char in input stream private int charsRead; // No. of chars read so far private int linesRead; // No. of lines read so far public StringBuffer workspace; // text accumulator private Stack stack; // parse token stack private int LENGTH; // tape initial length // use ArrayLists instead with .add() .get(n) and .set(n, E) // ArrayList al=new ArrayList(); private List tape; // array of token attributes private List marks; // tape marks private int tapePointer; // pointer to current cell private Reader input; // text input stream private boolean eof; // end of stream reached? private boolean flag; // not used here private StringBuffer escape; // char used to "escape" others "\" private StringBuffer delimiter; // push/pop delimiter (default is "*") private boolean markFound; // if the mark was found in tape /** make a new machine with a character stream reader */ public sedparse(Reader reader) { this.markFound = false; this.LENGTH = 100; this.input = reader; this.eof = false; this.flag = false; this.charsRead = 0; this.linesRead = 1; this.escape = new StringBuffer("\\"); this.delimiter = new StringBuffer("*"); this.accumulator = 0; this.workspace = new StringBuffer(""); this.stack = new Stack(); this.tapePointer = 0; this.tape = new ArrayList(); this.marks = new ArrayList(); for (int ii = 0; ii < this.LENGTH; ii++) { this.tape.add(new StringBuffer("")); this.marks.add(new StringBuffer("")); } try { this.peep = this.input.read(); } catch (java.io.IOException ex) { System.out.println("read error"); System.exit(-1); } } /** read one character from the input stream and update the machine. 
*/
  // Appends the look-ahead character "peep" to the workspace, counts it
  // (and a line if it is a newline), then refills "peep" from the stream.
  // Calling read() after end-of-stream terminates the program with
  // status 0; an I/O error terminates it with status -1.
  public void read() {
    try {
      if (this.eof) {
        System.exit(0);
      }
      // peep is -1 only when the stream was empty from the start and eof
      // was never recorded; -1 is not a valid code point, so flag
      // end-of-stream instead of letting Character.toChars() throw.
      if (this.peep == -1) {
        this.eof = true;
        return;
      }
      this.charsRead++;
      // increment lines
      if ((char) this.peep == '\n') {
        this.linesRead++;
      }
      this.workspace.append(Character.toChars(this.peep));
      this.peep = this.input.read();
      if (this.peep == -1) {
        this.eof = true;
      }
    } catch (IOException ex) {
      System.out.println("Error reading input stream" + ex);
      System.exit(-1);
    }
  }

  /**
   * increment tape pointer by one, appending one empty cell to both the
   * tape and the marks list when the pointer reaches the current LENGTH
   */
  public void increment() {
    this.tapePointer++;
    if (this.tapePointer >= this.LENGTH) {
      this.tape.add(new StringBuffer(""));
      this.marks.add(new StringBuffer(""));
      this.LENGTH++;
    }
  }

  /**
   * remove escape character: every backslash followed by c in the
   * workspace is replaced by c alone
   *
   * @param c the character to unescape
   */
  // NOTE(review): the backslash is hard-coded here rather than taken from
  // this.escape; confirm whether that is intended.
  public void unescapeChar(char c) {
    if (this.workspace.length() > 0) {
      String s = this.workspace.toString().replace("\\" + c, c + "");
      this.workspace.setLength(0);
      this.workspace.append(s);
    }
  }

  /**
   * add escape character: every c in the workspace is replaced by a
   * backslash followed by c
   *
   * @param c the character to escape
   */
  public void escapeChar(char c) {
    if (this.workspace.length() > 0) {
      String s = this.workspace.toString().replace(c + "", "\\" + c);
      this.workspace.setLength(0);
      this.workspace.append(s);
    }
  }

  /** whether trailing escapes \\ are even or odd */
  // untested code. check!
eg try: add "x \\"; print; etc public boolean isEscaped(String ss, String sSuffix) { int count = 0; if (ss.length() < 2) return false; if (ss.length() <= sSuffix.length()) return false; if (ss.indexOf(this.escape.toString().charAt(0)) == -1) { return false; } int pos = ss.length()-sSuffix.length(); while ((pos > -1) && (ss.charAt(pos) == this.escape.toString().charAt(0))) { count++; pos--; } if (count % 2 == 0) return false; return true; } /* a helper to see how many trailing \\ escape chars */ private int countEscaped(String sSuffix) { String s = ""; int count = 0; int index = this.workspace.toString().lastIndexOf(sSuffix); // remove suffix if it exists if (index > 0) { s = this.workspace.toString().substring(0, index); } while (s.endsWith(this.escape.toString())) { count++; s = s.substring(0, s.lastIndexOf(this.escape.toString())); } return count; } /** reads the input stream until the workspace end with text */ // can test this with public void until(String sSuffix) { // read at least one character if (this.eof) return; this.read(); while (true) { if (this.eof) return; if (this.workspace.toString().endsWith(sSuffix)) { if (this.countEscaped(sSuffix) % 2 == 0) { return; } } this.read(); } } /** pop the first token from the stack into the workspace */ public Boolean pop() { if (this.stack.isEmpty()) return false; this.workspace.insert(0, this.stack.pop()); if (this.tapePointer > 0) this.tapePointer--; return true; } /** push the first token from the workspace to the stack */ public Boolean push() { String sItem; // dont increment the tape pointer on an empty push if (this.workspace.length() == 0) return false; // need to get this from this.delim not "*" int iFirstStar = this.workspace.indexOf(this.delimiter.toString()); if (iFirstStar != -1) { sItem = this.workspace.toString().substring(0, iFirstStar + 1); this.workspace.delete(0, iFirstStar + 1); } else { sItem = this.workspace.toString(); this.workspace.setLength(0); } this.stack.push(sItem); this.increment(); 
return true; } /** swap current tape cell with the workspace */ public void swap() { String s = new String(this.workspace); this.workspace.setLength(0); this.workspace.append(this.tape.get(this.tapePointer).toString()); this.tape.get(this.tapePointer).setLength(0); this.tape.get(this.tapePointer).append(s); } /** save the workspace to file "sav.pp" */ public void writeToFile() { try { File file = new File("sav.pp"); Writer out = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(file), "UTF8")); out.append(this.workspace.toString()); out.flush(); out.close(); } catch (Exception e) { System.out.println(e.getMessage()); } } public void goToMark(String mark) { this.markFound = false; for (var ii = 0; ii < this.marks.size(); ii++) { if (this.marks.get(ii).toString().equals(mark)) { this.tapePointer = ii; this.markFound = true; } } if (this.markFound == false) { System.out.print("badmark '" + mark + "'!"); System.exit(1); } } /** parse/check/compile the input */ public void parse(InputStreamReader input) { //this is where the actual parsing/compiling code should go //but this means that all generated code must use //"this." not "mm." } public static void main(String[] args) throws Exception { String temp = ""; sedparse mm = new sedparse(new InputStreamReader(System.in)); script: while (!mm.eof) { lex: { mm.read(); /* read */ // make char number relative to line, for error messages if (mm.workspace.toString().matches("^[\n]+$")) { mm.charsRead = 0; /* nochars */ } // newlines can separate commands in (gnu) sed so we will // just add a dummy ';' here. Also, no trailing ; is required if (mm.workspace.toString().matches("^[\n]+$")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append(";*"); /* add */ mm.push(); break lex; } // ignore extraneous white-space? 
if (mm.workspace.toString().matches("^\\p{Space}+$")) { mm.workspace.setLength(0); /* clear */ if (mm.eof) { break lex; } continue script; } // comments, if (mm.workspace.toString().equals("#")) { mm.until("\n"); if (!mm.workspace.toString().endsWith("\n")) { mm.workspace.append("\n"); /* add */ } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ // uncomment line below to include comments in output // and make new reductions // add "comment*"; push; .reparse } // literal tokens '{' and '}' are used to group commands in // sed, ';' is used to separate commands and ',' to separate line // ranges. ! is the postfix negation operator for ranges if (mm.workspace.toString().equals(",") || mm.workspace.toString().equals("{") || mm.workspace.toString().equals("}") || mm.workspace.toString().equals(";") || mm.workspace.toString().equals("!")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.append("*"); /* add */ mm.push(); break lex; } // various actions: print, delete, swap if (mm.workspace.toString().equals("=") || mm.workspace.toString().equals("p") || mm.workspace.toString().equals("P") || mm.workspace.toString().equals("l") || mm.workspace.toString().equals("d") || mm.workspace.toString().equals("D") || mm.workspace.toString().equals("F") || mm.workspace.toString().equals("g") || mm.workspace.toString().equals("G") || mm.workspace.toString().equals("h") || mm.workspace.toString().equals("H") || mm.workspace.toString().equals("n") || mm.workspace.toString().equals("N") || mm.workspace.toString().equals("x") || mm.workspace.toString().equals("z")) { if (mm.workspace.toString().equals("=")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("=", "=; # print line-number + \\n"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("d")) { 
/* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("d", "d; # delete pattern-space, restart"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("D")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("D", "D; # delete pattern-space to 1st \\n, restart"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("e")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("e", "e; # exec patt-space command and replace"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("F")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("F", "F; # print input filename + \\n"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("g")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("g", "g; # replace patt-space with hold-space"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("G")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("G", "G; # append hold-space to patt-space + \\n"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("h")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("h", "h; # replace hold-space with patt-space"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("H")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("H", "H; # append patt-space to hold-space + \\n"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("l")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("l", "l; # print 
pattern-space unambiguously"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("n")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("n", "n; # print patt-space, get next line into patt-space "); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("N")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("N", "N; # append next line to patt-space + \\n "); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("p")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("p", "p; # print pattern-space"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("P")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("P", "P; # print pattern-space up to 1st newline"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("x")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("x", "x; # swap pattern-space with hold-space"); mm.workspace.setLength(0); mm.workspace.append(temp); } } if (mm.workspace.toString().equals("z")) { /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("z", "z; # delete pattern-space, NO restart"); mm.workspace.setLength(0); mm.workspace.append(temp); } } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("action*"); /* add */ mm.push(); break lex; } // line numbers are also selectors if (mm.workspace.toString().matches("^[0-9]+$")) { /* while */ while (Character.toString((char)mm.peep).matches("^[0-9]+$")) { if (mm.eof) { break; } mm.read(); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ 
mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("number*"); /* add */ mm.push(); break lex; } // $ is the last line of the file if (mm.workspace.toString().equals("$")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("number*"); /* add */ mm.push(); break lex; } // patterns - only execute commands if lines match if (mm.workspace.toString().equals("/")) { // save line/char number for error message mm.workspace.setLength(0); /* clear */ mm.workspace.append("near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(", char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.until("/"); if (!mm.workspace.toString().endsWith("/")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Missing '/' to terminate "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("?\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ // add any delimiter for pattern here, or none mm.workspace.append("/"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("/"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("pattern*"); /* add */ mm.push(); break lex; } // read transliteration commands if (mm.workspace.toString().equals("y")) { // save line/char 
number for error message mm.workspace.setLength(0); /* clear */ mm.workspace.append("near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(", char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ // allow spaces between 'y' and '/' although gnu set doesn't mm.until("/"); if (!mm.workspace.toString().endsWith("/") || !mm.workspace.toString().matches("^[ /]+$")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Missing '/' after 'y' transliterate command\n"); /* add */ mm.workspace.append("Or trailing characters "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } // save line/char number for error message mm.workspace.setLength(0); /* clear */ mm.workspace.append("near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(", char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.until("/"); if (!mm.workspace.toString().endsWith("/")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Missing 2nd '/' after 'y' transliterate command "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } if (mm.workspace.toString().equals("/")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Sed syntax error? 
\n"); /* add */ mm.workspace.append(" Empty regex after 'y' transliterate command "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } // replace pattern found if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("y/"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ // save line/char number for error message mm.workspace.append("near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(", char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.increment(); /* ++ */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.setLength(0); /* clear */ mm.until("/"); if (!mm.workspace.toString().endsWith("/")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Missing 3rd '/' after 'y' transliterate command "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } mm.swap(); mm.workspace.append("/"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("/"); /* add */ // y/// does not have modifiers (unlike s///) mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear 
*/ mm.workspace.append("action*"); /* add */ mm.push(); break lex; } // various commands that have an option word parameter if (mm.workspace.toString().equals("b") || mm.workspace.toString().equals("e") || mm.workspace.toString().equals("q") || mm.workspace.toString().equals("Q") || mm.workspace.toString().equals("t") || mm.workspace.toString().equals("T")) { // ignore intervening space if any mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ /* while */ while (Character.toString((char)mm.peep).matches("^[ ]+$")) { if (mm.eof) { break; } mm.read(); } mm.workspace.setLength(0); /* clear */ // A bit more permissive that gnu-sed which doesn't allow // read to end in ';'. /* whilenot */ while (!Character.toString((char)mm.peep).matches("^[ ;}]+$")) { if (mm.eof) { break; } mm.read(); } // word parameters are optional to these commands // just add a space to separate command from parameter if (!mm.workspace.toString().equals("")) { mm.swap(); mm.workspace.append(" "); /* add */ mm.swap(); } mm.swap(); mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.workspace.toString().startsWith("b")) { mm.workspace.append("; # branch to