/* Java code generated by "translate.java.pss" */ import java.io.*; import java.util.regex.*; import java.util.*; // contains stack public class Machine { // using int instead of char so that all unicode code points are // available instead of just utf16. (emojis cant fit into utf16) private int accumulator; // counter for anything private int peep; // next char in input stream private int charsRead; // No. of chars read so far private int linesRead; // No. of lines read so far public StringBuffer workspace; // text accumulator private Stack stack; // parse token stack private int LENGTH; // tape initial length // use ArrayLists instead with .add() .get(n) and .set(n, E) // ArrayList al=new ArrayList(); private List tape; // array of token attributes private List marks; // tape marks private int tapePointer; // pointer to current cell private Reader input; // text input stream private boolean eof; // end of stream reached? private boolean flag; // not used here private StringBuffer escape; // char used to "escape" others "\" private StringBuffer delimiter; // push/pop delimiter (default is "*") private boolean markFound; // if the mark was found in tape /** make a new machine with a character stream reader */ public Machine(Reader reader) { this.markFound = false; this.LENGTH = 100; this.input = reader; this.eof = false; this.flag = false; this.charsRead = 0; this.linesRead = 1; this.escape = new StringBuffer("\\"); this.delimiter = new StringBuffer("*"); this.accumulator = 0; this.workspace = new StringBuffer(""); this.stack = new Stack(); this.tapePointer = 0; this.tape = new ArrayList(); this.marks = new ArrayList(); for (int ii = 0; ii < this.LENGTH; ii++) { this.tape.add(new StringBuffer("")); this.marks.add(new StringBuffer("")); } try { this.peep = this.input.read(); } catch (java.io.IOException ex) { System.out.println("read error"); System.exit(-1); } } /** read one character from the input stream and update the machine. 
*/ public void read() { int iChar; try { if (this.eof) { System.exit(0); } this.charsRead++; // increment lines if ((char)this.peep == '\n') { this.linesRead++; } this.workspace.append(Character.toChars(this.peep)); this.peep = this.input.read(); if (this.peep == -1) { this.eof = true; } } catch (IOException ex) { System.out.println("Error reading input stream" + ex); System.exit(-1); } } /** increment tape pointer by one */ public void increment() { this.tapePointer++; if (this.tapePointer >= this.LENGTH) { this.tape.add(new StringBuffer("")); this.marks.add(new StringBuffer("")); this.LENGTH++; } } /** remove escape character */ public void unescapeChar(char c) { if (workspace.length() > 0) { String s = this.workspace.toString().replace("\\"+c, c+""); this.workspace.setLength(0); workspace.append(s); } } /** add escape character */ public void escapeChar(char c) { if (workspace.length() > 0) { String s = this.workspace.toString().replace(c+"", "\\"+c); workspace.setLength(0); workspace.append(s); } } /** whether trailing escapes \\ are even or odd */ // untested code. check! 
eg try: add "x \\"; print; etc public boolean isEscaped(String ss, String sSuffix) { int count = 0; if (ss.length() < 2) return false; if (ss.length() <= sSuffix.length()) return false; if (ss.indexOf(this.escape.toString().charAt(0)) == -1) { return false; } int pos = ss.length()-sSuffix.length(); while ((pos > -1) && (ss.charAt(pos) == this.escape.toString().charAt(0))) { count++; pos--; } if (count % 2 == 0) return false; return true; } /* a helper to see how many trailing \\ escape chars */ private int countEscaped(String sSuffix) { String s = ""; int count = 0; int index = this.workspace.toString().lastIndexOf(sSuffix); // remove suffix if it exists if (index > 0) { s = this.workspace.toString().substring(0, index); } while (s.endsWith(this.escape.toString())) { count++; s = s.substring(0, s.lastIndexOf(this.escape.toString())); } return count; } /** reads the input stream until the workspace end with text */ // can test this with public void until(String sSuffix) { // read at least one character if (this.eof) return; this.read(); while (true) { if (this.eof) return; if (this.workspace.toString().endsWith(sSuffix)) { if (this.countEscaped(sSuffix) % 2 == 0) { return; } } this.read(); } } /** pop the first token from the stack into the workspace */ public Boolean pop() { if (this.stack.isEmpty()) return false; this.workspace.insert(0, this.stack.pop()); if (this.tapePointer > 0) this.tapePointer--; return true; } /** push the first token from the workspace to the stack */ public Boolean push() { String sItem; // dont increment the tape pointer on an empty push if (this.workspace.length() == 0) return false; // need to get this from this.delim not "*" int iFirstStar = this.workspace.indexOf(this.delimiter.toString()); if (iFirstStar != -1) { sItem = this.workspace.toString().substring(0, iFirstStar + 1); this.workspace.delete(0, iFirstStar + 1); } else { sItem = this.workspace.toString(); this.workspace.setLength(0); } this.stack.push(sItem); this.increment(); 
return true; } /** swap current tape cell with the workspace */ public void swap() { String s = new String(this.workspace); this.workspace.setLength(0); this.workspace.append(this.tape.get(this.tapePointer).toString()); this.tape.get(this.tapePointer).setLength(0); this.tape.get(this.tapePointer).append(s); } /** save the workspace to file "sav.pp" */ public void writeToFile() { try { File file = new File("sav.pp"); Writer out = new BufferedWriter(new OutputStreamWriter( new FileOutputStream(file), "UTF8")); out.append(this.workspace.toString()); out.flush(); out.close(); } catch (Exception e) { System.out.println(e.getMessage()); } } public void goToMark(String mark) { this.markFound = false; for (var ii = 0; ii < this.marks.size(); ii++) { if (this.marks.get(ii).toString().equals(mark)) { this.tapePointer = ii; this.markFound = true; } } if (this.markFound == false) { System.out.print("badmark '" + mark + "'!"); System.exit(1); } } /** parse/check/compile the input */ public void parse(InputStreamReader input) { //this is where the actual parsing/compiling code should go //but this means that all generated code must use //"this." not "mm." } public static void main(String[] args) throws Exception { String temp = ""; Machine mm = new Machine(new InputStreamReader(System.in)); script: while (!mm.eof) { lex: { mm.read(); /* read */ //-------------- if (mm.workspace.toString().matches("^\\p{Space}+$")) { if (mm.workspace.toString().matches("^[\n]+$")) { mm.charsRead = 0; /* nochars */ } mm.workspace.setLength(0); /* clear */ if (!mm.eof) { continue script; } break lex; } //--------------- // We can ellide all these single character tests, because // the stack token is just the character itself with a * // Braces {} are used for blocks of commands, ',' and '.' for concatenating // tests with OR or AND logic. 'B' and 'E' for begin and end // tests, '!' is used for negation, ';' is used to terminate a // command. 
if (mm.workspace.toString().equals("{") || mm.workspace.toString().equals("}") || mm.workspace.toString().equals(";") || mm.workspace.toString().equals(",") || mm.workspace.toString().equals(".") || mm.workspace.toString().equals("!") || mm.workspace.toString().equals("B") || mm.workspace.toString().equals("E")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.append("*"); /* add */ mm.push(); break lex; } //--------------- // format: "text" if (mm.workspace.toString().equals("\"")) { // save the start line number (for error messages) in case // there is no terminating quote character. mm.workspace.setLength(0); /* clear */ mm.workspace.append("line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(") "); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("\""); /* add */ mm.until("\""); if (!mm.workspace.toString().endsWith("\"")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Unterminated quote character (\") starting at "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append(" !\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("quote*"); /* add */ mm.push(); break lex; } //--------------- // format: 'text', single quotes are converted to double quotes // but we must escape embedded double quotes. if (mm.workspace.toString().equals("'")) { // save the start line number (for error messages) in case // there is no terminating quote character. 
mm.workspace.setLength(0); /* clear */ mm.workspace.append("line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(") "); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.until("'"); if (!mm.workspace.toString().endsWith("'")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Unterminated quote (') starting at "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("!\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } mm.escapeChar("\"".charAt(0)); mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("\""); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\""); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("quote*"); /* add */ mm.push(); break lex; } //--------------- // formats: [:space:] [a-z] [abcd] [:alpha:] etc // should class tests really be multiline??! if (mm.workspace.toString().equals("[")) { // save the start line number (for error messages) in case // there is no terminating bracket character. 
mm.workspace.setLength(0); /* clear */ mm.workspace.append("line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(") "); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("["); /* add */ mm.until("]"); if (mm.workspace.toString().equals("[]")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("pep script error at line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append("): \n"); /* add */ mm.workspace.append(" empty character class [] \n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } if (!mm.workspace.toString().endsWith("]")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Unterminated class text ([...]) starting at "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append(""); mm.workspace.append("\n class text can be used in tests or with the 'while' and "); mm.workspace.append("\n 'whilenot' commands. 
For example: "); mm.workspace.append("\n [:alpha:] { while [:alpha:]; print; clear; }"); mm.workspace.append("\n "); /* add */ System.out.print(mm.workspace); /* print */ break script; } // need to escape quotes so they dont interfere with the // quotes java needs for .matches("...") mm.escapeChar("\"".charAt(0)); // the caret is not a negation operator in pep scripts /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("^", "\\\\^"); mm.workspace.setLength(0); mm.workspace.append(temp); } // save the class on the tape mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); if (mm.workspace.length() > 0) { /* clop */ mm.workspace.delete(0, 1); } /* clop */ if (mm.workspace.length() > 0) { /* clop */ mm.workspace.delete(0, 1); } /* clop */ if (!mm.workspace.toString().startsWith("-")) { // not a range class, eg [a-z] so need to escape '-' chars // java requires a double escape mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("-", "\\\\-"); mm.workspace.setLength(0); mm.workspace.append(temp); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().startsWith("-")) { // a range class, eg [a-z], check if it is correct if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } if (!mm.workspace.toString().equals("-")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Error in pep script at line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append("): \n"); /* add */ 
mm.workspace.append(" Incorrect character range class "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append(""); mm.workspace.append("\n For example:"); mm.workspace.append("\n [a-g] # correct"); mm.workspace.append("\n [f-gh] # error! \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } } mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ // restore class text if (mm.workspace.toString().startsWith("[:") && !mm.workspace.toString().endsWith(":]")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("malformed character class starting at "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("!\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } if (mm.workspace.toString().startsWith("[:") && !mm.workspace.toString().equals("[:]")) { if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } if (mm.workspace.length() > 0) { /* clop */ mm.workspace.delete(0, 1); } /* clop */ if (mm.workspace.length() > 0) { /* clop */ mm.workspace.delete(0, 1); } /* clop */ // unicode posix character classes in java // Also, abbreviations (not implemented in gh.c yet.) 
if (mm.workspace.toString().equals("alnum") || mm.workspace.toString().equals("N")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Alnum}"); /* add */ } if (mm.workspace.toString().equals("alpha") || mm.workspace.toString().equals("A")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Alpha}"); /* add */ } if (mm.workspace.toString().equals("ascii") || mm.workspace.toString().equals("I")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{ASCII}"); /* add */ } if (mm.workspace.toString().equals("blank") || mm.workspace.toString().equals("B")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Blank}"); /* add */ } if (mm.workspace.toString().equals("cntrl") || mm.workspace.toString().equals("C")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Cntrl}"); /* add */ } if (mm.workspace.toString().equals("digit") || mm.workspace.toString().equals("D")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Digit}"); /* add */ } if (mm.workspace.toString().equals("graph") || mm.workspace.toString().equals("G")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Graph}"); /* add */ } // or equiv to graph [^\p{Z}\p{C}] as suggested on stack overflow if (mm.workspace.toString().equals("lower") || mm.workspace.toString().equals("L")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Lower}"); /* add */ } if (mm.workspace.toString().equals("print") || mm.workspace.toString().equals("P")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Print}"); /* add */ } if (mm.workspace.toString().equals("punct") || mm.workspace.toString().equals("T")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Punct}"); /* add */ } if (mm.workspace.toString().equals("space") || mm.workspace.toString().equals("S")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Space}"); /* add */ } if 
(mm.workspace.toString().equals("upper") || mm.workspace.toString().equals("U")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{Upper}"); /* add */ } if (mm.workspace.toString().equals("xdigit") || mm.workspace.toString().equals("X")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("\\\\p{XDigit}"); /* add */ } if (!mm.workspace.toString().startsWith("\\\\p{")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("Pep script syntax error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append("): \n"); /* add */ mm.workspace.append("Unknown character class '"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("'\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ // add quotes around the class and limits around the // class so it can be used with the string.matches() method // (must match the whole string, not just one character) mm.workspace.append("\"^"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("+$\""); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("class*"); /* add */ mm.push(); break lex; } //--------------- // formats: (eof) (EOF) (==) etc. 
if (mm.workspace.toString().equals("(")) { mm.workspace.setLength(0); /* clear */ mm.until(")"); if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); if (mm.workspace.toString().equals("eof") || mm.workspace.toString().equals("EOF")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("eof*"); /* add */ mm.push(); break lex; } if (mm.workspace.toString().equals("==")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("tapetest*"); /* add */ mm.push(); break lex; } mm.workspace.append(" << unknown test near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" of script.\n"); /* add */ mm.workspace.append(" bracket () tests are \n"); /* add */ mm.workspace.append(" (eof) test if end of stream reached. \n"); /* add */ mm.workspace.append(" (==) test if workspace is same as current tape cell \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } //--------------- // multiline and single line comments, eg #... and #* ... *# if (mm.workspace.toString().equals("#")) { mm.workspace.setLength(0); /* clear */ mm.read(); /* read */ if (mm.workspace.toString().equals("\n")) { mm.workspace.setLength(0); /* clear */ break lex; } // checking for multiline comments of the form "#* \n\n\n *#" // these are just ignored at the moment (deleted) if (mm.workspace.toString().equals("*")) { // save the line number for possible error message later mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.linesRead); /* lines */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.until("*#"); if (mm.workspace.toString().endsWith("*#")) { // convert to /* ... 
*/ java multiline comment if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("/*"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("*/"); /* add */ // create a "comment" parse token mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ // comment-out this line to remove multiline comments from the // compiled java. // add "comment*"; push; break lex; } // make an unterminated multiline comment an error // to ease debugging of scripts. mm.workspace.setLength(0); /* clear */ mm.workspace.append("unterminated multiline comment #* ... *# \n"); /* add */ mm.workspace.append("stating at line number "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } // single line comments. some will get lost. 
mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("//"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.until("\n"); if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("comment*"); /* add */ mm.push(); break lex; } //---------------------------------- // parse command words (and abbreviations) // legal characters for keywords (commands) if (!mm.workspace.toString().matches("^[abcdefghijklmnopqrstuvwxyzBEKGPRUWS+\\-<>0\\^]+$")) { // error message about a misplaced character mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("!! Misplaced character '"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("' in script near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(") \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } // my testclass implementation cannot handle complex lists // eg [a-z+-] this is why I have to write out the whole alphabet /* while */ while (Character.toString((char)mm.peep).matches("^[abcdefghijklmnopqrstuvwxyzBEOFKGPRUWS+\\-<>0\\^]+$")) { if (mm.eof) { break; } mm.read(); } //---------------------------------- // KEYWORDS // here we can test for all the keywords (command words) and their // abbreviated one letter versions (eg: clip k, clop K etc). 
Then // we can print an error message and abort if the word is not a // legal keyword for the parse-edit language // make ll an alias for "lines" and cc an alias for chars if (mm.workspace.toString().equals("ll")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("lines"); /* add */ } if (mm.workspace.toString().equals("cc")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("chars"); /* add */ } // one letter command abbreviations if (mm.workspace.toString().equals("a")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("add"); /* add */ } if (mm.workspace.toString().equals("k")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("clip"); /* add */ } if (mm.workspace.toString().equals("K")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("clop"); /* add */ } if (mm.workspace.toString().equals("D")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("replace"); /* add */ } if (mm.workspace.toString().equals("d")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("clear"); /* add */ } if (mm.workspace.toString().equals("t")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("print"); /* add */ } if (mm.workspace.toString().equals("p")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("pop"); /* add */ } if (mm.workspace.toString().equals("P")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("push"); /* add */ } if (mm.workspace.toString().equals("u")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("unstack"); /* add */ } if (mm.workspace.toString().equals("U")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("stack"); /* add */ } if (mm.workspace.toString().equals("G")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("put"); /* add */ } if (mm.workspace.toString().equals("g")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("get"); /* add */ } if (mm.workspace.toString().equals("x")) { 
mm.workspace.setLength(0); /* clear */ mm.workspace.append("swap"); /* add */ } if (mm.workspace.toString().equals(">")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("++"); /* add */ } if (mm.workspace.toString().equals("<")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("--"); /* add */ } if (mm.workspace.toString().equals("m")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mark"); /* add */ } if (mm.workspace.toString().equals("M")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("go"); /* add */ } if (mm.workspace.toString().equals("r")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("read"); /* add */ } if (mm.workspace.toString().equals("R")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("until"); /* add */ } if (mm.workspace.toString().equals("w")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("while"); /* add */ } if (mm.workspace.toString().equals("W")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("whilenot"); /* add */ } if (mm.workspace.toString().equals("n")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("count"); /* add */ } if (mm.workspace.toString().equals("+")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("a+"); /* add */ } if (mm.workspace.toString().equals("-")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("a-"); /* add */ } if (mm.workspace.toString().equals("0")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("zero"); /* add */ } if (mm.workspace.toString().equals("c")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("chars"); /* add */ } if (mm.workspace.toString().equals("l")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("lines"); /* add */ } if (mm.workspace.toString().equals("^")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("escape"); /* add */ } if (mm.workspace.toString().equals("v")) { mm.workspace.setLength(0); /* 
clear */ mm.workspace.append("unescape"); /* add */ } if (mm.workspace.toString().equals("z")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("delim"); /* add */ } if (mm.workspace.toString().equals("S")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("state"); /* add */ } if (mm.workspace.toString().equals("q")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("quit"); /* add */ } if (mm.workspace.toString().equals("s")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("write"); /* add */ } if (mm.workspace.toString().equals("o")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("nop"); /* add */ } if (mm.workspace.toString().equals("rs")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("restart"); /* add */ } if (mm.workspace.toString().equals("rp")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("reparse"); /* add */ } // some extra syntax for testeof and testtape if (mm.workspace.toString().equals("") || mm.workspace.toString().equals("")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("eof*"); /* add */ mm.push(); break lex; } if (mm.workspace.toString().equals("<==>")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("tapetest*"); /* add */ mm.push(); break lex; } if (mm.workspace.toString().equals("jump") || mm.workspace.toString().equals("jumptrue") || mm.workspace.toString().equals("jumpfalse") || mm.workspace.toString().equals("testis") || mm.workspace.toString().equals("testclass") || mm.workspace.toString().equals("testbegins") || mm.workspace.toString().equals("testends") || mm.workspace.toString().equals("testeof") || mm.workspace.toString().equals("testtape")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ 
mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("The instruction '"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("' near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")\n"); /* add */ mm.workspace.append("can be used in pep assembly code but not scripts. \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } // show information if these "deprecated" commands are used if (mm.workspace.toString().equals("Q") || mm.workspace.toString().equals("bail") || mm.workspace.toString().equals("state")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("The instruction '"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("' near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")\n"); /* add */ mm.workspace.append("is no longer part of the pep language (july 2020). \n"); /* add */ mm.workspace.append("use 'quit' instead of 'bail', and use 'unstack; print;' \n"); /* add */ mm.workspace.append("instead of 'state'. 
\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("add") || mm.workspace.toString().equals("clip") || mm.workspace.toString().equals("clop") || mm.workspace.toString().equals("replace") || mm.workspace.toString().equals("upper") || mm.workspace.toString().equals("lower") || mm.workspace.toString().equals("cap") || mm.workspace.toString().equals("clear") || mm.workspace.toString().equals("print") || mm.workspace.toString().equals("pop") || mm.workspace.toString().equals("push") || mm.workspace.toString().equals("unstack") || mm.workspace.toString().equals("stack") || mm.workspace.toString().equals("put") || mm.workspace.toString().equals("get") || mm.workspace.toString().equals("swap") || mm.workspace.toString().equals("++") || mm.workspace.toString().equals("--") || mm.workspace.toString().equals("mark") || mm.workspace.toString().equals("go") || mm.workspace.toString().equals("read") || mm.workspace.toString().equals("until") || mm.workspace.toString().equals("while") || mm.workspace.toString().equals("whilenot") || mm.workspace.toString().equals("count") || mm.workspace.toString().equals("a+") || mm.workspace.toString().equals("a-") || mm.workspace.toString().equals("zero") || mm.workspace.toString().equals("chars") || mm.workspace.toString().equals("lines") || mm.workspace.toString().equals("nochars") || mm.workspace.toString().equals("nolines") || mm.workspace.toString().equals("escape") || mm.workspace.toString().equals("unescape") || mm.workspace.toString().equals("delim") || mm.workspace.toString().equals("quit") || mm.workspace.toString().equals("write") || mm.workspace.toString().equals("nop") || mm.workspace.toString().equals("reparse") || mm.workspace.toString().equals("restart")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("word*"); /* 
add */ mm.push(); break lex; } //------------ // the .reparse command and "parse label" is a simple way to // make sure that all shift-reductions occur. It should be used inside // a block test, so as not to create an infinite loop. There is // no "goto" in java so we need to use labelled loops to // implement .reparse/parse> if (mm.workspace.toString().equals("parse>")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.accumulator); /* count */ if (!mm.workspace.toString().equals("0")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("script error:\n"); /* add */ mm.workspace.append(" extra parse> label at line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(".\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } mm.workspace.setLength(0); /* clear */ mm.workspace.append("// parse>"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("parse>*"); /* add */ mm.push(); // use accumulator to indicate after parse> label mm.accumulator++; /* a+ */ break lex; } // -------------------- // implement "begin-blocks", which are only executed // once, at the beginning of the script (similar to awk's BEGIN {} rules) if (mm.workspace.toString().equals("begin")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.append("*"); /* add */ mm.push(); break lex; } mm.workspace.append(" << unknown command on line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of source file. 
\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; // ---------------------------------- // PARSING PHASE: // Below is the parse/compile phase of the script. Here we pop tokens off the // stack and check for sequences of tokens eg "word*semicolon*". If we find a // valid series of tokens, we "shift-reduce" or "resolve" the token series eg // word*semicolon* --> command* // At the same time, we manipulate (transform) the attributes on the tape, as // required. } parse: while (true) { //------------------------------------- // 2 tokens //------------------------------------- mm.pop(); mm.pop(); // All of the patterns below are currently errors, but may not // be in the future if we expand the syntax of the parse // language. Also consider: // begintext* endtext* quoteset* notclass*, !* ,* ;* B* E* // It is nice to trap the errors here because we can emit some // (hopefully not very cryptic) error messages with a line number. // Otherwise the script writer has to debug with // pep -a asm.pp -I scriptfile if (mm.workspace.toString().equals("word*word*") || mm.workspace.toString().equals("word*}*") || mm.workspace.toString().equals("word*begintext*") || mm.workspace.toString().equals("word*endtext*") || mm.workspace.toString().equals("word*!*") || mm.workspace.toString().equals("word*,*") || mm.workspace.toString().equals("quote*word*") || mm.workspace.toString().equals("quote*class*") || mm.workspace.toString().equals("quote*state*") || mm.workspace.toString().equals("quote*}*") || mm.workspace.toString().equals("quote*begintext*") || mm.workspace.toString().equals("quote*endtext*") || mm.workspace.toString().equals("class*word*") || mm.workspace.toString().equals("class*quote*") || mm.workspace.toString().equals("class*class*") || mm.workspace.toString().equals("class*state*") || mm.workspace.toString().equals("class*}*") || mm.workspace.toString().equals("class*begintext*") || 
mm.workspace.toString().equals("class*endtext*") || mm.workspace.toString().equals("class*!*") || mm.workspace.toString().equals("notclass*word*") || mm.workspace.toString().equals("notclass*quote*") || mm.workspace.toString().equals("notclass*class*") || mm.workspace.toString().equals("notclass*state*") || mm.workspace.toString().equals("notclass*}*")) { mm.workspace.append(" (Token stack) \nValue: \n"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\nValue: \n"); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append("\n"); /* add */ mm.workspace.append("Error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of pep script (missing semicolon?) \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("{*;*") || mm.workspace.toString().equals(";*;*") || mm.workspace.toString().equals("}*;*")) { mm.push(); mm.push(); mm.workspace.append("Error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of pep script: misplaced semi-colon? ; \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals(",*{*")) { mm.push(); mm.push(); mm.workspace.append("Error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of script: extra comma in list? 
\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("command*;*") || mm.workspace.toString().equals("commandset*;*")) { mm.push(); mm.push(); mm.workspace.append("Error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of script: extra semi-colon? \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("!*!*")) { mm.push(); mm.push(); mm.workspace.append("error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of script: \n double negation '!!' is not implemented \n"); /* add */ mm.workspace.append(" and probably won't be, because what would be the point? \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("!*{*") || mm.workspace.toString().equals("!*;*")) { mm.push(); mm.push(); mm.workspace.append("error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of script: misplaced negation operator (!)? \n"); /* add */ mm.workspace.append(" The negation operator precedes tests, for example: \n"); /* add */ mm.workspace.append(" !B'abc'{ ... } or !(eof),!'abc'{ ... 
} \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals(",*command*")) { mm.push(); mm.push(); mm.workspace.append("error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of script: misplaced comma? \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("!*command*")) { mm.push(); mm.push(); mm.workspace.append("error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (at char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(") \n"); /* add */ mm.workspace.append(" The negation operator (!) cannot precede a command \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals(";*{*") || mm.workspace.toString().equals("command*{*") || mm.workspace.toString().equals("commandset*{*")) { mm.push(); mm.push(); mm.workspace.append("error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of script: no test for brace block? \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("{*}*")) { mm.push(); mm.push(); mm.workspace.append("error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" of script: empty braces {}. 
\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("B*class*") || mm.workspace.toString().equals("E*class*")) { mm.push(); mm.push(); mm.workspace.append("error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" of script:\n classes ([a-z], [:space:] etc). \n"); /* add */ mm.workspace.append(" cannot use the 'begin' or 'end' modifiers (B/E) \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("comment*{*")) { mm.push(); mm.push(); mm.workspace.append("error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" of script: comments cannot occur between \n"); /* add */ mm.workspace.append(" a test and a brace ({). \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("}*command*")) { mm.push(); mm.push(); mm.workspace.append("error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" of script: extra closing brace '}' ?. \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } //------------ // The .restart command jumps to the first instruction after the // begin block (if there is a begin block), or the first instruction // of the script. 
if (mm.workspace.toString().equals(".*word*")) { mm.workspace.setLength(0); /* clear */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ if (mm.workspace.toString().equals("restart")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("continue script;"); /* add */ // not required because we have labelled loops, // continue script works both before and after the parse> label // "0" { clear; add "continue script;"; } // "1" { clear; add "break lex;"; } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } if (mm.workspace.toString().equals("reparse")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.accumulator); /* count */ // check accumulator to see if we are in the "lex" block // or the "parse" block and adjust the .reparse compilation // accordingly. if (mm.workspace.toString().equals("0")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("break lex;"); /* add */ } if (mm.workspace.toString().equals("1")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("continue parse;"); /* add */ } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } mm.push(); mm.push(); mm.workspace.append("error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of script: \n"); /* add */ mm.workspace.append(" misplaced dot '.' 
(use for AND logic or in .reparse/.restart \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } //--------------------------------- // Compiling comments so as to transfer them to the java if (mm.workspace.toString().equals("comment*command*") || mm.workspace.toString().equals("command*comment*") || mm.workspace.toString().equals("commandset*comment*")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } if (mm.workspace.toString().equals("comment*comment*")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("comment*"); /* add */ mm.push(); continue parse; } // ----------------------- // negated tokens. // This is a new more elegant way to negate a whole set of // tests (tokens) where the negation logic is stored on the // stack, not in the current tape cell. We just add "not" to // the stack token. // eg: ![:alpha:] ![a-z] ![abcd] !"abc" !B"abc" !E"xyz" // This format is used to indicate a negative test for // a brace block. 
eg: ![aeiou] { add "< not a vowel"; print; clear; } if (mm.workspace.toString().equals("!*quote*") || mm.workspace.toString().equals("!*class*") || mm.workspace.toString().equals("!*begintext*") || mm.workspace.toString().equals("!*endtext*") || mm.workspace.toString().equals("!*eof*") || mm.workspace.toString().equals("!*tapetest*")) { // a simplification: store the token name "quote*/class*/..." // in the tape cell corresponding to the "!*" token. /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("!*", "not"); mm.workspace.setLength(0); mm.workspace.append(temp); } mm.push(); // this was a bug?? a missing ++; ?? // now get the token-value mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.increment(); /* ++ */ mm.workspace.setLength(0); /* clear */ continue parse; } //----------------------------------------- // format: E"text" or E'text' // This format is used to indicate a "workspace-ends-with" text before // a brace block. 
if ("E*quote*".contentEquals(mm.workspace)) {
  // Replace the two popped tokens with a single endtext* token.
  mm.workspace.setLength(0);
  mm.workspace.append("endtext*");
  mm.push();

  // Fetch the current tape cell; an empty quote ("") is a script error.
  mm.workspace.append(mm.tape.get(mm.tapePointer));
  if ("\"\"".contentEquals(mm.workspace)) {
    mm.workspace.setLength(0);
    mm.workspace
        .append("pep script error near line ")
        .append(mm.linesRead)
        .append(" (character ")
        .append(mm.charsRead)
        .append("): \n")
        .append(" empty argument for end-test (E\"\") \n");
    System.out.print(mm.workspace);
    break script;
  }

  // Store the fetched text in the cell one position back (when there is
  // one), step forward again, and resume parsing with an empty workspace.
  if (mm.tapePointer > 0) {
    mm.tapePointer--;
  }
  mm.tape.get(mm.tapePointer).setLength(0);
  mm.tape.get(mm.tapePointer).append(mm.workspace);
  mm.increment();
  mm.workspace.setLength(0);
  continue parse;
}

//-----------------------------------------
// format: B"sometext" or B'sometext'
// A 'B' in front of quoted text marks a 'workspace-begins-with' test,
// which is written before a brace block.
if (mm.workspace.toString().equals("B*quote*")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("begintext*"); /* add */ mm.push(); mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.workspace.toString().equals("\"\"")) { // empty argument is an error mm.workspace.setLength(0); /* clear */ mm.workspace.append("pep script error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append("): \n"); /* add */ mm.workspace.append(" empty argument for begin-test (B\"\") \n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.increment(); /* ++ */ mm.workspace.setLength(0); /* clear */ continue parse; } //-------------------------------------------- // ebnf: command := word, ';' ; // formats: "pop; push; clear; print; " etc // all commands need to end with a semi-colon except for // .reparse and .restart if (mm.workspace.toString().equals("word*;*")) { mm.workspace.setLength(0); /* clear */ // check if command requires parameter mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.workspace.toString().equals("add") || mm.workspace.toString().equals("while") || mm.workspace.toString().equals("whilenot") || mm.workspace.toString().equals("mark") || mm.workspace.toString().equals("go") || mm.workspace.toString().equals("escape") || mm.workspace.toString().equals("unescape") || mm.workspace.toString().equals("delim") || mm.workspace.toString().equals("replace")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("'"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("'"); /* add */ 
mm.workspace.append(" command needs an argument, on line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" of script.\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } // the new until; command with no argument if (mm.workspace.toString().equals("until")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.until(mm.tape.get(mm.tapePointer)); /* until (tape) */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("clip")) { mm.workspace.setLength(0); /* clear */ // are these length tests really necessary mm.workspace.append("if (mm.workspace.length() > 0) { /* clip */\n"); /* add */ mm.workspace.append(" mm.workspace.delete(mm.workspace.length() - 1, \n"); /* add */ mm.workspace.append(" mm.workspace.length()); }"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("clop")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("if (mm.workspace.length() > 0) { /* clop */\n"); /* add */ mm.workspace.append(" mm.workspace.delete(0, 1); } /* clop */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("clear")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.workspace.setLength(0);"); /* add */ mm.workspace.append(" /* clear */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("upper")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("/* upper */ \n"); /* add */ mm.workspace.append("for (int i = 0; i < mm.workspace.length(); i++) { \n"); /* add */ mm.workspace.append(" char c = mm.workspace.charAt(i); \n"); /* add */ 
mm.workspace.append(" mm.workspace.setCharAt(i, Character.toUpperCase(c)); } "); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("lower")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("/* lower */ \n"); /* add */ mm.workspace.append("for (int i = 0; i < mm.workspace.length(); i++) { \n"); /* add */ mm.workspace.append(" char c = mm.workspace.charAt(i); \n"); /* add */ mm.workspace.append(" mm.workspace.setCharAt(i, Character.toLowerCase(c)); } "); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("cap")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("/* cap */ \n"); /* add */ mm.workspace.append("for (int i = 0; i < mm.workspace.length(); i++) { \n"); /* add */ mm.workspace.append(" char c = mm.workspace.charAt(i); \n"); /* add */ mm.workspace.append(" if (i==0){ mm.workspace.setCharAt(i, Character.toUpperCase(c)); } \n"); /* add */ mm.workspace.append(" else { mm.workspace.setCharAt(i, Character.toLowerCase(c)); } \n"); /* add */ mm.workspace.append("}"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("print")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("System.out.print(mm.workspace); /* print */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("pop")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.pop();"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("push")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.push();"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ 
mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("unstack")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("while (mm.pop()); /* unstack */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("stack")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("while(mm.push()); /* stack */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("put")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.tape.get(mm.tapePointer).setLength(0); /* put */\n"); /* add */ mm.workspace.append("mm.tape.get(mm.tapePointer).append(mm.workspace); "); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("get")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("swap")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.swap();"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("++")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.increment();"); /* add */ mm.workspace.append(" /* ++ */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("--")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("if (mm.tapePointer > 0) mm.tapePointer--; /* -- */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if 
(mm.workspace.toString().equals("read")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.read(); /* read */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("count")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.workspace.append(mm.accumulator); /* count */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("a+")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.accumulator++; /* a+ */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("a-")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.accumulator--; /* a- */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("zero")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.accumulator = 0; /* zero */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("chars")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.workspace.append(mm.charsRead); /* chars */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("lines")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.workspace.append(mm.linesRead); /* lines */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("nochars")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.charsRead = 0; /* nochars */"); /* add */ 
mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("nolines")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.linesRead = 0; /* nolines */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } // use a labelled loop to quit script. if (mm.workspace.toString().equals("quit")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("break script;"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } if (mm.workspace.toString().equals("write")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.writeToFile();"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } // just eliminate since it does nothing. if (mm.workspace.toString().equals("nop")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("/* nop: no-operation eliminated */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); } mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } //----------------------------------------- // ebnf: commandset := command , command ; if (mm.workspace.toString().equals("command*command*") || mm.workspace.toString().equals("commandset*command*")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("commandset*"); /* add */ mm.push(); // format the tape attributes. 
Add the next command on a newline if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.increment(); /* ++ */ mm.workspace.setLength(0); /* clear */ continue parse; } //------------------- // here we begin to parse "test*" and "ortestset*" and "andtestset*" // //------------------- // eg: B"abc" {} or E"xyz" {} // transform and markup the different test types if (mm.workspace.toString().equals("begintext*,*") || mm.workspace.toString().equals("endtext*,*") || mm.workspace.toString().equals("quote*,*") || mm.workspace.toString().equals("class*,*") || mm.workspace.toString().equals("eof*,*") || mm.workspace.toString().equals("tapetest*,*") || mm.workspace.toString().equals("begintext*.*") || mm.workspace.toString().equals("endtext*.*") || mm.workspace.toString().equals("quote*.*") || mm.workspace.toString().equals("class*.*") || mm.workspace.toString().equals("eof*.*") || mm.workspace.toString().equals("tapetest*.*") || mm.workspace.toString().equals("begintext*{*") || mm.workspace.toString().equals("endtext*{*") || mm.workspace.toString().equals("quote*{*") || mm.workspace.toString().equals("class*{*") || mm.workspace.toString().equals("eof*{*") || mm.workspace.toString().equals("tapetest*{*")) { if (mm.workspace.toString().startsWith("begin")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.workspace.toString().startsWith("); /* add */ } if (mm.workspace.toString().startsWith("end")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.workspace.toString().endsWith("); /* add */ } if (mm.workspace.toString().startsWith("quote")) { mm.workspace.setLength(0); /* clear */ 
mm.workspace.append("mm.workspace.toString().equals("); /* add */ } if (mm.workspace.toString().startsWith("class")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.workspace.toString().matches("); /* add */ } // clear the tapecell for testeof and testtape because // they take no arguments. if (mm.workspace.toString().startsWith("eof")) { mm.workspace.setLength(0); /* clear */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.append("mm.eof"); /* add */ } if (mm.workspace.toString().startsWith("tapetest")) { mm.workspace.setLength(0); /* clear */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.append("(mm.workspace.toString().equals(mm.tape.get(mm.tapePointer).toString())"); /* add */ } mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (!mm.workspace.toString().startsWith("mm.eof")) { mm.workspace.append(")"); /* add */ } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("test*"); /* add */ mm.push(); // the trick below pushes the right token back on the stack. 
mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("*"); /* add */ mm.push(); continue parse; } //------------------- // negated tests // eg: !B"xyz {} !(eof) {} !(==) {} // !E"xyz" {} // !"abc" {} // ![a-z] {} if (mm.workspace.toString().equals("notbegintext*,*") || mm.workspace.toString().equals("notendtext*,*") || mm.workspace.toString().equals("notquote*,*") || mm.workspace.toString().equals("notclass*,*") || mm.workspace.toString().equals("noteof*,*") || mm.workspace.toString().equals("nottapetest*,*") || mm.workspace.toString().equals("notbegintext*.*") || mm.workspace.toString().equals("notendtext*.*") || mm.workspace.toString().equals("notquote*.*") || mm.workspace.toString().equals("notclass*.*") || mm.workspace.toString().equals("noteof*.*") || mm.workspace.toString().equals("nottapetest*.*") || mm.workspace.toString().equals("notbegintext*{*") || mm.workspace.toString().equals("notendtext*{*") || mm.workspace.toString().equals("notquote*{*") || mm.workspace.toString().equals("notclass*{*") || mm.workspace.toString().equals("noteof*{*") || mm.workspace.toString().equals("nottapetest*{*")) { if (mm.workspace.toString().startsWith("notbegin")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("!mm.workspace.toString().startsWith("); /* add */ } if (mm.workspace.toString().startsWith("notend")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("!mm.workspace.toString().endsWith("); /* add */ } if (mm.workspace.toString().startsWith("notquote")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("!mm.workspace.toString().equals("); /* add */ } if (mm.workspace.toString().startsWith("notclass")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("!mm.workspace.toString().matches("); /* add */ } // clear the tapecell for testeof and testtape because // they take no arguments. 
if (mm.workspace.toString().startsWith("noteof")) { mm.workspace.setLength(0); /* clear */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.append("!mm.eof"); /* add */ } if (mm.workspace.toString().startsWith("nottapetest")) { mm.workspace.setLength(0); /* clear */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.append("(!mm.workspace.toString().equals(mm.tape.get(mm.tapePointer).toString())"); /* add */ } mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (!mm.workspace.toString().startsWith("!mm.eof")) { mm.workspace.append(")"); /* add */ } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("test*"); /* add */ mm.push(); // the trick below pushes the right token back on the stack. mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("*"); /* add */ mm.push(); continue parse; } //------------------- // 3 tokens //------------------- mm.pop(); //----------------------------- // some 3 token errors!!! // not a comprehensive list of 3 token errors if (mm.workspace.toString().equals("{*quote*;*") || mm.workspace.toString().equals("{*begintext*;*") || mm.workspace.toString().equals("{*endtext*;*") || mm.workspace.toString().equals("{*class*;*") || mm.workspace.toString().equals("commandset*quote*;*") || mm.workspace.toString().equals("command*quote*;*")) { mm.push(); mm.push(); mm.push(); mm.workspace.append("[pep error]\n invalid syntax near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(")"); /* add */ mm.workspace.append(" of script (misplaced semicolon?) 
\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } // to simplify subsequent tests, transmogrify a single command // to a commandset (multiple commands). if (mm.workspace.toString().equals("{*command*}*")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("{*commandset*}*"); /* add */ mm.push(); mm.push(); mm.push(); continue parse; } // errors! mixing AND and OR concatenation if (mm.workspace.toString().equals(",*andtestset*{*") || mm.workspace.toString().equals(".*ortestset*{*")) { // push the tokens back to make debugging easier mm.push(); mm.push(); mm.push(); mm.workspace.append(" error: mixing AND (.) and OR (,) concatenation in \n"); /* add */ mm.workspace.append(" in pep script near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append(") \n"); /* add */ mm.workspace.append(" "); mm.workspace.append("\n For example:"); mm.workspace.append("\n B\".\".!E\"/\".[abcd./] { print; } # Correct!"); mm.workspace.append("\n B\".\".!E\"/\",[abcd./] { print; } # Error! \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } //-------------------------------------------- // ebnf: command := keyword , quoted-text , ";" ; // format: add "text"; if (mm.workspace.toString().equals("word*quote*;*")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.workspace.toString().equals("replace")) { // error mm.workspace.append("< command requires 2 parameters, not 1 \n"); /* add */ mm.workspace.append("near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" of script. 
\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } // check whether argument is single character, otherwise // throw an error if (mm.workspace.toString().equals("escape") || mm.workspace.toString().equals("unescape") || mm.workspace.toString().equals("while") || mm.workspace.toString().equals("whilenot")) { // This is trickier than I thought it would be. mm.workspace.setLength(0); /* clear */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ // check that arg not empty, (but an empty quote is ok // for the second arg of 'replace' if (mm.workspace.toString().equals("\"\"")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("[pep error] near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (or char "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append("): \n"); /* add */ mm.workspace.append(" command '"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\' cannot have an empty argument (\"\") \n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } // quoted text has the quotes still around it. 
// also handle escape characters like \n \r etc if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } if (mm.workspace.length() > 0) { /* clop */ mm.workspace.delete(0, 1); } /* clop */ if (mm.workspace.length() > 0) { /* clop */ mm.workspace.delete(0, 1); } /* clop */ if (mm.workspace.length() > 0) { /* clop */ mm.workspace.delete(0, 1); } /* clop */ // B "\\" { clip; } if (mm.workspace.length() > 0) { /* clip */ mm.workspace.delete(mm.workspace.length() - 1, mm.workspace.length()); } if (!mm.workspace.toString().equals("")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Pep script error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append("): \n"); /* add */ mm.workspace.append(" command '"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("' takes only a single character argument. 
\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ } if (mm.workspace.toString().equals("mark")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("/* mark */ \n"); /* add */ mm.workspace.append("mm.marks.get(mm.tapePointer).setLength(0); // mark \n"); /* add */ mm.workspace.append("mm.marks.get(mm.tapePointer).append("); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append("); // mark"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } if (mm.workspace.toString().equals("go")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.goToMark("); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append("); /* go */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } if (mm.workspace.toString().equals("delim")) { mm.workspace.setLength(0); /* clear */ // this.delimiter.setCharAt(0, text.charAt(0)); // only the first character of the delimiter argument is used. 
mm.workspace.append("mm.delimiter.setLength(0); /* delim */\n"); /* add */ mm.workspace.append("mm.delimiter.append("); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append("); "); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } if (mm.workspace.toString().equals("add")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.workspace.append("); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ // handle multiline text /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("\n", "\"); \nmm.workspace.append(\"\\n"); mm.workspace.setLength(0); mm.workspace.append(temp); } mm.workspace.append("); /* add */"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } if (mm.workspace.toString().equals("while")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("while ((char) mm.peep == "); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append(".charAt(0)) /* while */\n "); /* add */ mm.workspace.append(" { if (mm.eof) {break;} mm.read(); }"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } if (mm.workspace.toString().equals("whilenot")) { mm.workspace.setLength(0); /* clear */ 
mm.workspace.append("while ((char) mm.peep != "); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append(".charAt(0)) /* whilenot */\n "); /* add */ mm.workspace.append(" { if (mm.eof) {break;} mm.read(); }"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } if (mm.workspace.toString().equals("until")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm.until("); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ // error until cannot have empty argument if (mm.workspace.toString().equals("mm.until(\"\"")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("Pep script error near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append("): \n"); /* add */ mm.workspace.append(" empty argument for 'until' \n"); /* add */ mm.workspace.append(" "); mm.workspace.append("\n For example:"); mm.workspace.append("\n until '.txt'; until \">\"; # correct "); mm.workspace.append("\n until ''; until \"\"; # errors! 
\n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } // handle multiline argument /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("\n", "\\n"); mm.workspace.setLength(0); mm.workspace.append(temp); } mm.workspace.append(");"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } // But really, can't the "replace" command just be used // instead of escape/unescape?? This seems a flaw in the // machine design. if (mm.workspace.toString().equals("escape") || mm.workspace.toString().equals("unescape")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("mm."); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("Char"); /* add */ mm.workspace.append("("); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append(".charAt(0));"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } // error, superfluous argument mm.workspace.append(": command does not take an argument \n"); /* add */ mm.workspace.append("near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" of script. 
\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ //state break script; } //---------------------------------- // format: "while [:alpha:] ;" or whilenot [a-z] ; if (mm.workspace.toString().equals("word*class*;*")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.workspace.toString().equals("while")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("/* while */ \n"); /* add */ mm.workspace.append("while (Character.toString((char)mm.peep).matches("); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append(")) { if (mm.eof) { break; } mm.read(); }"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } if (mm.workspace.toString().equals("whilenot")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append("/* whilenot */ \n"); /* add */ mm.workspace.append("while (!Character.toString((char)mm.peep).matches("); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append(")) { if (mm.eof) { break; } mm.read(); }"); /* add */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } // error mm.workspace.append(" < command cannot have a class argument \n"); /* add */ mm.workspace.append("line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(": error in script \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } // arrange the 
parse> label loops if (mm.eof) { if (mm.workspace.toString().equals("commandset*parse>*commandset*") || mm.workspace.toString().equals("command*parse>*commandset*") || mm.workspace.toString().equals("commandset*parse>*command*") || mm.workspace.toString().equals("command*parse>*command*")) { mm.workspace.setLength(0); /* clear */ // indent both code blocks mm.workspace.append(" "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("\n", "\n "); mm.workspace.setLength(0); mm.workspace.append(temp); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.increment(); /* ++ */ mm.increment(); /* ++ */ mm.workspace.append(" "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("\n", "\n "); mm.workspace.setLength(0); mm.workspace.append(temp); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ // add a block so that .reparse works before the parse> label. 
mm.workspace.append("lex: { \n"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n}\n"); /* add */ mm.increment(); /* ++ */ mm.increment(); /* ++ */ // indent code block // add " "; get; replace "\n" "\n "; put; clear; mm.workspace.append("parse: \n"); /* add */ mm.workspace.append("while (true) { \n"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n break parse;\n}"); /* add */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("commandset*"); /* add */ mm.push(); continue parse; } } // ------------------------------- // 4 tokens // ------------------------------- mm.pop(); //------------------------------------- // bnf: command := replace , quote , quote , ";" ; // example: replace "and" "AND" ; if (mm.workspace.toString().equals("word*quote*quote*;*")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.workspace.toString().equals("replace")) { //--------------------------- // a command plus 2 arguments, eg replace "this" "that" mm.workspace.setLength(0); /* clear */ mm.workspace.append("/* replace */ \n"); /* add */ mm.workspace.append("if (mm.workspace.length() > 0) { \n"); /* add */ mm.workspace.append(" temp = mm.workspace.toString().replace("); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append(", "); /* add */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append(");\n"); /* add */ mm.workspace.append(" mm.workspace.setLength(0); \n"); /* add */ mm.workspace.append(" mm.workspace.append(temp);\n} "); /* add */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ if (mm.tapePointer > 0) 
mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); continue parse; } mm.workspace.append("pep script error on line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(" (character "); /* add */ mm.workspace.append(mm.charsRead); /* chars */ mm.workspace.append("): \n"); /* add */ mm.workspace.append(" command does not take 2 quoted arguments. \n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } //------------------------------------- // format: begin { #* commands *# } // "begin" blocks which are only executed once (they // will are assembled before the "start:" label. They must come before // all other commands. // "begin*{*command*}*", if (mm.workspace.toString().equals("begin*{*commandset*}*")) { mm.workspace.setLength(0); /* clear */ mm.increment(); /* ++ */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("beginblock*"); /* add */ mm.push(); continue parse; } // ------------- // parses and compiles concatenated tests // eg: 'a',B'b',E'c',[def],[:space:],[g-k] { ... 
// these 2 tests should be all that is necessary if (mm.workspace.toString().equals("test*,*ortestset*{*") || mm.workspace.toString().equals("test*,*test*{*")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append(" || "); /* add */ mm.increment(); /* ++ */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("ortestset*{*"); /* add */ mm.push(); mm.push(); continue parse; } // dont mix AND and OR concatenations // ------------- // AND logic // parses and compiles concatenated AND tests // eg: 'a',B'b',E'c',[def],[:space:],[g-k] { ... // it is possible to elide this block with the negated block // for compactness but maybe readability is not as good. // negated tests can be chained with non negated tests. // eg: B'http' . !E'.txt' { ... 
} if (mm.workspace.toString().equals("test*.*andtestset*{*") || mm.workspace.toString().equals("test*.*test*{*")) { mm.workspace.setLength(0); /* clear */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append(" && "); /* add */ mm.increment(); /* ++ */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("andtestset*{*"); /* add */ mm.push(); mm.push(); continue parse; } //------------------------------------- // we should not have to check for the {*command*}* pattern // because that has already been transformed to {*commandset*}* if (mm.workspace.toString().equals("test*{*commandset*}*") || mm.workspace.toString().equals("andtestset*{*commandset*}*") || mm.workspace.toString().equals("ortestset*{*commandset*}*")) { mm.workspace.setLength(0); /* clear */ // indent the java code for readability mm.increment(); /* ++ */ mm.increment(); /* ++ */ mm.workspace.append(" "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("\n", "\n "); mm.workspace.setLength(0); mm.workspace.append(temp); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.setLength(0); /* clear */ mm.workspace.append("if ("); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append(") {\n"); /* add */ mm.increment(); /* ++ */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n}"); /* add */ if (mm.tapePointer > 0) 
mm.tapePointer--; /* -- */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("command*"); /* add */ mm.push(); // always reparse/compile continue parse; } // ------------- // multi-token end-of-stream errors // not a comprehensive list of errors... if (mm.eof) { if (mm.workspace.toString().endsWith("begintext*") || mm.workspace.toString().endsWith("endtext*") || mm.workspace.toString().endsWith("test*") || mm.workspace.toString().endsWith("ortestset*") || mm.workspace.toString().endsWith("andtestset*")) { mm.workspace.append(" Error near end of script at line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(". Test with no brace block? \n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().endsWith("quote*") || mm.workspace.toString().endsWith("class*") || mm.workspace.toString().endsWith("word*")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("Error at end of pep script near line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append(": missing semi-colon? 
\n"); /* add */ mm.workspace.append("Parse stack: "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().endsWith("{*") || mm.workspace.toString().endsWith("}*") || mm.workspace.toString().endsWith(";*") || mm.workspace.toString().endsWith(",*") || mm.workspace.toString().endsWith(".*") || mm.workspace.toString().endsWith("!*") || mm.workspace.toString().endsWith("B*") || mm.workspace.toString().endsWith("E*")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("Error: misplaced terminal character at end of script! (line "); /* add */ mm.workspace.append(mm.linesRead); /* lines */ mm.workspace.append("). \n"); /* add */ mm.workspace.append("Parse stack: "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } } // put the 4 (or less) tokens back on the stack mm.push(); mm.push(); mm.push(); mm.push(); if (mm.eof) { System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ // create the virtual machine object code and save it // somewhere on the tape. mm.workspace.append(""); mm.workspace.append("\n"); mm.workspace.append("\n /* Java code generated by \"translate.java.pss\" */"); mm.workspace.append("\n import java.io.*;"); mm.workspace.append("\n import java.util.regex.*;"); mm.workspace.append("\n import java.util.*; // contains stack"); mm.workspace.append("\n"); mm.workspace.append("\n public class Machine {"); mm.workspace.append("\n // using int instead of char so that all unicode code points are"); mm.workspace.append("\n // available instead of just utf16. 
(emojis cant fit into utf16)"); mm.workspace.append("\n private int accumulator; // counter for anything"); mm.workspace.append("\n private int peep; // next char in input stream"); mm.workspace.append("\n private int charsRead; // No. of chars read so far"); mm.workspace.append("\n private int linesRead; // No. of lines read so far"); mm.workspace.append("\n public StringBuffer workspace; // text accumulator"); mm.workspace.append("\n private Stack stack; // parse token stack"); mm.workspace.append("\n private int LENGTH; // tape initial length"); mm.workspace.append("\n"); mm.workspace.append("\n // use ArrayLists instead with .add() .get(n) and .set(n, E)"); mm.workspace.append("\n // ArrayList al=new ArrayList();"); mm.workspace.append("\n private List tape; // array of token attributes "); mm.workspace.append("\n private List marks; // tape marks"); mm.workspace.append("\n private int tapePointer; // pointer to current cell"); mm.workspace.append("\n private Reader input; // text input stream"); mm.workspace.append("\n private boolean eof; // end of stream reached?"); mm.workspace.append("\n private boolean flag; // not used here"); mm.workspace.append("\n private StringBuffer escape; // char used to \"escape\" others \"\\\""); mm.workspace.append("\n private StringBuffer delimiter; // push/pop delimiter (default is \"*\")"); mm.workspace.append("\n private boolean markFound; // if the mark was found in tape"); mm.workspace.append("\n "); mm.workspace.append("\n /** make a new machine with a character stream reader */"); mm.workspace.append("\n public Machine(Reader reader) {"); mm.workspace.append("\n this.markFound = false; "); mm.workspace.append("\n this.LENGTH = 100;"); mm.workspace.append("\n this.input = reader;"); mm.workspace.append("\n this.eof = false;"); mm.workspace.append("\n this.flag = false;"); mm.workspace.append("\n this.charsRead = 0; "); mm.workspace.append("\n this.linesRead = 1; "); mm.workspace.append("\n this.escape = new 
StringBuffer(\"\\\\\");"); mm.workspace.append("\n this.delimiter = new StringBuffer(\"*\");"); mm.workspace.append("\n this.accumulator = 0;"); mm.workspace.append("\n this.workspace = new StringBuffer(\"\");"); mm.workspace.append("\n this.stack = new Stack();"); mm.workspace.append("\n this.tapePointer = 0;"); mm.workspace.append("\n this.tape = new ArrayList();"); mm.workspace.append("\n this.marks = new ArrayList();"); mm.workspace.append("\n for (int ii = 0; ii < this.LENGTH; ii++) {"); mm.workspace.append("\n this.tape.add(new StringBuffer(\"\"));"); mm.workspace.append("\n this.marks.add(new StringBuffer(\"\"));"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n try"); mm.workspace.append("\n { this.peep = this.input.read(); } "); mm.workspace.append("\n catch (java.io.IOException ex) {"); mm.workspace.append("\n System.out.println(\"read error\");"); mm.workspace.append("\n System.exit(-1);"); mm.workspace.append("\n }"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /** read one character from the input stream and "); mm.workspace.append("\n update the machine. 
*/"); mm.workspace.append("\n public void read() {"); mm.workspace.append("\n int iChar;"); mm.workspace.append("\n try {"); mm.workspace.append("\n if (this.eof) { System.exit(0); }"); mm.workspace.append("\n this.charsRead++;"); mm.workspace.append("\n // increment lines"); mm.workspace.append("\n if ((char)this.peep == \'\\n\') { this.linesRead++; }"); mm.workspace.append("\n this.workspace.append(Character.toChars(this.peep));"); mm.workspace.append("\n this.peep = this.input.read(); "); mm.workspace.append("\n if (this.peep == -1) { this.eof = true; }"); mm.workspace.append("\n }"); mm.workspace.append("\n catch (IOException ex) {"); mm.workspace.append("\n System.out.println(\"Error reading input stream\" + ex);"); mm.workspace.append("\n System.exit(-1);"); mm.workspace.append("\n }"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /** increment tape pointer by one */"); mm.workspace.append("\n public void increment() {"); mm.workspace.append("\n this.tapePointer++;"); mm.workspace.append("\n if (this.tapePointer >= this.LENGTH) {"); mm.workspace.append("\n this.tape.add(new StringBuffer(\"\"));"); mm.workspace.append("\n this.marks.add(new StringBuffer(\"\"));"); mm.workspace.append("\n this.LENGTH++;"); mm.workspace.append("\n }"); mm.workspace.append("\n }"); mm.workspace.append("\n "); mm.workspace.append("\n /** remove escape character */"); mm.workspace.append("\n public void unescapeChar(char c) {"); mm.workspace.append("\n if (workspace.length() > 0) {"); mm.workspace.append("\n String s = this.workspace.toString().replace(\"\\\\\"+c, c+\"\");"); mm.workspace.append("\n this.workspace.setLength(0); workspace.append(s);"); mm.workspace.append("\n }"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /** add escape character */"); mm.workspace.append("\n public void escapeChar(char c) {"); mm.workspace.append("\n if (workspace.length() > 0) {"); mm.workspace.append("\n String s = 
this.workspace.toString().replace(c+\"\", \"\\\\\"+c);"); mm.workspace.append("\n workspace.setLength(0); workspace.append(s);"); mm.workspace.append("\n }"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /** whether trailing escapes \\\\ are even or odd */"); mm.workspace.append("\n // untested code. check! eg try: add \"x \\\\\"; print; etc"); mm.workspace.append("\n public boolean isEscaped(String ss, String sSuffix) {"); mm.workspace.append("\n int count = 0; "); mm.workspace.append("\n if (ss.length() < 2) return false;"); mm.workspace.append("\n if (ss.length() <= sSuffix.length()) return false;"); mm.workspace.append("\n if (ss.indexOf(this.escape.toString().charAt(0)) == -1) "); mm.workspace.append("\n { return false; }"); mm.workspace.append("\n"); mm.workspace.append("\n int pos = ss.length()-sSuffix.length();"); mm.workspace.append("\n while ((pos > -1) && (ss.charAt(pos) == this.escape.toString().charAt(0))) {"); mm.workspace.append("\n count++; pos--;"); mm.workspace.append("\n }"); mm.workspace.append("\n if (count % 2 == 0) return false;"); mm.workspace.append("\n return true;"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /* a helper to see how many trailing \\\\ escape chars */"); mm.workspace.append("\n private int countEscaped(String sSuffix) {"); mm.workspace.append("\n String s = \"\";"); mm.workspace.append("\n int count = 0;"); mm.workspace.append("\n int index = this.workspace.toString().lastIndexOf(sSuffix);"); mm.workspace.append("\n // remove suffix if it exists"); mm.workspace.append("\n if (index > 0) {"); mm.workspace.append("\n s = this.workspace.toString().substring(0, index);"); mm.workspace.append("\n }"); mm.workspace.append("\n while (s.endsWith(this.escape.toString())) {"); mm.workspace.append("\n count++;"); mm.workspace.append("\n s = s.substring(0, s.lastIndexOf(this.escape.toString()));"); mm.workspace.append("\n }"); mm.workspace.append("\n return 
count;"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /** reads the input stream until the workspace end with text */"); mm.workspace.append("\n // can test this with"); mm.workspace.append("\n public void until(String sSuffix) {"); mm.workspace.append("\n // read at least one character"); mm.workspace.append("\n if (this.eof) return; "); mm.workspace.append("\n this.read();"); mm.workspace.append("\n while (true) {"); mm.workspace.append("\n if (this.eof) return;"); mm.workspace.append("\n if (this.workspace.toString().endsWith(sSuffix)) {"); mm.workspace.append("\n if (this.countEscaped(sSuffix) % 2 == 0) { return; }"); mm.workspace.append("\n }"); mm.workspace.append("\n this.read();"); mm.workspace.append("\n }"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /** pop the first token from the stack into the workspace */"); mm.workspace.append("\n public Boolean pop() {"); mm.workspace.append("\n if (this.stack.isEmpty()) return false;"); mm.workspace.append("\n this.workspace.insert(0, this.stack.pop()); "); mm.workspace.append("\n if (this.tapePointer > 0) this.tapePointer--;"); mm.workspace.append("\n return true;"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /** push the first token from the workspace to the stack */"); mm.workspace.append("\n public Boolean push() {"); mm.workspace.append("\n String sItem;"); mm.workspace.append("\n // dont increment the tape pointer on an empty push"); mm.workspace.append("\n if (this.workspace.length() == 0) return false;"); mm.workspace.append("\n // need to get this from this.delim not \"*\""); mm.workspace.append("\n int iFirstStar = "); mm.workspace.append("\n this.workspace.indexOf(this.delimiter.toString());"); mm.workspace.append("\n if (iFirstStar != -1) {"); mm.workspace.append("\n sItem = this.workspace.toString().substring(0, iFirstStar + 1);"); mm.workspace.append("\n this.workspace.delete(0, iFirstStar + 
1);"); mm.workspace.append("\n }"); mm.workspace.append("\n else {"); mm.workspace.append("\n sItem = this.workspace.toString();"); mm.workspace.append("\n this.workspace.setLength(0);"); mm.workspace.append("\n }"); mm.workspace.append("\n this.stack.push(sItem); "); mm.workspace.append("\n this.increment(); "); mm.workspace.append("\n return true;"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /** swap current tape cell with the workspace */"); mm.workspace.append("\n public void swap() {"); mm.workspace.append("\n String s = new String(this.workspace);"); mm.workspace.append("\n this.workspace.setLength(0);"); mm.workspace.append("\n this.workspace.append(this.tape.get(this.tapePointer).toString());"); mm.workspace.append("\n this.tape.get(this.tapePointer).setLength(0);"); mm.workspace.append("\n this.tape.get(this.tapePointer).append(s);"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /** save the workspace to file \"sav.pp\" */"); mm.workspace.append("\n public void writeToFile() {"); mm.workspace.append("\n try {"); mm.workspace.append("\n File file = new File(\"sav.pp\");"); mm.workspace.append("\n Writer out = new BufferedWriter(new OutputStreamWriter("); mm.workspace.append("\n new FileOutputStream(file), \"UTF8\"));"); mm.workspace.append("\n out.append(this.workspace.toString());"); mm.workspace.append("\n out.flush(); out.close();"); mm.workspace.append("\n } catch (Exception e) { "); mm.workspace.append("\n System.out.println(e.getMessage());"); mm.workspace.append("\n }"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n public void goToMark(String mark) {"); mm.workspace.append("\n this.markFound = false; "); mm.workspace.append("\n for (var ii = 0; ii < this.marks.size(); ii++) {"); mm.workspace.append("\n if (this.marks.get(ii).toString().equals(mark)) { "); mm.workspace.append("\n this.tapePointer = ii; this.markFound = true; "); 
mm.workspace.append("\n }"); mm.workspace.append("\n }"); mm.workspace.append("\n if (this.markFound == false) { "); mm.workspace.append("\n System.out.print(\"badmark \'\" + mark + \"\'!\"); "); mm.workspace.append("\n System.exit(1);"); mm.workspace.append("\n }"); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n /** parse/check/compile the input */"); mm.workspace.append("\n public void parse(InputStreamReader input) {"); mm.workspace.append("\n //this is where the actual parsing/compiling code should go "); mm.workspace.append("\n //but this means that all generated code must use"); mm.workspace.append("\n //\"this.\" not \"mm.\""); mm.workspace.append("\n }"); mm.workspace.append("\n"); mm.workspace.append("\n public static void main(String[] args) throws Exception { "); mm.workspace.append("\n String temp = \"\"; "); mm.workspace.append("\n Machine mm = new Machine(new InputStreamReader(System.in)); \n"); /* add */ // save the code in the current tape cell mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ //--------------------- // check if the script correctly parsed (there should only // be one token on the stack, namely "commandset*" or "command*"). mm.pop(); mm.pop(); if (mm.workspace.toString().equals("commandset*") || mm.workspace.toString().equals("command*")) { mm.workspace.setLength(0); /* clear */ // indent generated code (6 spaces) for readability. 
mm.workspace.append(" "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("\n", "\n "); mm.workspace.setLength(0); mm.workspace.append(temp); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ // restore the java preamble from the tape mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append(""); mm.workspace.append("\n script: "); mm.workspace.append("\n while (!mm.eof) {\n"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n }"); /* add */ mm.workspace.append("\n }"); /* add */ mm.workspace.append("\n}\n"); /* add */ // put a copy of the final compilation into the tapecell // so it can be inspected interactively. mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } if (mm.workspace.toString().equals("beginblock*commandset*") || mm.workspace.toString().equals("beginblock*command*")) { mm.workspace.setLength(0); /* clear */ // indent begin block code mm.workspace.append(" "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("\n", "\n "); mm.workspace.setLength(0); mm.workspace.append(temp); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ // indent main code for readability. 
mm.increment(); /* ++ */ mm.workspace.append(" "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ /* replace */ if (mm.workspace.length() > 0) { temp = mm.workspace.toString().replace("\n", "\n "); mm.workspace.setLength(0); mm.workspace.append(temp); } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ // get java preamble from tape mm.increment(); /* ++ */ mm.increment(); /* ++ */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ if (mm.tapePointer > 0) mm.tapePointer--; /* -- */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ mm.increment(); /* ++ */ // a labelled loop for "quit" (but quit can just exit?) mm.workspace.append(" script: \n"); /* add */ mm.workspace.append(" while (!mm.eof) {\n"); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n }"); /* add */ mm.workspace.append("\n }"); /* add */ mm.workspace.append("\n}\n"); /* add */ // put a copy of the final compilation into the tapecell // for interactive debugging. 
mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } mm.push(); mm.push(); // try to explain some more errors while (mm.pop()); /* unstack */ if (mm.workspace.toString().startsWith("parse>")) { mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("[error] pep syntax error:\n"); /* add */ mm.workspace.append(" The parse> label cannot be the 1st item \n"); /* add */ mm.workspace.append(" of a script \n"); /* add */ System.out.print(mm.workspace); /* print */ break script; } mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.setLength(0); /* clear */ mm.workspace.append("After compiling with 'compile.java.pss' (at EOF): \n "); /* add */ mm.workspace.append(" parse error in input script. \n "); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ while (mm.pop()); /* unstack */ mm.tape.get(mm.tapePointer).setLength(0); /* put */ mm.tape.get(mm.tapePointer).append(mm.workspace); mm.workspace.setLength(0); /* clear */ mm.workspace.append("Parse stack: "); /* add */ mm.workspace.append(mm.tape.get(mm.tapePointer)); /* get */ mm.workspace.append("\n"); /* add */ mm.workspace.append(" * debug script "); /* add */ mm.workspace.append(" >> pep -If script -i 'some input' \n "); /* add */ mm.workspace.append(" * debug compilation. \n "); /* add */ mm.workspace.append(" >> pep -Ia asm.pp script' \n "); /* add */ System.out.print(mm.workspace); /* print */ mm.workspace.setLength(0); /* clear */ break script; } // not eof // there is an implicit .restart command here (jump start) break parse; } } } }