/* Java code generated by "translate.java.pss" */
import java.io.*;
import java.util.regex.*;
import java.util.*;

/**
 * A small text-processing "machine" (workspace, token stack, tape) together
 * with a generated script, in {@link #main(String[])}, that recognises very
 * simple English sentences built from a fixed vocabulary.
 *
 * <p>The machine reads its input one character at a time. Words are turned
 * into parse tokens such as {@code article*} or {@code noun*}, which are
 * pushed on the stack while the matching text is kept in the tape cell at
 * the same position. Token sequences on the stack are then reduced
 * ({@code article*noun*} becomes {@code nounphrase*}, and so on) until the
 * input is exhausted.
 */
public class naturallanguage {

  /** tape maximum length */
  private static final int LENGTH = 100;

  /** matches text made only of ASCII letters; compiled once, used per char */
  private static final Pattern ALPHA = Pattern.compile("^\\p{Alpha}+$");

  /** help text printed before any input is read */
  private static final String HELP =
      "\n An attempt at basic natural language parsing. "
      + "\n Use the following words in simple sentences: "
      + "\n"
      + "\n articles: the, this, her, his, a, one, some, "
      + "\n preposition: up, in, at, on, with, under, to"
      + "\n adjectives: simple, big, small, blue, beautiful, small,"
      + "\n nouns: flower, tree, dog, house, horse, girl, fish, meat,"
      + "\n verbs: runs, eats, sleeps, is, grows, digs, sings"
      + "\n"
      + "\n End the sentence with a full stop \".\""
      + "\n eg: the small dog eats fish."
      + "\n eg: the simple horse runs on the house ."
      + "\n .\n";

  /** vocabulary: maps each known word to the parse token it produces */
  private static final Map<String, String> WORD_TOKENS =
      new HashMap<String, String>();

  static {
    register("article*", "the", "this", "her", "his", "a", "one", "some");
    register("preposition*", "up", "in", "at", "on", "with", "under", "to");
    register("adjective*", "simple", "big", "small", "blue", "beautiful");
    register("noun*", "flower", "tree", "dog", "house", "horse", "girl",
        "fish", "meat");
    register("verb*", "runs", "eats", "sleeps", "is", "grows", "digs",
        "sings");
  }

  /** three-token sequences that reduce to a sentence */
  private static final Set<String> SENTENCES_OF_THREE =
      new HashSet<String>(Arrays.asList(
          "noun*verb*dot*",
          "nounphrase*verb*dot*",
          "noun*verbphrase*dot*",
          "nounphrase*verbphrase*dot*"));

  /** four-token sequences that reduce to a sentence */
  private static final Set<String> SENTENCES_OF_FOUR =
      new HashSet<String>(Arrays.asList(
          "nounphrase*verb*noun*dot*",
          "noun*verb*noun*dot*",
          "nounphrase*verb*nounphrase*dot*",
          "noun*verb*nounphrase*dot*",
          "nounphrase*verbphrase*nounphrase*dot*",
          "noun*verbphrase*nounphrase*dot*",
          "nounphrase*verbphrase*noun*dot*",
          "noun*verbphrase*noun*dot*"));

  // an int (not a char) is used for the characters; note that
  // Reader.read() still delivers one UTF-16 code unit at a time.
  private int accumulator;         // counter for anything
  private int peep;                // next char in input stream
  private int charsRead;           // No. of chars read so far
  private int linesRead;           // No. of lines read so far
  public StringBuffer workspace;   // text accumulator
  private Deque<String> stack;     // parse token stack
  private StringBuffer[] tape;     // array of token attributes
  private StringBuffer[] marks;    // tape marks
  private int tapePointer;         // pointer to current cell
  private Reader input;            // text input stream
  private boolean eof;             // end of stream reached?
  private boolean flag;            // not used here
  private StringBuffer escape;     // char used to "escape" others "\"
  private StringBuffer delimiter;  // push/pop delimiter (default is "*")

  /** adds every word to the vocabulary under the given token */
  private static void register(String token, String... words) {
    for (String word : words) {
      WORD_TOKENS.put(word, token);
    }
  }

  /**
   * make a new machine with a character stream reader. The first
   * character is read ahead into the peep. If the stream is already
   * empty the machine starts at end-of-stream, so read() is never asked
   * to append the -1 marker to the workspace.
   */
  public naturallanguage(Reader reader) {
    this.input = reader;
    this.eof = false;
    this.flag = false;
    this.charsRead = 0;
    this.linesRead = 1;
    this.escape = new StringBuffer("\\");
    this.delimiter = new StringBuffer("*");
    this.accumulator = 0;
    this.workspace = new StringBuffer("");
    this.stack = new ArrayDeque<String>();
    this.tapePointer = 0;
    this.tape = new StringBuffer[LENGTH];
    this.marks = new StringBuffer[LENGTH];
    for (int ii = 0; ii < this.tape.length; ii++) {
      this.tape[ii] = new StringBuffer();
      this.marks[ii] = new StringBuffer();
    }
    try {
      this.peep = this.input.read();
      if (this.peep == -1) {
        // nothing to read at all: Character.toChars(-1) would throw
        this.eof = true;
      }
    } catch (java.io.IOException ex) {
      System.out.println("read error");
      System.exit(-1);
    }
  }

  /**
   * read one character from the input stream and update the machine:
   * the peep is appended to the workspace and the next character becomes
   * the new peep. Calling this at end-of-stream exits the program with
   * status 0; a read error exits with status -1.
   */
  public void read() {
    try {
      if (this.eof) {
        System.exit(0);
      }
      this.charsRead++;
      // increment lines
      if (this.peep == '\n') {
        this.linesRead++;
      }
      this.workspace.append(Character.toChars(this.peep));
      this.peep = this.input.read();
      if (this.peep == -1) {
        this.eof = true;
      }
    } catch (IOException ex) {
      System.out.println("Error reading input stream" + ex);
      System.exit(-1);
    }
  }

  /** increment tape pointer by one; exits with status 1 past the tape end */
  public void increment() {
    this.tapePointer++;
    if (this.tapePointer > naturallanguage.LENGTH - 1) {
      System.out.println("Tape length exceeded [" + LENGTH + "]");
      System.exit(1);
    }
  }

  /** remove the escape character in front of every c in the workspace */
  public void unescapeChar(char c) {
    if (this.workspace.length() > 0) {
      String escaped = this.escape.toString() + c;
      String s = this.workspace.toString().replace(escaped, c + "");
      this.workspace.setLength(0);
      this.workspace.append(s);
    }
  }

  /** add the escape character in front of every c in the workspace */
  public void escapeChar(char c) {
    if (this.workspace.length() > 0) {
      String escaped = this.escape.toString() + c;
      String s = this.workspace.toString().replace(c + "", escaped);
      this.workspace.setLength(0);
      this.workspace.append(s);
    }
  }

  /**
   * whether the escape characters directly in front of the trailing
   * suffix are odd in number (true: the suffix is escaped) or even.
   * Text shorter than 2 characters, or not longer than the suffix, is
   * never considered escaped.
   * eg try: add "x \\"; print; etc
   *
   * @param ss the text to inspect
   * @param sSuffix the suffix whose length is skipped at the end of ss
   * @return true if an odd number of escape chars precedes the suffix
   */
  public boolean isEscaped(String ss, String sSuffix) {
    if (ss.length() < 2) return false;
    if (ss.length() <= sSuffix.length()) return false;
    char esc = this.escape.toString().charAt(0);
    if (ss.indexOf(esc) == -1) {
      return false;
    }
    int count = 0;
    // start on the character just before the suffix, not on the
    // first character of the suffix itself
    int pos = ss.length() - sSuffix.length() - 1;
    while ((pos > -1) && (ss.charAt(pos) == esc)) {
      count++;
      pos--;
    }
    return count % 2 != 0;
  }

  /* a helper to see how many trailing \\ escape chars precede the suffix */
  private int countEscaped(String sSuffix) {
    String esc = this.escape.toString();
    String text = this.workspace.toString();
    String s = "";
    int count = 0;
    int index = text.lastIndexOf(sSuffix);
    // remove suffix if it exists
    if (index > 0) {
      s = text.substring(0, index);
    }
    while (s.endsWith(esc)) {
      count++;
      s = s.substring(0, s.lastIndexOf(esc));
    }
    return count;
  }

  /**
   * reads the input stream until the workspace ends with the given text
   * (not preceded by an odd number of escape chars) or the stream ends.
   * At least one character is read unless the stream has already ended.
   */
  public void until(String sSuffix) {
    // read at least one character
    if (this.eof) return;
    this.read();
    while (true) {
      if (this.eof) return;
      if (this.workspace.toString().endsWith(sSuffix)) {
        if (this.countEscaped(sSuffix) % 2 == 0) {
          return;
        }
      }
      this.read();
    }
  }

  /**
   * pop the top token from the stack onto the front of the workspace
   * and step the tape pointer back (never below zero).
   *
   * @return false if the stack was empty, true otherwise
   */
  public Boolean pop() {
    if (this.stack.isEmpty()) return false;
    this.workspace.insert(0, this.stack.pop());
    if (this.tapePointer > 0) this.tapePointer--;
    return true;
  }

  /**
   * push the first token from the workspace to the stack and advance the
   * tape pointer. The token runs up to and including the first delimiter;
   * without a delimiter the whole workspace is pushed.
   *
   * @return false if the workspace was empty, true otherwise
   */
  public Boolean push() {
    // dont increment the tape pointer on an empty push
    if (this.workspace.length() == 0) return false;
    String delim = this.delimiter.toString();
    String sItem;
    int iFirstDelim = this.workspace.indexOf(delim);
    if (iFirstDelim != -1) {
      // cut after the whole delimiter, whatever its length
      int end = iFirstDelim + delim.length();
      sItem = this.workspace.substring(0, end);
      this.workspace.delete(0, end);
    } else {
      sItem = this.workspace.toString();
      this.workspace.setLength(0);
    }
    this.stack.push(sItem);
    this.increment();
    return true;
  }

  /** swap current tape cell with the workspace */
  public void swap() {
    StringBuffer cell = this.tape[this.tapePointer];
    String s = this.workspace.toString();
    this.workspace.setLength(0);
    this.workspace.append(cell.toString());
    cell.setLength(0);
    cell.append(s);
  }

  /**
   * save the workspace to file "sav.pp". Best effort: a failure is only
   * reported on standard output. The writer is always closed.
   */
  public void writeToFile() {
    File file = new File("sav.pp");
    try (Writer out = new BufferedWriter(new OutputStreamWriter(
             new FileOutputStream(file), "UTF8"))) {
      out.append(this.workspace.toString());
      out.flush();
    } catch (Exception e) {
      System.out.println(e.getMessage());
    }
  }

  /** parse/check/compile the input */
  public void parse(InputStreamReader input) {
    // this is where the actual parsing/compiling code should go
    // but this means that all generated code must use
    // "this." not "mm."
  }

  /** copy the workspace into the current tape cell ("put") */
  private void put() {
    StringBuffer cell = this.tape[this.tapePointer];
    cell.setLength(0);
    cell.append(this.workspace);
  }

  /**
   * lexing step: read one character (or one whole word) and push the
   * matching token, storing the text in the current tape cell.
   *
   * @return false if an unknown word was reported and the script must stop
   */
  private boolean lexToken() {
    this.read();
    if (ALPHA.matcher(this.workspace.toString()).matches()) {
      // gather the rest of the word
      while (!this.eof
          && ALPHA.matcher(Character.toString((char) this.peep)).matches()) {
        this.read();
      }
      this.put();
      String word = this.workspace.toString();
      String token = WORD_TOKENS.get(word);
      this.workspace.setLength(0);
      if (token == null) {
        this.workspace.append("<");
        this.workspace.append(this.tape[this.tapePointer]);
        this.workspace.append(">");
        this.workspace.append(" Sorry, don't understand that word! \n");
        System.out.print(this.workspace);
        this.workspace.setLength(0);
        return false;
      }
      this.workspace.append(token);
      this.push();
      return true;
    }
    // use a full-stop to complete sentence
    if (this.workspace.toString().equals(".")) {
      this.put();
      this.workspace.setLength(0);
      this.workspace.append("dot*");
      this.push();
    }
    // ignore every thing else
    this.workspace.setLength(0);
    return true;
  }

  /**
   * join the text of the given number of consecutive tape cells (starting
   * at the current one) with single spaces, store the result in the first
   * of those cells and push the given token in place of the popped ones.
   */
  private void mergeCells(int cells, String token) {
    this.workspace.setLength(0);
    this.workspace.append(this.tape[this.tapePointer]);
    for (int i = 1; i < cells; i++) {
      this.workspace.append(" ");
      this.increment();
      this.workspace.append(this.tape[this.tapePointer]);
    }
    for (int i = 1; i < cells; i++) {
      if (this.tapePointer > 0) this.tapePointer--;
    }
    this.put();
    this.workspace.setLength(0);
    this.workspace.append(token);
    this.push();
  }

  /**
   * parsing step: repeatedly pop up to four tokens and reduce any known
   * sequence; when nothing reduces, the tokens are pushed back. At
   * end-of-stream the final verdict is printed.
   *
   * @return false once the verdict has been printed and the script must stop
   */
  private boolean reduceStack() {
    while (true) {
      // 2 tokens
      this.pop();
      this.pop();
      String two = this.workspace.toString();
      if (two.equals("article*noun*")) {
        this.mergeCells(2, "nounphrase*");
        continue;
      }
      if (two.equals("verb*preposition*")) {
        this.mergeCells(2, "verbphrase*");
        continue;
      }
      // 3 tokens
      this.pop();
      String three = this.workspace.toString();
      if (SENTENCES_OF_THREE.contains(three)) {
        // only subject and verb are joined; the dot is left out
        this.mergeCells(2, "sentence*");
        continue;
      }
      if (three.equals("article*adjective*noun*")) {
        this.mergeCells(3, "nounphrase*");
        continue;
      }
      // 4 tokens
      this.pop();
      if (SENTENCES_OF_FOUR.contains(this.workspace.toString())) {
        this.mergeCells(3, "sentence*");
        continue;
      }
      // nothing matched: put every popped token back
      this.push();
      this.push();
      this.push();
      this.push();
      if (this.eof) {
        this.reportOutcome();
        return false;
      }
      return true;
    }
  }

  /** at end-of-stream: print what the top of the parse stack amounts to */
  private void reportOutcome() {
    this.pop();
    this.pop();
    String top = this.workspace.toString();
    if (top.equals("sentence*")) {
      this.workspace.setLength(0);
      this.workspace.append("It's an english sentence! \n(");
      this.workspace.append(this.tape[this.tapePointer]);
      this.workspace.append(") \n");
      this.workspace.append("But it may not make sense! \n");
      System.out.print(this.workspace);
      this.workspace.setLength(0);
      return;
    }
    if (top.equals("nounphrase*")) {
      this.workspace.setLength(0);
      this.workspace.append("its a noun-phrase! (");
      this.workspace.append(this.tape[this.tapePointer]);
      this.workspace.append(") \n");
      System.out.print(this.workspace);
      this.workspace.setLength(0);
      return;
    }
    if (top.equals("verbphrase*")) {
      this.workspace.setLength(0);
      this.workspace.append("its a verb-phrase! (");
      this.workspace.append(this.tape[this.tapePointer]);
      this.workspace.append(") \n");
      System.out.print(this.workspace);
      this.workspace.setLength(0);
      return;
    }
    this.push();
    this.push();
    this.workspace.append("nope, not a sentence. \n");
    System.out.print(this.workspace);
    this.workspace.setLength(0);
    this.workspace.append("The parse stack was: \n ");
    System.out.print(this.workspace);
    this.workspace.setLength(0);
    while (this.pop()) { /* unstack */ }
    this.workspace.append("\n");
    System.out.print(this.workspace);
  }

  /**
   * print the help text, then lex and parse standard input until the
   * input ends or an unknown word is met.
   */
  public static void main(String[] args) throws Exception {
    naturallanguage mm = new naturallanguage(new InputStreamReader(System.in));
    System.out.print(HELP);
    while (!mm.eof) {
      if (!mm.lexToken()) {
        break;
      }
      if (!mm.reduceStack()) {
        break;
      }
    }
  }
}