#!/usr/bin/env python3
# code generated by "translate.py.pss" a pep script
# bumble.sf.net/books/pars/
"""Recognise very simple English sentences read from standard input.

A small stack/tape "parse machine" (class ``Machine``) reads the input one
character at a time.  Words are turned into word-class tokens such as
``article*`` or ``noun*`` (the lex phase) and the tokens on the stack are
then reduced with grammar rules (the parse phase) until a ``sentence*``,
``nounphrase*`` or ``verbphrase*`` remains, or no rule applies.
"""

import io
import re  # regex support (kept from the generated header)
import sys  # for stdin.read(), stdout.write() and exit()
from unicodedata import category  # for matching unicode classes


class Machine:
    """Parse machine with a workspace, a token stack and a tape.

    ``work`` is the text workspace, ``stack`` holds parse tokens, and
    ``tape`` holds one attribute string per token, addressed by ``cell``.
    ``peep`` is the single look-ahead character of the input stream.
    """

    def __init__(self):
        """Create a machine and read the first look-ahead char from stdin."""
        self.size = 100        # how many elements in stack/tape/marks
        self.eof = False       # end of stream reached?
        self.charsRead = 0     # how many chars already read
        self.linesRead = 1     # how many lines already read
        self.escape = "\\"
        self.delimiter = "*"   # push/pop delimiter (default "*")
        self.counter = 0       # a counter for anything
        self.work = ""         # the workspace
        self.stack = []        # stack for parse tokens
        self.cell = 0          # current tape cell
        self.tape = [""] * self.size    # a list of attributes for tokens
        self.marks = [""] * self.size   # marked tape cells
        # the look-ahead is primed here because setInput() is not
        # implemented yet
        self.peep = sys.stdin.read(1)

    def printSizeError(self):
        """Print a diagnostic explaining that the tape size was exceeded."""
        print("tape max size exceeded!")
        print("tape maximum size = " + str(self.size))
        print("tape cell (current) = " + str(self.cell))
        print("You can increase the self.size value in the python script ")
        print("but normally this error indicates an error in your parsing ")
        print("script. The only exception would be massively nested structures")
        print("in the source data.")

    def setInput(self, newInput):
        """Placeholder for switching the input stream (not implemented)."""
        print("to be implemented")

    def read(self):
        """Append the look-ahead char to the workspace and fetch the next.

        Updates the character and line counters and sets ``eof`` once the
        input stream returns nothing.  Exits the program (status 0) when
        called after end of input.
        """
        if self.eof:
            sys.exit(0)
        self.charsRead += 1
        # count a line when the character being consumed is a newline
        if self.peep == "\n":
            self.linesRead += 1
        self.work += self.peep
        self.peep = sys.stdin.read(1)
        if not self.peep:
            self.eof = True

    def increment(self):
        """Move the tape pointer forward by one cell."""
        self.cell += 1

    def isInCategory(self, cat):
        """Return True if every workspace char is in unicode category *cat*.

        *cat* is matched as a prefix of the category code, so "L" matches
        "Lu", "Ll" and so on.  An empty workspace yields True.
        """
        return all(category(ch).startswith(cat) for ch in self.work)

    def unescapeChar(self, c):
        """Remove the escape character in front of each *c* in the workspace."""
        if self.work:
            self.work = self.work.replace(self.escape + c, c)

    def escapeChar(self, c):
        """Put the escape character in front of each *c* in the workspace."""
        if self.work:
            self.work = self.work.replace(c, self.escape + c)

    def countEscaped(self, suffix):
        """Count the escape characters directly preceding a trailing *suffix*.

        Returns 0 when the workspace does not end with *suffix* or when the
        escape string is empty (stripping it could never terminate).
        """
        if not self.escape or not self.work.endswith(suffix):
            return 0
        rest = self.work.removesuffix(suffix)
        count = 0
        while rest.endswith(self.escape):
            count += 1
            rest = rest.removesuffix(self.escape)
        return count

    def until(self, suffix):
        """Read input until the workspace ends with an unescaped *suffix*.

        At least one character is read (unless already at end of input).
        A suffix preceded by an odd number of escape characters is escaped
        and does not stop the read.
        """
        if self.eof:
            return
        self.read()
        while not self.eof:
            if self.work.endswith(suffix) and self.countEscaped(suffix) % 2 == 0:
                return
            self.read()

    def pop(self):
        """Pop the top stack token onto the front of the workspace.

        The tape pointer moves back one cell (never below zero).  Returns
        False when the stack is empty, True otherwise.
        """
        if not self.stack:
            return False
        self.work = self.stack.pop() + self.work
        if self.cell > 0:
            self.cell -= 1
        return True

    def push(self):
        """Push the first token of the workspace onto the stack.

        A token runs up to and including the first delimiter.  If the
        workspace holds no delimiter the whole workspace is pushed and the
        tape pointer is left alone.  Returns False (and does nothing) when
        the workspace is empty, True otherwise.
        """
        # dont increment the tape pointer on an empty push
        if not self.work:
            return False
        first = self.work.find(self.delimiter)
        if first == -1:
            self.stack.append(self.work)
            self.work = ""
            return True
        self.stack.append(self.work[:first + 1])
        self.work = self.work[first + 1:]
        if self.cell < self.size:
            self.cell += 1
        else:
            self.printSizeError()
            sys.exit()
        return True

    def swap(self):
        """Exchange the workspace with the current tape cell."""
        self.work, self.tape[self.cell] = self.tape[self.cell], self.work

    def writeToFile(self):
        """Write the workspace to the file "sav.pp" in the current directory."""
        with open("sav.pp", "w") as f:
            f.write(self.work)

    def printState(self):
        """Print the stack, workspace, look-ahead and registers (debug aid)."""
        print(f"Stack[{','.join(self.stack)}] Work[{self.work}] "
              f"Peep[{self.peep}]")
        print(f"Acc:{self.counter} Esc:{self.escape} "
              f"Delim:{self.delimiter} Chars:{self.charsRead} "
              f"Lines:{self.linesRead} Cell:{self.cell}")

    def parse(self, s):
        """Stub: only distinguishes stream, string and other inputs.

        This is where the parsing code should eventually live so that the
        machine can be reused on a different string/file/stream.  For now
        it is placeholder code and parses nothing.
        """
        if isinstance(s, io.IOBase):
            print("")
        elif isinstance(s, str):
            # placeholder demonstration: iterates a fixed dummy stream
            for line in io.StringIO("test"):
                print(line)
        else:
            sys.stdout.write("not implemented")


HELP_TEXT = (
    "\n An attempt at basic natural language parsing. "
    "\n Use the following words in simple sentences: "
    "\n"
    "\n articles: the, this, her, his, a, one, some, "
    "\n preposition: up, in, at, on, with, under, to"
    "\n adjectives: simple, big, small, blue, beautiful, small,"
    "\n nouns: flower, tree, dog, house, horse, girl, fish, meat,"
    "\n verbs: runs, eats, sleeps, is, grows, digs, sings"
    "\n"
    "\n End the sentence with a full stop \".\""
    "\n eg: the small dog eats fish."
    "\n eg: the simple horse runs on the house ."
    "\n .\n"
)

# known words and the parse token each one becomes, tried in this order
WORD_CLASSES = (
    (("the", "this", "her", "his", "a", "one", "some"), "article*"),
    (("up", "in", "at", "on", "with", "under", "to"), "preposition*"),
    (("simple", "big", "small", "blue", "beautiful"), "adjective*"),
    (("flower", "tree", "dog", "house", "horse", "girl", "fish", "meat"),
     "noun*"),
    (("runs", "eats", "sleeps", "is", "grows", "digs", "sings"), "verb*"),
)

_SUBJECTS = ("noun*", "nounphrase*")
_VERBS = ("verb*", "verbphrase*")

# subject verb "."
SENTENCES_3 = frozenset(
    subj + verb + "dot*" for subj in _SUBJECTS for verb in _VERBS
)
# subject verb object "."
SENTENCES_4 = frozenset(
    subj + verb + obj + "dot*"
    for subj in _SUBJECTS for verb in _VERBS for obj in _SUBJECTS
)


def _reduce(mm, cells, token):
    """Join *cells* tape cells from the current one and push *token*.

    The joined text (space separated) replaces the current tape cell, so
    it becomes the attribute of the new token.
    """
    text = " ".join(mm.tape[mm.cell + i] for i in range(cells))
    mm.tape[mm.cell] = text
    mm.work = token
    mm.push()


def _lex(mm):
    """Read one lexeme and push its token (if any) onto the stack.

    Exits the program with a message when a word is not in the vocabulary.
    Anything that is neither a word nor a full stop is discarded.
    """
    mm.read()
    if mm.work.isalpha():
        # extend the word while the *next* character is alphabetic, so
        # that the following space or full stop is not swallowed
        while mm.peep.isalpha():
            if mm.eof:
                break
            mm.read()
        mm.tape[mm.cell] = mm.work
        for words, token in WORD_CLASSES:
            if mm.work in words:
                mm.work = token
                mm.push()
                return
        sys.stdout.write(
            "<" + mm.tape[mm.cell] + ">"
            + " Sorry, don't understand that word! \n"
        )
        mm.work = ""
        sys.exit()
    # use a full-stop to complete sentence
    if mm.work == ".":
        mm.tape[mm.cell] = mm.work
        mm.work = "dot*"
        mm.push()
    # ignore every thing else
    mm.work = ""


def _report(mm):
    """Print the final verdict at end of input and exit the program."""
    mm.pop()
    mm.pop()
    verdicts = {
        "sentence*": ("It's an english sentence! \n(",
                      ") \nBut it may not make sense! \n"),
        "nounphrase*": ("its a noun-phrase! (", ") \n"),
        "verbphrase*": ("its a verb-phrase! (", ") \n"),
    }
    if mm.work in verdicts:
        before, after = verdicts[mm.work]
        sys.stdout.write(before + mm.tape[mm.cell] + after)
        mm.work = ""
        sys.exit()
    mm.push()
    mm.push()
    mm.work += "nope, not a sentence. \n"
    sys.stdout.write(mm.work)
    mm.work = "The parse stack was: \n "
    sys.stdout.write(mm.work)
    mm.work = ""
    # unstack everything into the workspace to show what was left over
    while mm.pop():
        pass
    mm.work += "\n"
    sys.stdout.write(mm.work)
    sys.exit()


def _parse(mm):
    """Apply grammar reductions to the top of the stack until none match.

    Tokens are popped into the workspace two, three and four at a time and
    compared with the rules; after a reduction the rules are tried again
    from the start.  At end of input the verdict is printed and the
    program exits.
    """
    while True:
        # 2 tokens
        mm.pop()
        mm.pop()
        if mm.work == "article*noun*":
            _reduce(mm, 2, "nounphrase*")
            continue
        if mm.work == "verb*preposition*":
            _reduce(mm, 2, "verbphrase*")
            continue
        # 3 tokens
        mm.pop()
        if mm.work in SENTENCES_3:
            # only subject and verb are joined; the full stop is dropped
            _reduce(mm, 2, "sentence*")
            continue
        if mm.work == "article*adjective*noun*":
            _reduce(mm, 3, "nounphrase*")
            continue
        # 4 tokens
        mm.pop()
        if mm.work in SENTENCES_4:
            _reduce(mm, 3, "sentence*")
            continue
        # nothing matched: put the tokens back on the stack
        mm.push()
        mm.push()
        mm.push()
        mm.push()
        if mm.eof:
            _report(mm)
        return


def main():
    """Print the help text, then lex and parse standard input."""
    mm = Machine()
    sys.stdout.write(HELP_TEXT)
    while not mm.eof:
        _lex(mm)
        _parse(mm)


if __name__ == "__main__":
    main()