#!/usr/bin/env python3

# code generated by "translate.py.pss" a pep script
# bumble.sf.net/books/pars/

import re   # regex matching in the generated lexer/parser code
import sys  # for stdin.read(), stdout.write() and exit()
from unicodedata import category  # for matching classes

# may use, which could make the char class code easier
# import regex
# regex.findall(r'[[:graph:]]', 'a 0 a b z')


class Machine:
    """The parsing machine: a workspace, a token stack, a tape and a peep char.

    Input is read one character at a time from ``sys.stdin``.  ``peep``
    always holds the next unread character; ``work`` is the text buffer
    that the generated script inspects and rewrites.
    """

    def __init__(self):
        """Create a new machine and prime ``peep`` from standard input."""
        self.size = 300         # how many elements in stack/tape/marks
        self.eof = False        # end of stream reached?
        self.charsRead = 0      # how many chars already read
        self.linesRead = 1      # how many lines already read
        self.escape = "\\"
        self.delimiter = "*"    # push/pop delimiter (default "*")
        self.counter = 0        # a counter for anything
        self.work = ""          # the workspace
        self.stack = []         # stack for parse tokens
        self.cell = 0           # current tape cell
        self.tape = [""] * self.size    # a list of attributes for tokens
        self.marks = [""] * self.size   # marked tape cells
        # or don't initialise peep until "parse()" calls "setInput()"
        self.peep = sys.stdin.read(1)

    def setInput(self, newInput):
        """Placeholder: switching the input source is not implemented."""
        print("to be implemented")

    def read(self):
        """Append ``peep`` to the workspace and fetch the next character.

        Updates the character and line counters.  Sets ``eof`` once stdin
        is exhausted; calling ``read()`` again after that exits the
        process with status 0 (raises ``SystemExit``).
        """
        if self.eof:
            # was "System.exit(0)", which is a NameError in python
            sys.exit(0)
        self.charsRead += 1
        # increment lines
        if self.peep == "\n":
            self.linesRead += 1
        self.work += self.peep
        self.peep = sys.stdin.read(1)
        if not self.peep:
            self.eof = True

    def increment(self):
        """Advance the tape pointer (command ++), growing tape/marks if needed."""
        self.cell += 1
        if self.cell >= self.size:
            self.tape.append("")
            self.marks.append("")
            self.size += 1

    # no! bug! because while checks mm.peep, but class test
    # checks mm.work. so have to adapt this function for either.
    def isInCategory(self, cat, text):
        """Return True if every char of ``text`` is in a unicode category
        whose name starts with ``cat`` (True for empty ``text``)."""
        return all(category(ch).startswith(cat) for ch in text)

    def unescapeChar(self, c):
        """Remove the backslash in front of every ``c`` in the workspace."""
        if self.work:
            self.work = self.work.replace("\\" + c, c)

    def escapeChar(self, c):
        """Put a backslash in front of every ``c`` in the workspace."""
        if self.work:
            self.work = self.work.replace(c, "\\" + c)

    def countEscaped(self, suffix):
        """Count the escape chars directly preceding a trailing ``suffix``.

        A helper for the multiple-escape-char bug in ``until()``.
        Returns 0 when the workspace does not end with ``suffix``.
        """
        if not self.work.endswith(suffix):
            return 0
        if not self.escape:
            # an empty escape string "matches" forever; nothing to count
            return 0
        # slicing instead of str.removesuffix, which needs python >= 3.9
        rest = self.work[:len(self.work) - len(suffix)]
        escapeLength = len(self.escape)
        count = 0
        while rest.endswith(self.escape):
            count += 1
            rest = rest[:-escapeLength]
        return count

    def until(self, suffix):
        """Read input until the workspace ends with an unescaped ``suffix``.

        Always reads at least one character (unless already at eof) and
        stops silently at end of input.
        """
        if self.eof:
            return
        # read at least one character
        self.read()
        while not self.eof:
            # count the trailing escape chars: odd=continue, even=stop
            if (self.work.endswith(suffix)
                    and self.countEscaped(suffix) % 2 == 0):
                return
            self.read()

    def pop(self):
        """Pop the top token off the stack onto the front of the workspace.

        Moves the tape pointer back one cell (not below 0).  Returns False
        if the stack is empty, True otherwise.
        """
        if not self.stack:
            return False
        # was "mm.stack.pop()": must use this machine, not the global one
        self.work = self.stack.pop() + self.work
        if self.cell > 0:
            self.cell -= 1
        return True

    def push(self):
        """Push the first token of the workspace onto the stack.

        A token ends at (and includes) the first delimiter.  If there is
        no delimiter, the whole workspace is pushed and the tape pointer
        is left where it is.  Returns False on an empty workspace.
        """
        # don't increment the tape pointer on an empty push
        if not self.work:
            return False
        iFirst = self.work.find(self.delimiter)
        if iFirst == -1:
            self.stack.append(self.work)
            self.work = ""
            return True
        self.stack.append(self.work[:iFirst + 1])
        self.work = self.work[iFirst + 1:]
        self.increment()
        return True

    # this function is not used (the code is "inlined")
    def swap(self):
        """Exchange the workspace with the current tape cell."""
        self.work, self.tape[self.cell] = self.tape[self.cell], self.work

    def goToMark(self, mark):
        """Move the tape pointer to the cell marked ``mark``.

        If several cells carry the same mark the last one wins.  Prints
        an error and exits the process if no cell has that mark.
        """
        found = None
        # was "mm.marks"/"mm.cell": must use this machine, not the global
        for index, name in enumerate(self.marks):
            if name == mark:
                found = index
        if found is None:
            print("badmark '" + mark + "'!")
            sys.exit()
        self.cell = found

    def writeToFile(self):
        """Write the workspace to the file "sav.pp" in the current directory."""
        with open("sav.pp", "w") as f:
            f.write(self.work)

    def printState(self):
        """Print stack, workspace, peep and the machine registers to stdout."""
        print("Stack[" + ",".join(self.stack) +
              "] Work[" + self.work + "] Peep[" + self.peep + "]")
        print("Acc:" + str(self.counter) + " Esc:" + self.escape +
              " Delim:" + self.delimiter + " Chars:" + str(self.charsRead) +
              " Lines:" + str(self.linesRead) + " Cell:" + str(self.cell))

    # this is where the actual parsing/compiling code should go
    # so that it can be used by other python classes/objects. Also
    # should have a stream argument.
    def parse(self, s):
        """Stub for parsing a string or stream; only reports "not implemented".

        A reset or "setInput()" method would be useful to parse a
        different string/file/stream without creating a new machine.
        """
        import io  # local import: only this stub needs it

        # the python2 names "file", "string" and "StringIO.StringIO" used
        # here before do not exist in python3 and raised NameError
        if isinstance(s, io.IOBase):
            print("")
            # self.reset(s)
            # self.reader = s
        elif isinstance(s, str):
            # placeholder text from the code generator, not the real input
            f = io.StringIO("test")
            for line in f:
                print(line)
        # any other input would fall back to sys.stdin
        sys.stdout.write("not implemented")

# end of Machine class definition
Also, no trailing ; is required if (re.match(r"^[\n]+$", mm.work)): mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += ";*" mm.push(); break # ignore extraneous white-space? if (re.match(r"^[\s]+$", mm.work)): mm.work = '' # clear if (mm.eof): break continue # comments, convert to java comments if (mm.work == "#"): mm.work = '' # clear mm.work += "/* " mm.until("\n"); if (mm.work.endswith("\n")): # if len(mm.work) > 0: # clip mm.work = mm.work[:-1] # clip mm.work += " */\n" mm.tape[mm.cell] = mm.work # put mm.work = '' # clear # uncomment line below to include comments in output # add "comment*"; push; .reparse # literal tokens '{' and '}' are used to group commands in # sed, ';' is used to separate commands and ',' to separate line # ranges. ! is the postfix negation operator for ranges if (mm.work == "," or mm.work == "{" or mm.work == "}" or mm.work == ";" or mm.work == "!"): mm.tape[mm.cell] = mm.work # put mm.work += "*" mm.push(); break # various actions: print, delete, swap if (mm.work == "=" or mm.work == "p" or mm.work == "P" or mm.work == "l" or mm.work == "d" or mm.work == "D" or mm.work == "F" or mm.work == "g" or mm.work == "G" or mm.work == "h" or mm.work == "H" or mm.work == "n" or mm.work == "N" or mm.work == "x" or mm.work == "z"): if (mm.work == "="): mm.work = '' # clear # print line-number + newline mm.work += "System.out.println(mm.linesRead); /* '=' */" if (mm.work == "d"): mm.work = '' # clear # 'd' delete pattern-space, restart # the if true trick is necessary to avoid 'unreachable statement' # java compile errors (when multiple 'd' commands are given) mm.work += "if (true) { mm.patternSpace.setLength(0); continue; } /* 'd' */" if (mm.work == "D"): mm.work = '' # clear # add "/* 'D' delete pattern-space to 1st \\n, restart */"; mm.work += "if (mm.patternSpace.indexOf(\"\\n\") > -1) {\n" mm.work += " mm.patternSpace.delete(0, mm.patternSpace.indexOf(\"\\n\"));\n" mm.work += " mm.readNext = false; if (true) continue; \n" mm.work 
+= "} else { mm.patternSpace.setLength(0); continue; } /* 'd' */" if (mm.work == "F"): # F: print input filename + newline # maybe unsupported in java mm.work = '' # clear mm.work += "System.out.println(\"\"); /* F */" if (mm.work == "g"): # g: replace patt-space with hold-space mm.work = '' # clear mm.work += "mm.patternSpace.setLength(0); \n" mm.work += "mm.patternSpace.append(mm.holdSpace); /* 'g' */" if (mm.work == "G"): # G; append hold-space to patt-space + \\n" mm.work = '' # clear mm.work += "mm.patternSpace.append(\"\\n\" + mm.holdSpace); /* 'G' */" if (mm.work == "h"): # h: replace hold-space with patt-space mm.work = '' # clear mm.work += "mm.holdSpace.setLength(0); \n" mm.work += "mm.holdSpace.append(mm.patternSpace); /* 'h' */" if (mm.work == "H"): # H: append patt-space to hold-space + newline mm.work = '' # clear mm.work += "mm.holdSpace.append(\"\\n\" + mm.patternSpace); /* 'H' */" if (mm.work == "l"): # print pattern-space unambiguously, synonym for p ? mm.work = '' # clear mm.work += "System.out.println(mm.patternSpace); /* 'l' */" if (mm.work == "n"): # n: print patt-space, get next line into patt-space mm.work = '' # clear mm.work += "if (mm.autoPrint) { System.out.println(mm.patternSpace); }\n" mm.work += "mm.patternSpace.setLength(0);\n" mm.work += "mm.readLine(); /* 'n' */" if (mm.work == "N"): # N: append next line to patt-space + newline mm.work = '' # clear mm.work += "mm.patternSpace.append('\\n'); " mm.work += "mm.readLine(); /* 'N' */" if (mm.work == "p"): mm.work = '' # clear mm.work += "System.out.println(mm.patternSpace); /* 'p' */" if (mm.work == "P"): # P: print pattern-space up to 1st newline" mm.work = '' # clear mm.work += "if (mm.patternSpace.indexOf(\"\\n\") > -1) {\n" mm.work += " System.out.println(\n" mm.work += " mm.patternSpace.substring(0, mm.patternSpace.indexOf(\"\\n\")));\n" mm.work += "} else { System.out.println(mm.patternSpace); }" if (mm.work == "x"): # x: # swap pattern-space with hold-space mm.work = '' # clear 
mm.work += "mm.swap(); /* x */" if (mm.work == "z"): # z: delete pattern-space, NO restart mm.work = '' # clear mm.work += "mm.patternSpace.setLenth(0); /* z */" mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "action*" mm.push(); break # line numbers are also selectors if (re.match(r"^[0-9]+$", mm.work)): # while while re.match(r"^[0-9]+$", mm.peep): if mm.eof: break mm.read() mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "number*" mm.push(); break # $ is the last line of the file if (mm.work == "$"): mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "number*" mm.push(); break # patterns - only execute commands if lines match if (mm.work == "/"): # save line/char number for error message mm.work = '' # clear mm.work += "near line/char " mm.work += str(mm.linesRead) # lines mm.work += ":" mm.work += str(mm.charsRead) # chars mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.until("/"); if (not mm.work.endswith("/")): mm.work = '' # clear mm.work += "Missing '/' to terminate " mm.work += mm.tape[mm.cell] # get mm.work += "?\n" sys.stdout.write(mm.work) # print exit() # if len(mm.work) > 0: # clip mm.work = mm.work[:-1] # clip # java .matches method matches whole string not substring # so we need to add .* at beginning and end, but not if regex # begins with ^ or ends with $. 
complicated hey if (not mm.work.endswith("$")): mm.work += ".*$" if (not mm.work.startswith("^")): mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "^.*" mm.work += mm.tape[mm.cell] # get mm.tape[mm.cell] = mm.work # put mm.work = '' # clear # add any delimiter for pattern here, or none mm.work += "\"" mm.work += mm.tape[mm.cell] # get mm.work += "\"" mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "pattern*" mm.push(); break # read transliteration commands if (mm.work == "y"): # save line/char number for error message mm.work = '' # clear mm.work += "near line " mm.work += str(mm.linesRead) # lines mm.work += ", char " mm.work += str(mm.charsRead) # chars mm.tape[mm.cell] = mm.work # put mm.work = '' # clear # allow spaces between 'y' and '/' although gnu set doesn't mm.until("/"); if (not mm.work.endswith("/") or not re.match(r"^[ /]+$", mm.work)): mm.work = '' # clear mm.work += "Missing '/' after 'y' transliterate command\n" mm.work += "Or trailing characters " mm.work += mm.tape[mm.cell] # get mm.work += "\n" sys.stdout.write(mm.work) # print exit() # save line/char number for error message mm.work = '' # clear mm.work += "near line " mm.work += str(mm.linesRead) # lines mm.work += ", char " mm.work += str(mm.charsRead) # chars mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.until("/"); if (not mm.work.endswith("/")): mm.work = '' # clear mm.work += "Missing 2nd '/' after 'y' transliterate command " mm.work += mm.tape[mm.cell] # get mm.work += "\n" sys.stdout.write(mm.work) # print exit() if (mm.work == "/"): mm.work = '' # clear mm.work += "Sed syntax error? 
\n" mm.work += " Empty regex after 'y' transliterate command " mm.work += mm.tape[mm.cell] # get mm.work += "\n" sys.stdout.write(mm.work) # print exit() # replace pattern found # if len(mm.work) > 0: # clip mm.work = mm.work[:-1] # clip mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "y/" mm.work += mm.tape[mm.cell] # get mm.tape[mm.cell] = mm.work # put mm.work = '' # clear # save line/char number for error message mm.work += "near line " mm.work += str(mm.linesRead) # lines mm.work += ", char " mm.work += str(mm.charsRead) # chars mm.increment() # ++ mm.tape[mm.cell] = mm.work # put if mm.cell > 0: mm.cell -= 1 # -- mm.work = '' # clear mm.until("/"); if (not mm.work.endswith("/")): mm.work = '' # clear mm.work += "Missing 3rd '/' after 'y' transliterate command " mm.work += mm.tape[mm.cell] # get mm.work += "\n" sys.stdout.write(mm.work) # print exit() # if len(mm.work) > 0: # clip mm.work = mm.work[:-1] # clip mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work # swap mm.work += "/" mm.work += mm.tape[mm.cell] # get mm.work += "/" # y/// does not have modifiers (unlike s///) mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "action*" mm.push(); break # various commands that have an option word parameter # e has two variants # "e" { replace "e" "e; # exec patt-space command and replace"; } if (mm.work == "b" or mm.work == "e" or mm.work == "q" or mm.work == "Q" or mm.work == "t" or mm.work == "T"): # ignore intervening space if any mm.tape[mm.cell] = mm.work # put mm.work = '' # clear # while while re.match(r"^[ ]+$", mm.peep): if mm.eof: break mm.read() mm.work = '' # clear # A bit more permissive that gnu-sed which doesn't allow # read to end in ';'. 
# whilenot while not re.match(r"^[ ;}]+$", mm.peep): if mm.eof: break mm.read() # word parameters are optional to these commands # just add a space to separate command from parameter if (mm.work != ""): mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work # swap mm.work += " " mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work # swap mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work # swap mm.work += mm.tape[mm.cell] # get # hard to implement because java has no goto ? if (mm.work.startswith("b")): mm.work += "; # branch to