#!/usr/bin/env python3

# code generated by "translate.py.pss" a pep script
# bumble.sf.net/books/pars/
import sys, re    # for sys.read(), write() and regex
from unicodedata import category # for matching classes
# the third-party "regex" module may be used instead, which could make
# the char class code easier (it supports posix classes like [[:graph:]])
# import regex
# regex.findall(r'[[:graph:]]', 'a 0 a b z') 

class Machine: 
  """A small text-parsing virtual machine (workspace, stack, tape, peep).

  The machine reads standard input one character at a time. Text is
  accumulated in the workspace (``work``); parse tokens are kept on
  ``stack`` and each token's attribute text lives in the ``tape`` cell
  with the same index. ``peep`` always holds the next unread character.
  """

  def __init__(self):
    """Make a new machine and prime ``peep`` with one char from stdin."""
    self.size = 300      # how many elements in stack/tape/marks
    self.eof = False     # end of stream reached?
    self.charsRead = 0   # how many chars already read
    self.linesRead = 1   # how many lines already read
    self.escape = "\\"
    self.delimiter = "*" # push/pop delimiter (default "*")
    self.counter = 0     # a counter for anything
    self.work = ""       # the workspace
    self.stack = []      # stack for parse tokens 
    self.cell = 0                # current tape cell
    self.tape = [""]*self.size   # a list of attribute for tokens 
    self.marks = [""]*self.size  # marked tape cells
    # or dont initialise peep until "parse()" calls "setInput()"
    self.peep = sys.stdin.read(1)

  def setInput(self, newInput): 
    """Placeholder: switching the input stream is not implemented yet."""
    print("to be implemented")

  def read(self): 
    """Read one character from stdin and update the machine.

    The current ``peep`` char is appended to the workspace and the next
    char is loaded into ``peep``; ``eof`` is set when nothing is left.
    Calling this when ``eof`` is already set exits the program
    (raises SystemExit).
    """
    # was "System.exit(0)" which is not python and raised NameError
    if self.eof: sys.exit(0)
    self.charsRead += 1
    # increment lines
    if self.peep == "\n": self.linesRead += 1
    self.work += self.peep
    self.peep = sys.stdin.read(1) 
    if not self.peep: self.eof = True

  def increment(self): 
    """Increment the tape pointer (command ++).

    The tape and marks lists grow by one cell when the pointer moves
    past the current size.
    """
    self.cell += 1
    if self.cell >= self.size: 
      self.tape.append("")
      self.marks.append("")
      self.size += 1

  def isInCategory(self, cat, text): 
    """Return True if every char of text is in unicode category cat.

    ``cat`` is matched as a prefix of the category code, so "L" accepts
    "Lu", "Ll" and so on. An empty text returns True. The caller decides
    which text to test (the original author notes that "while" tests
    look at peep but class tests look at the workspace).
    """
    for ch in text:
      if not category(ch).startswith(cat): return False
    return True

  def unescapeChar(self, c):
    """Remove the backslash in front of each c in the workspace.

    Note that a literal backslash is used here, not ``self.escape``.
    """
    if len(self.work) > 0:
      self.work = self.work.replace("\\"+c, c)

  def escapeChar(self, c):
    """Put a backslash in front of each c in the workspace.

    Note that a literal backslash is used here, not ``self.escape``.
    """
    if len(self.work) > 0:
      self.work = self.work.replace(c, "\\"+c)

  def countEscaped(self, suffix): 
    """Count the escape chars directly before a trailing suffix.

    A helper for the multiple escape char bug in until(). Returns 0
    when the workspace does not end with suffix (this used to raise
    UnboundLocalError) and when the escape string is empty (this used
    to loop forever).
    """
    if not self.escape or not self.work.endswith(suffix): return 0
    # slices instead of str.removesuffix(), which needs python 3.9+
    s = self.work[:len(self.work) - len(suffix)]
    count = 0
    while s.endswith(self.escape):
      count += 1
      s = s[:len(s) - len(self.escape)]
    return count

  def until(self, suffix): 
    """Read the input until the workspace ends with an unescaped suffix.

    At least one character is read (unless already at eof). The suffix
    counts as escaped when an odd number of escape chars precede it.
    Reading also stops at end of stream.
    """
    # read at least one character
    if self.eof: return
    self.read()
    while True: 
      if self.eof: return
      # count the trailing escape chars, odd=continue, even=stop
      if self.work.endswith(suffix):
        if self.countEscaped(suffix) % 2 == 0: return
      self.read()
    
  def pop(self): 
    """Pop the top token of the stack onto the front of the workspace.

    The tape pointer is decremented (but never below zero).
    Returns False if the stack was empty, True otherwise.
    """
    if len(self.stack) == 0: return False
    # was "mm.stack.pop()": only worked when a global "mm" existed
    self.work = self.stack.pop() + self.work
    if self.cell > 0: self.cell -= 1
    return True

  def push(self): 
    """Push the first token of the workspace onto the stack.

    A token is the text up to and including the first delimiter. If the
    workspace has no delimiter then all of it is pushed and the tape
    pointer is NOT incremented. Returns False on an empty workspace
    (nothing is pushed), True otherwise.
    """
    # dont increment the tape pointer on an empty push
    if len(self.work) == 0: return False
    # need to get this from the delimiter.
    iFirst = self.work.find(self.delimiter)
    if iFirst == -1:
      self.stack.append(self.work)
      self.work = "" 
      return True
    self.stack.append(self.work[0:iFirst+1])
    self.work = self.work[iFirst+1:]
    self.increment()
    return True

  def swap(self): 
    """Swap the workspace with the current tape cell.

    This function is not used (the code is "inlined").
    """
    self.work, self.tape[self.cell] = self.tape[self.cell], self.work

  def goToMark(self, mark):
    """Set the tape pointer to the cell marked with mark.

    If several cells carry the same mark the last one wins. When no
    cell matches, a "badmark" message is printed and the program exits.
    """
    markFound = False  
    # was "mm.marks"/"mm.cell": only worked when a global "mm" existed
    for ii, name in enumerate(self.marks): 
      if name == mark:
        self.cell = ii; markFound = True
    if not markFound:
      print("badmark '" + mark + "'!") 
      # sys.exit() rather than exit(), which only exists via "site"
      sys.exit()

  def writeToFile(self): 
    """Write the workspace to the file "sav.pp" in the current folder."""
    # "with" closes the file even if the write fails
    with open("sav.pp", "w") as f:
      f.write(self.work) 

  def printState(self): 
    """Print the stack, workspace, peep and registers (for debugging)."""
    print("Stack[" + ",".join(self.stack) + 
      "] Work[" + self.work + "] Peep[" + self.peep + "]")
    print("Acc:" + str(self.counter) + " Esc:" + self.escape +
          " Delim:" + self.delimiter + " Chars:" + str(self.charsRead) +
          " Lines:" + str(self.linesRead) + " Cell:" + str(self.cell))

  def parse(self, s): 
    """Placeholder for the real parsing/compiling code.

    This is where the actual parsing code should go so that it can be
    used by other python classes/objects. A reset or "setInput()"
    method would be useful to parse a different string/file/stream
    without creating a new machine object.

    s may be an open file/stream, a string, or anything else (meaning
    stdin). For now nothing is parsed: the method only writes
    "not implemented" to stdout (after placeholder output for streams
    and strings).
    """
    # local import because only this placeholder needs the module
    import io
    # was python 2 only (file, string, StringIO.StringIO): NameError
    if isinstance(s, io.IOBase):
      print("")
      # self.reset(s)
      # self.reader = s
    elif isinstance(s, str):
      # placeholder text, the argument itself is not yet read
      for line in io.StringIO("test"): print(line)
    # anything else: input would come from sys.stdin
    sys.stdout.write("not implemented")


# end of Machine class definition

# will become:
# mm.parse(sys.stdin)  or 
# mm.parse("abcdef") or
# open f; mm.parse(f)

# ---------------------------------------------------------------
# Script body: filter a shell history file read from stdin.
# Each pass of the outer loop runs a "lex block", which turns one input
# line into a parse token (timestamp*, comment*, trivial* or command*)
# pushed on mm.stack with the line text saved in the matching tape cell,
# and then a "parse block", which pops tokens and reduces them.
# The filtered text is written to stdout once the input is exhausted.
# ---------------------------------------------------------------
# temp is not referenced anywhere else in the visible script
temp = ""    
mm = Machine() 
# the empty recordset trick to simplify the grammar rules
# (the stack always starts with a "recordset*" token whose tape cell
# is empty)
mm.work += "recordset*"
mm.push();
while (not mm.eof): 
  
  # lex block 
  # (every path through this loop ends in "break": it runs once per
  # pass of the outer loop and produces at most one token)
  while True: 
    mm.read()           # read
    # a workspace holding only a newline is the end of the previous line
    if (re.match(r"^[\n]+$", mm.work)):
      # just to debug
      # lines; print;
      mm.work = ''              # clear
    # whilenot  
    # read up to (but not including) the next newline, or to eof, so
    # that the workspace holds the text of one line
    while not re.match(r"^[\n]+$", mm.peep):
      if mm.eof:  break
      mm.read()
    # ignore blank lines
    if (mm.work == "" or re.match(r"^[\s]+$", mm.work)):
      mm.work = ''              # clear
      break
    # save the line text in the tape cell which the token pushed below
    # will correspond to
    mm.tape[mm.cell] = mm.work  # put 
    # lines starting with "#" (but longer than just "#"): made up of only
    # "#" and digits is a timestamp, anything else is a comment
    if (mm.work.startswith("#") and mm.work != "#"):
      if (re.match(r"^[#0123456789]+$", mm.work)):
        mm.work = ''              # clear
        mm.work += "timestamp*"
        mm.push();
        break
      mm.work = ''              # clear
      mm.work += "comment*"
      mm.push();
      break
    # tag the command as trivial if it is 
    # for later removal. If there is a comment above it we may keep it anyway
    # tag as trivial all commands less than 5 characters
    # (removing the last char four times leaves an empty workspace exactly
    # when the line has 4 chars or fewer)
    # if len(mm.work) > 0:  # clip 
    mm.work = mm.work[:-1]  # clip
    # if len(mm.work) > 0:  # clip 
    mm.work = mm.work[:-1]  # clip
    # if len(mm.work) > 0:  # clip 
    mm.work = mm.work[:-1]  # clip
    # if len(mm.work) > 0:  # clip 
    mm.work = mm.work[:-1]  # clip
    if (mm.work == ""):
      mm.work = ''              # clear
      mm.work += "trivial*"
      mm.push();
      break
    # restore the whole line (it was clipped above) from the tape
    mm.work = ''              # clear
    mm.work += mm.tape[mm.cell] # get
    # commands in this hard-coded list are also tagged as trivial
    if (mm.work.startswith("df ") or mm.work == "df" or mm.work.startswith("du ") or mm.work.startswith("mv ") or mm.work.startswith("cp ") or mm.work.startswith("less ") or mm.work.startswith("vim ") or mm.work.startswith("rm ") or mm.work.startswith("mkdir ") or mm.work.startswith("find ") or mm.work.startswith("locate ") or mm.work.startswith("cd ") or mm.work == "cd" or mm.work.startswith("ls ") or mm.work == "ls" or mm.work == "pwd" or mm.work == "hist" or mm.work == "books" or mm.work == "bk" or mm.work == "ho" or mm.work == "updatedb" or mm.work == "bashrc" or mm.work == "vimrc" or mm.work == "os" or mm.work == "cos" or mm.work == "ccos" or mm.work == "make"):
      mm.work = ''              # clear
      mm.work += "trivial*"
      mm.push();
      break
    # any other line is an ordinary command
    mm.work = ''              # clear
    mm.work += "command*"
    mm.push();
    break 
  
  # parse block 
  # (each rule that matches rewrites the tape, pushes the reduced
  # tokens and uses "continue" to try all the rules again; when no rule
  # matches the tokens are pushed back and the loop ends with "break")
  while True:  
    # for debugging
    # add "line "; lines; add " char "; chars; add ": "; print; clear; 
    #add "line "; lines; add ": "; print; clear; 
    #unstack; print; stack; add "\n"; print; clear;
    # ----------------
    # 2 tokens
    # (after the pops the workspace holds the token names and mm.cell
    # has been moved back once per popped token, but not below zero)
    mm.pop();
    mm.pop();
    # ignore duplicated timestamps. 
    # (the text of the second timestamp overwrites the first)
    if (mm.work == "timestamp*timestamp*"):
      mm.work = ''              # clear
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      mm.work = ''              # clear
      mm.work += "timestamp*"
      mm.push();
      continue
    # handle multiline comments
    # (both comment texts are joined with a newline into the first cell)
    if (mm.work == "comment*comment*"):
      mm.work = ''              # clear
      mm.work += mm.tape[mm.cell] # get
      mm.work += "\n"
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      mm.work = ''              # clear
      mm.work += "comment*"
      mm.push();
      continue
    # dont need because an initial recordset always exists
    #"record*record*","recordset*record*" {
    # (the record text is appended to the recordset text after a newline)
    if (mm.work == "recordset*record*"):
      mm.work = ''              # clear
      mm.work += mm.tape[mm.cell] # get
      mm.work += "\n"
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      mm.work = ''              # clear
      # debug code
      # a+; count; add " record!\n"; print; clear;
      mm.work += "recordset*"
      mm.push();
      continue
    # this will be compiled differently from r*r*
    # (a command with no timestamp or comment is appended as it is)
    if (mm.work == "recordset*command*"):
      mm.work = ''              # clear
      mm.work += mm.tape[mm.cell] # get
      mm.work += "\n"
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      mm.work = ''              # clear
      mm.work += "recordset*"
      mm.push();
      continue
    # a trivial command with nothing before it is dropped: the tape is
    # not changed and only the counter goes up
    if (mm.work == "recordset*trivial*"):
      mm.counter += 1  # a+ 
      # count filtered commands
      mm.work = ''              # clear
      mm.work += "recordset*"
      mm.push();
      continue
    if (mm.eof):
      # clean up trailing comments etc
      # (the trailing token is relabelled as a record, its tape text is
      # unchanged, so that the recordset*record* rule above absorbs it)
      if (mm.work == "recordset*timestamp*" or mm.work == "recordset*comment*"):
        mm.work = ''              # clear
        mm.work += "recordset*record*"
        mm.push();
        mm.push();
        continue
    # 3 tokens
    mm.pop();
    # remove trivial commands without comments
    # (the timestamp is dropped along with the trivial command)
    if (mm.work == "recordset*timestamp*trivial*"):
      mm.counter += 1  # a+ 
      # count filtered commands
      mm.work = ''              # clear
      mm.work += "recordset*"
      mm.push();
      continue
    # ignore duplicated timestamps. 
    # (the comment text moves to the first cell and the second timestamp
    # to the next cell; the text of the first timestamp is overwritten)
    if (mm.work == "timestamp*comment*timestamp*"):
      mm.work = ''              # clear
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      mm.work = ''              # clear
      mm.increment()      # ++ 
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      if mm.cell > 0: mm.cell -= 1  # --
      mm.work = ''              # clear
      mm.work += "comment*timestamp*"
      mm.push();
      mm.push();
      continue
    # amalgamate comments before and after the timestamp
    # (both comments are joined in the first cell; the timestamp cell is
    # left as it is)
    if (mm.work == "comment*timestamp*comment*"):
      mm.work = ''              # clear
      mm.work += mm.tape[mm.cell] # get
      mm.increment()      # ++ 
      mm.increment()      # ++ 
      mm.work += "\n"
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      mm.work = ''              # clear
      mm.work += "comment*timestamp*"
      mm.push();
      mm.push();
      continue
    # comment, timestamp and command (even a trivial one) become one
    # record: the three texts joined with newlines, in that order
    if (mm.work == "comment*timestamp*command*" or mm.work == "comment*timestamp*trivial*"):
      mm.work = ''              # clear
      mm.work += mm.tape[mm.cell] # get
      mm.work += "\n"
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      mm.work += "\n"
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      mm.work = ''              # clear
      mm.work += "record*"
      mm.push();
      continue
    # dont remove trivial commands with comments
    if (mm.work == "timestamp*comment*command*" or mm.work == "timestamp*comment*trivial*"):
      mm.work = ''              # clear
      # switch the order to make comment precede timestamp
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      mm.work += "\n"
      if mm.cell > 0: mm.cell -= 1  # --
      mm.work += mm.tape[mm.cell] # get
      mm.work += "\n"
      mm.increment()      # ++ 
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      mm.work = ''              # clear
      mm.work += "record*"
      mm.push();
      continue
    # a timestamp and a command become a record; the recordset cell is
    # skipped over and the record text is put in the cell after it
    if (mm.work == "recordset*timestamp*command*"):
      mm.work = ''              # clear
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      mm.work += "\n"
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      if mm.cell > 0: mm.cell -= 1  # --
      mm.work = ''              # clear
      mm.work += "recordset*record*"
      mm.push();
      mm.push();
      continue
    # resolve commands and trivial command with comments
    if (mm.work == "recordset*comment*command*" or mm.work == "recordset*comment*trivial*"):
      mm.work = ''              # clear
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      mm.work += "\n"
      mm.increment()      # ++ 
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0: mm.cell -= 1  # --
      mm.tape[mm.cell] = mm.work  # put 
      if mm.cell > 0: mm.cell -= 1  # --
      mm.work = ''              # clear
      mm.work += "recordset*record*"
      mm.push();
      mm.push();
      continue
    # no rule matched: put the (up to three) tokens back on the stack
    mm.push();
    mm.push();
    mm.push();
    # at end of input the top two stack items are popped and must amount
    # to the single token "recordset*"; otherwise an error and the
    # contents of the stack are printed and the script exits
    if (mm.eof):
      mm.pop();
      mm.pop();
      if (mm.work != "recordset*"):
        mm.push();
        mm.push();
        mm.work += "# History file did not parse well!\n"
        sys.stdout.write(mm.work) # print
        mm.work = ''              # clear
        mm.work += "# Parse stack was: "
        sys.stdout.write(mm.work) # print
        mm.work = ''              # clear
        while (mm.pop()):  continue    # unstack 
        mm.work += "\n"
        sys.stdout.write(mm.work) # print
        exit()
      # success: print the recordset text followed by a summary which
      # gives the number of trivial commands that were counted
      if (mm.work == "recordset*"):
        mm.work = ''              # clear
        mm.work += mm.tape[mm.cell] # get
        mm.work += "\n# History file parsed and filtered by pars/eg/bash.history.pss \n"
        mm.work += "# "
        mm.work += str(mm.counter) # count 
        mm.work += " trivial commands (without preceding comments) were removed.\n"
        sys.stdout.write(mm.work) # print
    break # parse
  

# end of generated code