#!/usr/bin/ruby

# code generated by "translate.ruby.pss" a pep script
# http://bumble.sf.net/books/pars/tr/
# require 'something'

# The pep virtual machine: a workspace string, a one-character lookahead
# ("peep"), a stack of parse tokens and a tape of token attributes that
# runs parallel to the stack.
#
# Method and attribute names are camelCase because the generated script
# code calls them by these names.
class Machine
  attr_accessor :work, :charsRead, :linesRead, :escape, :delimiter,
    :counter, :stack, :tape, :cell, :marks, :eof, :peep

  # Make a new machine.
  #
  # input - any object that responds to #getc and returns nil at end of
  #         stream (ARGF, an IO, a StringIO ...). Defaults to ARGF, i.e.
  #         the files named on the command line, or standard input.
  def initialize(input = ARGF)
    @size = 300      # how many elements in stack/tape/marks
    @charsRead = 0   # how many chars already read
    @linesRead = 1   # how many lines already read
    @escape = "\\"
    @delimiter = "*" # push/pop delimiter (default "*")
    @counter = 0     # a counter for anything
    @work = ""       # the workspace
    @stack = []      # stack for parse tokens
    @cell = 0        # current tape cell
    @tape = Array.new(@size) { String.new }    # attributes for tokens
    @marks = Array.new(@size) { String.new }   # marked tape cells
    setInput(input)  # sets @input, @peep and @eof
  end

  # Switch the machine to a new input stream and prime the lookahead
  # character. Only @input, @peep and @eof are touched; workspace, stack,
  # tape and counters keep their current values.
  #
  # newInput - an object responding to #getc (nil at end of stream).
  def setInput(newInput)
    @input = newInput
    # getc (unlike readchar) returns nil at end of stream instead of
    # raising EOFError, so empty input simply leaves the machine at eof.
    @peep = @input.getc
    @eof = @peep.nil?
  end

  # Read one character from the input stream and update the machine:
  # the old peep is appended to the workspace and a new peep is fetched.
  # Reading when the stream is already exhausted terminates the program.
  def read
    exit if @eof
    @charsRead += 1
    # increment lines
    @linesRead += 1 if @peep == "\n"
    # += (not <<) on purpose: the workspace string may also be referenced
    # from a tape cell and must not be mutated in place.
    @work += @peep
    @peep = @input.getc
    @eof = true if @peep.nil?
  end

  # Test if all chars in workspace are in unicode category.
  # Not implemented yet: always returns nil.
  def isInCategory(cat)
    #for ch in @work
    #  if not category(ch).start_with?(cat) then return false end
    #return True
  end

  # Remove the escape character in front of every occurrence of c in the
  # workspace. This is a plain substitution; it does not walk the string,
  # so an escaped escape character followed by c (eg "abcab\\cabc") is
  # not treated specially.
  def unescapeChar(c)
    # block form: the replacement is inserted verbatim, without the
    # backslash interpretation applied to replacement strings
    @work.gsub!("#{@escape}#{c}") { c }
  end

  # Put the escape character in front of every occurrence of c in the
  # workspace.
  def escapeChar(c)
    # block form is required here: as a replacement *string* "\\\\" would
    # collapse to one backslash and "\\1" would be a back-reference.
    @work.gsub!(c) { @escape + c }
  end

  # A helper for until(): counts the consecutive escape sequences that
  # directly precede a trailing suffix in the workspace.
  def countEscaped(suffix)
    # an empty escape sequence "matches" forever; nothing can be escaped
    return 0 if @escape.empty?
    rest = @work.delete_suffix(suffix)   # always a fresh copy
    count = 0
    while rest.end_with?(@escape)
      count += 1
      rest.delete_suffix!(@escape)
    end
    count
  end

  # Reads the input stream until the workspace ends with suffix, ignoring
  # a suffix that is preceded by an odd number of escape sequences.
  # Always reads at least one character unless the stream is exhausted.
  def until(suffix)
    return if @eof
    read
    until @eof
      # odd number of escapes: the suffix itself is escaped, keep reading
      return if @work.end_with?(suffix) && countEscaped(suffix).even?
      read
    end
  end

  # This implements the ++ command: increments the tape pointer and grows
  # the tape and marks arrays if required.
  def increment
    @cell += 1
    if @cell >= @size
      @tape.append("")
      @marks.append("")
      @size += 1
    end
  end

  # Pop the top token from the stack onto the front of the workspace and
  # step the tape pointer back (never below 0).
  # Returns false if the stack was empty, true otherwise.
  def pop
    return false if @stack.empty?
    @work = @stack.pop + @work
    @cell -= 1 if @cell > 0
    true
  end

  # Push the first token of the workspace (everything up to and including
  # the first delimiter) onto the stack and advance the tape pointer.
  # Returns false (and does nothing) if the workspace is empty.
  def push
    # dont increment the tape pointer on an empty push
    return false if @work == ""
    iFirst = @work.index(@delimiter)
    if iFirst.nil?
      # no delimiter: the whole workspace is pushed and the tape pointer
      # is left where it is
      @stack.push(@work)
      @work = ""
      return true
    end
    @stack.push(@work[0..iFirst])
    @work = @work[iFirst + 1..-1]
    increment
    true
  end

  # Print the stack, workspace, peep and counters on two lines (debugging).
  def printState
    puts "Stack[#{@stack.join(', ')}] Work[#{@work}] Peep[#{@peep}]"
    puts "Acc:#{@counter} Esc:#{@escape} Delim:#{@delimiter} Chars:#{@charsRead}" +
         " Lines:#{@linesRead} Cell:#{@cell}"
  end

  # Move the tape pointer to the first cell marked with mark. An unknown
  # mark prints a message and terminates the program.
  def goToMark(mark)
    ii = @marks.find_index(mark)
    if ii.nil?
      print("bad mark '" + mark + "'!")
      exit
    end
    @cell = ii
  end

  # Placeholder: this is where the actual parsing/compiling code should
  # go so that it can be used by other ruby classes/objects. Currently it
  # ignores s and only prints an empty string.
  def parse(s)
    print("")
  end

end

# end of Machine class definition

# will become:
# mm.parse($stdin)  or 
# mm.parse("abcdef") or
# File.open(path) { |f| mm.parse(f) }

# Ruby has no labelled loops, so the pep ".restart" command (jump back to
# the first instruction of the script) is written as `next` on the outer
# read loop below; no separate restart flag is required.
mm = Machine.new

# lex helper: file the workspace text in the current tape cell, then
# replace the workspace with the given token and push it on the stack.
emit_token = lambda do |token|
  mm.tape[mm.cell] = mm.work   # put
  mm.work = token.dup          # clear, add
  mm.push
end

until mm.eof

  # ---- lex phase: turn the next run of characters into one token ----
  mm.read
  case mm.work
  when /^[\r]+$/
    # ignore \r
    mm.work = ''
    next unless mm.eof          # restart, unless input is finished
  when /^[ \t\f]+$/
    # dont tokenize non-leading space. One space will be printed
    # between each word.
    mm.read while /^[ \t\f]+$/.match?(mm.peep) && !mm.eof
    mm.work = ''
    next unless mm.eof          # restart, unless input is finished
  when /^[\n]+$/
    # make character count relative to line.
    mm.charsRead = 0
    # the leading space of the next line is saved in the nl* token
    mm.read while /^[[:space:]]+$/.match?(mm.peep) && !mm.eof
    emit_token.call("nl*")
  else
    # everything else is a word
    unless mm.work == ""
      mm.read until /^[[:space:]]+$/.match?(mm.peep) || mm.eof
      emit_token.call("word*")
    end
  end

  # ---- parse phase: reduce the token stack until no rule applies ----
  loop do
    # to visualise parse token reductions: position, then the whole stack
    print "#{mm.work}line #{mm.linesRead} char #{mm.charsRead}: "
    mm.work = ''
    mm.pop until mm.stack.empty?     # unstack
    print mm.work + "\n"
    mm.push until mm.work.empty?     # stack

    # look at the top 2 tokens
    2.times { mm.pop }

    # The text*word* reduction is kept apart from the word*word* one so
    # that 2 word structures can be recognised later on.
    rule =
      case mm.work
      when "word*word*", "text*word*" then [" ", "text*"]
      when "word*nl*", "text*nl*" then ["", "line*"]
      when "line*line*", "lineset*line*" then ["", "lineset*"]
      end

    if rule
      separator, token = rule
      # join the attributes of both tokens in the first token's cell
      merged = '' + mm.tape[mm.cell]   # get
      merged += separator
      mm.increment                     # ++
      merged += mm.tape[mm.cell]       # get
      mm.cell -= 1 if mm.cell > 0      # --
      mm.tape[mm.cell] = merged        # put
      mm.work = token.dup
      mm.push
      next
    end

    # no rule matched: put both tokens back
    2.times { mm.push }

    if mm.eof
      mm.pop
      if %w[word* text* line* lineset*].include?(mm.work)
        # input exhausted: print the attribute of the last token and quit
        report = '' + mm.tape[mm.cell] + "\n"
        print report
        mm.work = ''
        exit
      end
    end
    break
  end # parse

end


# end of generated code