#!/usr/bin/ruby

# code generated by "translate.ruby.pss" a pep script
# http://bumble.sf.net/books/pars/tr/

# require 'something'

# A small text-parsing machine: a workspace string, a one-character
# lookahead ("peep"), a stack of parse tokens and a tape of token
# attributes addressed by a cell pointer. Input is read from ARGF.
class Machine
  attr_accessor :work, :charsRead, :linesRead, :escape, :delimiter,
                :counter, :stack, :tape, :cell, :marks, :eof, :peep

  # Make a new machine and prime the lookahead with the first input char.
  def initialize
    @size = 300          # how many elements in tape/marks
    @charsRead = 0       # how many chars already read
    @linesRead = 1       # how many lines already read
    @escape = "\\"
    @delimiter = "*"     # push/pop delimiter (default "*")
    @counter = 0         # a counter for anything
    @work = ""           # the workspace
    @stack = []          # stack for parse tokens
    @cell = 0            # current tape cell
    @tape = Array.new(@size) { String.new }   # a list of attributes for tokens
    @marks = Array.new(@size) { String.new }  # marked tape cells
    # or don't initialise peep until "parse()" calls "setInput()"
    # ARGF.getc returns nil at end of input (ARGF.readchar would raise
    # EOFError instead), so empty input simply starts at end of stream.
    @peep = ARGF.getc
    @eof = @peep.nil?    # end of stream reached?
  end

  def setInput(newInput)
    puts "to be implemented"
  end

  # Read one character from the input stream and update the machine:
  # the old peep is appended to the workspace and a new peep is fetched.
  # Exits the program if called once end of stream has been reached.
  def read
    exit if @eof
    @charsRead += 1
    # increment lines
    @linesRead += 1 if @peep == "\n"
    @work += @peep
    # getc (not readchar) so that end of input yields nil, not EOFError
    @peep = ARGF.getc
    @eof = true if @peep.nil?
  end

  # test if all chars in workspace are in unicode category
  # (not implemented yet: always returns nil)
  def isInCategory(cat)
    # for ch in @work
    #   if not category(ch).start_with?(cat) then return false end
    # return True
  end

  # Remove the escape character in front of every occurrence of c in
  # the workspace.
  # this needs to actually walk the string
  # eg "abcab\cabc"
  # not trivial
  def unescapeChar(c)
    # Block form: the replacement is inserted literally. A new string is
    # assigned so a tape cell sharing the old workspace object is untouched.
    @work = @work.gsub("#{@escape}#{c}") { c }
  end

  # Put the escape character in front of every occurrence of c in the
  # workspace.
  def escapeChar(c)
    # A replacement *string* would have its backslash sequences
    # interpreted (e.g. \' means "text after the match"), which corrupts
    # the result when escaping characters such as ' or 0. The block form
    # inserts the text literally.
    @work = @work.gsub(c) { @escape + c }
  end

  # A helper for the multiescape until bug: counts how many escape
  # characters directly precede a trailing suffix in the workspace.
  def countEscaped(suffix)
    # an empty escape string can never precede anything (and would
    # otherwise make the loop below spin forever)
    return 0 if @escape.empty?

    count = 0
    s = @work.delete_suffix(suffix)
    while s.end_with?(@escape)
      count += 1
      s.delete_suffix!(@escape)
    end
    count
  end

  # Reads the input stream until the workspace ends with suffix
  # (preceded by an even number of escape characters) or until end
  # of stream.
  def until(suffix)
    # read at least one character
    return if @eof

    read
    loop do
      return if @eof
      # need to count the @escape chars preceding suffix
      # if odd, keep reading, if even, stop
      return if @work.end_with?(suffix) && countEscaped(suffix).even?

      read
    end
  end

  # this implements the ++ command incrementing the tape pointer
  # and growing the tape and marks arrays if required
  def increment
    @cell += 1
    return unless @cell >= @size

    @tape.append("")
    @marks.append("")
    @size += 1
  end

  # Pop the top token from the stack onto the front of the workspace.
  # Returns false if the stack is empty, true otherwise.
  def pop
    return false if @stack.empty?

    @work = @stack.pop + @work
    @cell -= 1 if @cell > 0
    true
  end

  # Push the first token from the workspace to the stack.
  # Returns false if the workspace is empty, true otherwise.
  def push
    # don't increment the tape pointer on an empty push
    return false if @work == ""

    # the token ends at the first delimiter
    iFirst = @work.index(@delimiter)
    if iFirst.nil?
      # no delimiter: the whole workspace is pushed, and the tape
      # pointer is not moved
      @stack.push(@work)
      @work = ""
      return true
    end
    # s[i..j] means all chars from i to j
    # s[i,n] means n chars from i
    @stack.push(@work[0..iFirst])
    @work = @work[iFirst + 1..-1]
    increment
    true
  end

  # Print the stack, workspace, peep and counters for debugging.
  def printState
    puts "Stack[#{@stack.join(', ')}] Work[#{@work}] Peep[#{@peep}]"
    puts "Acc:#{@counter} Esc:#{@escape} Delim:#{@delimiter} Chars:#{@charsRead}" +
         " Lines:#{@linesRead} Cell:#{@cell}"
  end

  # Move the tape pointer to the first cell whose mark equals mark;
  # prints a message and exits if there is no such mark.
  def goToMark(mark)
    ii = @marks.find_index(mark)
    if ii.nil?
      print("bad mark '" + mark + "'!")
      exit
    end
    @cell = ii
  end

  # this is where the actual parsing/compiling code should go
  # so that it can be used by other ruby classes/objects. Also
  # should have a stream argument.
  def parse(s)
    # a reset or "setinput()" method would be useful to parse a
    # different string/file/stream, without creating a new
    # machine object.
    # could use code like this to check if input is string or file
    # if isinstance(s, file)
    #   print("")
    #   # @reset(s)
    #   # @reader = s
    # elseif isinstance(s, string)
    #   f = StringIO.StringIO("test")
    #   for line in f print(line)
    # else
    #   f = STDIN
    # end
    # puts "not implemented"
  end
end # end of Machine class definition

# will become:
# mm.parse(sys.stdin) or
# mm.parse("abcdef") or
# open f; mm.parse(f)

# the restart flag, which allows .restart to work before the
# parse label, in languages (like ruby) that don't have
# labelled loops
restart = false
mm = Machine.new

while !mm.eof do
  # lex block
  while true
    mm.read() # read

    # ignore \r
    if mm.work.match?(/^[\r]+$/)
      mm.work = '' # clear
      break if mm.eof
      restart = true; break # restart
    end

    # don't tokenize non-leading space. One space will be printed
    # between each word.
    if mm.work.match?(/^[ \t\f]+$/)
      # while
      while /^[ \t\f]+$/.match?(mm.peep)
        break if mm.eof
        mm.read()
      end
      mm.work = '' # clear
      break if mm.eof
      restart = true; break # restart
    end

    if mm.work.match?(/^[\n]+$/)
      # make character count relative to line.
      mm.charsRead = 0 # nochars
      # save the leading space in the nl* token
      # while
      while /^[[:space:]]+$/.match?(mm.peep)
        break if mm.eof
        mm.read()
      end
      mm.tape[mm.cell] = mm.work # put
      mm.work = '' # clear
      mm.work += "nl*"
      mm.push()
      break
    end

    # everything else is a word
    if mm.work != ""
      # whilenot
      while !/^[[:space:]]+$/.match?(mm.peep)
        break if mm.eof
        mm.read()
      end
      mm.tape[mm.cell] = mm.work # put
      mm.work = '' # clear
      mm.work += "word*"
      mm.push()
      break
    end
    break
  end
  if restart then restart = false; next; end

  # parse block
  while true
    # to visualise parse token reductions
    mm.work += "line "
    mm.work += mm.linesRead.to_s # lines
    mm.work += " char "
    mm.work += mm.charsRead.to_s # chars
    mm.work += ": "
    print mm.work # print
    mm.work = '' # clear
    while mm.pop() do next end # unstack
    mm.work += "\n"
    print mm.work # print
    if !mm.work.empty? # clip
      mm.work = mm.work[0..-2] # clip
    end
    while mm.push() do next end # stack

    #-------
    # 1 token
    mm.pop()

    #-------
    # 2 tokens
    mm.pop()

    # I want to recognise 2 word structures, so need to separate
    # the text*word* reduction from the word*word* rule.
    # is there any need for a file* token, link token etc?
    if mm.work == "word*word*" || mm.work == "text*word*"
      mm.work = '' # clear
      mm.work += mm.tape[mm.cell] # get
      mm.work += " "
      mm.increment() # ++
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work # put
      mm.work = '' # clear
      mm.work += "text*"
      mm.push()
      next
    end

    if mm.work == "word*nl*" || mm.work == "text*nl*"
      mm.work = '' # clear
      mm.work += mm.tape[mm.cell] # get
      mm.increment() # ++
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work # put
      mm.work = '' # clear
      mm.work += "line*"
      mm.push()
      next
    end

    if mm.work == "line*line*" || mm.work == "lineset*line*"
      mm.work = '' # clear
      mm.work += mm.tape[mm.cell] # get
      mm.increment() # ++
      mm.work += mm.tape[mm.cell] # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work # put
      mm.work = '' # clear
      mm.work += "lineset*"
      mm.push()
      next
    end

    mm.push()
    mm.push()

    if mm.eof
      mm.pop()
      if mm.work == "word*" || mm.work == "text*" ||
         mm.work == "line*" || mm.work == "lineset*"
        mm.work = '' # clear
        mm.work += mm.tape[mm.cell] # get
        mm.work += "\n"
        print mm.work # print
        mm.work = '' # clear
        exit
      end
    end
    break
  end # parse
end

# end of generated code