#!/usr/bin/ruby

# code generated by "translate.ruby.pss" a pep script
# http://bumble.sf.net/books/pars/tr/
#
# Reads an in-fix arithmetic expression (numbers, lower-case variables,
# + - * / and parentheses) from the input and prints it in a lisp-like
# prefix format, e.g. "1+2*3" becomes " (+ 1 (* 2 3))".

require 'stringio'

# The pep virtual machine: a workspace string, a stack of parse tokens,
# a tape of token attributes (addressed by @cell, kept in step with the
# stack by push/pop) and a one-character look-ahead (@peep) on the input.
class Machine
  attr_accessor :work, :charsRead, :linesRead, :escape, :delimiter,
                :counter, :stack, :tape, :cell, :marks, :eof, :peep

  # Make a new machine.
  #
  # input - where characters are read from: any object with a #getc that
  #         returns nil at end of stream (ARGF, an IO, a StringIO), or a
  #         plain String. Defaults to ARGF (files named on the command
  #         line, else standard input).
  def initialize(input = ARGF)
    @size = 300        # how many elements in stack/tape/marks
    @eof = false       # end of stream reached?
    @charsRead = 0     # how many chars already read
    @linesRead = 1     # how many lines already read
    @escape = "\\"
    @delimiter = "*"   # push/pop delimiter (default "*")
    @counter = 0       # a counter for anything
    @work = ""         # the workspace
    @stack = []        # stack for parse tokens
    @cell = 0          # current tape cell
    @tape = Array.new(@size) { String.new }   # a list of attributes for tokens
    @marks = Array.new(@size) { String.new }  # marked tape cells
    setInput(input)
  end

  # Report that the tape pointer has run past the configured tape size.
  def printSizeError()
    puts <<~MESSAGE
      Tape max size exceeded!
        tape maximum size = #{@size}
        tape cell (current) = #{@cell}
      You can increase the @size value in the ruby script
      but normally this error indicates an error in your parsing
      script. The only exception would be massively nested structures
      in the source data.
    MESSAGE
  end

  # Attach an input source and prime the one-character look-ahead.
  #
  # newInput - a String, or any object with a #getc that returns nil at
  #            end of stream.
  #
  # Only @peep and @eof are (re)set; counters, workspace, stack and tape
  # are left as they are.
  def setInput(newInput)
    @input = newInput.is_a?(String) ? StringIO.new(newInput) : newInput
    # getc returns nil at end of stream (readchar would raise EOFError),
    # so an empty input simply starts out at eof.
    @peep = @input.getc
    @eof = @peep.nil?
  end

  # Read one character from the input stream and update the machine:
  # the look-ahead character is appended to the workspace and the next
  # look-ahead is fetched. Exits the program when called at end of stream.
  def read
    exit if @eof
    @charsRead += 1
    # increment lines
    @linesRead += 1 if @peep == "\n"
    # "+=" (not "<<") on purpose: a tape cell may hold the very same
    # String object as the workspace after a "put".
    @work += @peep
    @peep = @input.getc
    @eof = true if @peep.nil?
  end

  # test if all chars in workspace are in unicode category
  # (not implemented: always returns nil)
  def isInCategory(cat)
    #for ch in @work
    #  if not category(ch).start_with?(cat) then return false end
    #return True
  end

  # Remove the escape character in front of every occurrence of c in the
  # workspace.
  # this needs to actually walk the string
  # eg "abcab\cabc"
  # not trivial
  def unescapeChar(c)
    # Non-mutating, so tape cells sharing the workspace object are not
    # changed; block form so the replacement is taken literally.
    @work = @work.gsub("#{@escape}#{c}") { c }
  end

  # Put the escape character in front of every occurrence of c in the
  # workspace.
  def escapeChar(c)
    # Block form: a replacement *string* would be scanned for
    # back-references, turning e.g. "\\" + "1" into an empty group
    # reference and "\\" + "\\" into a single backslash.
    @work = @work.gsub(c) { @escape + c }
  end

  # a helper for the multiescape until bug
  # Returns how many escape characters directly precede a trailing
  # suffix in the workspace.
  def countEscaped(suffix)
    # an empty escape string would match forever in the loop below
    return 0 if @escape.empty?
    count = 0
    s = @work.delete_suffix(suffix)
    while s.end_with?(@escape)
      count += 1
      s.delete_suffix!(@escape)
    end
    count
  end

  # Reads the input stream until the workspace ends with suffix
  # (ignoring an escaped suffix) or the end of the stream is reached.
  # Always reads at least one character unless already at eof.
  def until(suffix)
    return if @eof
    # read at least one character
    self.read()
    loop do
      return if @eof
      # need to count the @escape chars preceding suffix
      # if odd, keep reading, if even, stop
      return if @work.end_with?(suffix) && countEscaped(suffix).even?
      self.read()
    end
  end

  # Pop the last token from the stack onto the front of the workspace
  # and step the tape pointer back (never below zero).
  # Returns false when the stack is empty, true otherwise.
  def pop()
    return false if @stack.empty?
    @work = @stack.pop() + @work
    @cell -= 1 if @cell > 0
    true
  end

  # Push the first token from the workspace (everything up to and
  # including the first delimiter) onto the stack.
  # Returns false on an empty workspace, true otherwise.
  def push()
    # dont increment the tape pointer on an empty push
    return false if @work == ""
    iFirst = @work.index(@delimiter)
    if iFirst.nil?
      # no delimiter: the whole workspace is the token, and the tape
      # pointer is left where it is
      @stack.push(@work)
      @work = ""
      return true
    end
    # s[i..j] means all chars from i to j
    @stack.push(@work[0..iFirst])
    @work = @work[iFirst + 1..-1]
    if @cell < @size
      @cell += 1
    else
      self.printSizeError()
      exit
    end
    true
  end

  # Print the machine state (stack, workspace, look-ahead, registers).
  def printState()
    puts "Stack[#{@stack.join(', ')}] Work[#{@work}] Peep[#{@peep}]"
    puts "Acc:#{@counter} Esc:#{@escape} Delim:#{@delimiter} Chars:#{@charsRead}" +
         " Lines:#{@linesRead} Cell:#{@cell}"
  end

  # this is where the actual parsing/compiling code should go
  # so that it can be used by other ruby classes/objects.
  # Not implemented yet: does nothing and returns nil. The generated
  # lex/parse code currently lives at the top level of this script.
  def parse(s)
    # will become:
    #   mm.parse(sys.stdin) or
    #   mm.parse("abcdef") or
    #   open f; mm.parse(f)
  end
end # end of Machine class definition

# the restart flag, which allows .restart to work before the
# parse label, in languages (like ruby) that dont have
# labelled loops
restart = false
mm = Machine.new

# Rewrites the three attributes "a op b" held in the tape cells
# cell, cell+1 and cell+2 as the prefix form " (op a b)", stores the
# result in the current tape cell and leaves the workspace empty.
reduce_infix = lambda do
  mm.work = ''                    # clear
  mm.work += " ("
  mm.cell += 1                    # ++
  mm.work += mm.tape[mm.cell]     # get (the operator)
  mm.cell -= 1 if mm.cell > 0     # --
  mm.work += mm.tape[mm.cell]     # get (left operand)
  mm.cell += 1                    # ++
  mm.cell += 1                    # ++
  mm.work += mm.tape[mm.cell]     # get (right operand)
  mm.work += ")"
  mm.cell -= 1 if mm.cell > 0     # --
  mm.cell -= 1 if mm.cell > 0     # --
  mm.tape[mm.cell] = mm.work      # put
  mm.work = ''                    # clear
end

while !mm.eof do

  # lex block
  while true
    mm.read()                     # read
    if mm.work == "+" || mm.work == "-"
      mm.tape[mm.cell] = mm.work  # put
      mm.work = ''                # clear
      mm.work += "opadd*"
      mm.push()
    end
    if mm.work == "*" || mm.work == "/"
      mm.tape[mm.cell] = mm.work  # put
      mm.work = ''                # clear
      mm.work += "opmul*"
      mm.push()
    end
    if mm.work == "(" || mm.work == ")"
      mm.tape[mm.cell] = mm.work  # put
      mm.work += "*"
      mm.push()
    end
    if mm.work.match?(/\A[0-9]+\z/)
      # while
      while /\A[0-9]+\z/.match?(mm.peep)
        break if mm.eof
        mm.read()
      end
      mm.tape[mm.cell] = mm.work  # put
      mm.work = ''                # clear
      mm.work += "number*"
      mm.push()
    end
    if mm.work.match?(/\A[a-z]+\z/)
      # while
      while /\A[a-z]+\z/.match?(mm.peep)
        break if mm.eof
        mm.read()
      end
      mm.tape[mm.cell] = mm.work  # put
      mm.work = ''                # clear
      mm.work += "variable*"
      mm.push()
    end
    if mm.work.match?(/\A[[:space:]]+\z/)
      mm.work = ''                # clear
    end
    # a trick to catch bad characters: every recognised character has
    # been pushed or cleared by now, so the workspace must be empty.
    # better would be a !"text" test
    break if mm.work == ""
    mm.work += " << incorrect character (at character "
    mm.work += mm.charsRead.to_s  # chars
    mm.work += " of input). \n"
    print mm.work                 # print
    exit
  end

  if restart then restart = false; next; end

  # parse block
  while true
    # The parse/compile/translate/transform phase involves
    # recognising series of tokens on the stack and "reducing" them
    # according to the required bnf grammar rules.
    mm.pop()
    # resolve numbers to expressions to simplify grammar rules
    # add a preceding space to numbers and variables.
    if mm.work == "number*" || mm.work == "variable*"
      mm.work = ''                # clear
      mm.work += " "
      mm.work += mm.tape[mm.cell] # get
      mm.tape[mm.cell] = mm.work  # put
      mm.work = ''                # clear
      mm.work += "exp*"
      mm.push()
      next
    end

    #-----------------
    # 3 tokens
    mm.pop()
    mm.pop()
    # we dont need any look ahead here because * and / have
    # precedence.
    if mm.work == "exp*opmul*exp*"
      reduce_infix.call
      mm.work += "exp*"
      mm.push()
      next
    end
    if mm.work == "(*exp*)*"
      mm.work = ''                # clear
      mm.cell += 1                # ++
      mm.work += mm.tape[mm.cell] # get
      mm.cell -= 1 if mm.cell > 0 # --
      mm.tape[mm.cell] = mm.work  # put
      mm.work = ''                # clear
      mm.work += "exp*"
      mm.push()
      next
    end
    # an addition may only be reduced without look-ahead at the very
    # end of the input
    if mm.eof
      if mm.work == "exp*opadd*exp*"
        reduce_infix.call
        mm.work += "exp*"
        mm.push()
        next
      end
    end

    #-----------------
    # 4 tokens
    mm.pop()
    if mm.work == "exp*opadd*exp*opadd*"
      reduce_infix.call
      mm.work += "exp*opadd*"
      mm.push()
      mm.push()
      next
    end
    if mm.work == "exp*opadd*exp*)*"
      reduce_infix.call
      mm.work += "exp*)*"
      mm.push()
      mm.push()
      next
    end

    # no rule matched: put the (up to four) tokens back on the stack
    mm.push()
    mm.push()
    mm.push()
    mm.push()
    if mm.eof
      mm.pop()
      mm.pop()
      if mm.work == "exp*"
        mm.work = ''              # clear
        # add "Yes, its an expression! \n";
        mm.work += "lisp format: "
        mm.work += mm.tape[mm.cell] # get
        mm.work += "\n"
        print mm.work             # print
        mm.work = ''              # clear
        exit
      end
      mm.push()
      mm.push()
      mm.work += "No, it doesn't look like a valid 'in-fix' expression. \n"
      mm.work += "The parse stack was: "
      print mm.work               # print
      mm.work = ''                # clear
      while mm.pop() do next end  # unstack
      mm.work += "\n"
      print mm.work               # print
      exit
    end
    break
  end # parse

end

# end of generated code