#!/usr/bin/ruby
# code generated by "translate.ruby.pss" a pep script
# http://bumble.sf.net/books/pars/tr/
# require 'something'

# The parsing machine used by the generated script below.
#
# State held by a machine:
#   work      - the workspace string that the commands operate on
#   peep      - the next unread input character (nil once input is exhausted)
#   stack     - parse tokens pushed from the workspace
#   tape      - array of attribute strings, indexed by +cell+
#   marks     - array of mark names, parallel to the tape
#   cell      - index of the current tape cell
#   charsRead / linesRead - input position counters
#   escape, delimiter, counter, eof - see initialize
#
# Input is taken from ARGF (files named in ARGV, otherwise standard input).
class Machine
  attr_accessor :work, :charsRead, :linesRead, :escape, :delimiter,
                :counter, :stack, :tape, :cell, :marks, :eof, :peep

  # make a new machine
  def initialize()
    @size = 300          # how many elements in stack/tape/marks
    @charsRead = 0       # how many chars already read
    @linesRead = 1       # how many lines already read
    @escape = "\\"
    @delimiter = "*"     # push/pop delimiter (default "*")
    @counter = 0         # a counter for anything
    @work = String.new   # the workspace
    @stack = []          # stack for parse tokens
    @cell = 0            # current tape cell
    @tape = Array.new(@size) { String.new }   # a list of attribute for tokens
    @marks = Array.new(@size) { String.new }  # marked tape cells
    # or dont initialise peep until "parse()" calls "setInput()"
    # ARGF.getc returns nil at end of input, whereas ARGF.readchar raises
    # EOFError, which made the nil test for end-of-stream unreachable.
    @peep = ARGF.getc
    @eof = @peep.nil?    # end of stream reached? (true at once for empty input)
  end

  # Placeholder: only prints a notice.
  def setInput(newInput)
    puts "to be implemented"
  end

  # read one character from the input stream and
  # update the machine.
  # Exits the program when called after end of input has been reached.
  def read
    exit if @eof
    @charsRead += 1
    # increment lines
    @linesRead += 1 if @peep == "\n"
    @work += @peep
    @peep = ARGF.getc    # nil at end of input (readchar would raise EOFError)
    @eof = true if @peep.nil?
  end

  # test if all chars in workspace are in unicode category
  # Not implemented: the body is only commented-out pseudo code, so this
  # currently always returns nil.
  def isInCategory(cat)
    #for ch in @work
    #  if not category(ch).start_with?(cat) then return false end
    #return True
  end

  # Remove the escape character in front of every occurrence of +c+ in the
  # workspace.
  # this needs to actually walk the string
  # eg "abcab\cabc"
  # not trivial
  def unescapeChar(c)
    # Block form: the block result is inserted literally, so a backslash in
    # +c+ is never interpreted as a replacement-string escape sequence.
    @work.gsub!("#{@escape}#{c}") { c }
  end

  # add escape character : trivial?
  # Put the escape character in front of every occurrence of +c+ in the
  # workspace.
  def escapeChar(c)
    # A replacement *string* would be scanned for backslash sequences
    # (\' is the post-match, \\ collapses to one backslash, \1 is a group),
    # which corrupts the text when the escape is "\". The block form
    # inserts the text verbatim.
    @work.gsub!(c) { @escape + c }
  end

  # a helper for the multiescape until bug
  # Returns how many consecutive escape characters come immediately before
  # a trailing +suffix+ in the workspace.
  def countEscaped(suffix)
    # An empty escape string can never be stripped, so the loop below would
    # not terminate; nothing can be escaped in that case.
    return 0 if @escape.empty?

    count = 0
    #s = @work.sub(/#{suffix}$/, "")
    s = @work.delete_suffix(suffix)
    while s.end_with?(@escape)
      count += 1
      s.delete_suffix!(@escape)
    end
    # puts "count=#{count}"
    return count
  end

  # reads the input stream until the workspace end with text
  # Stops early (returning nil) when end of input is reached first.
  def until(suffix)
    # read at least one character
    return if @eof
    self.read()
    while true do
      return if @eof
      # need to count the @escape chars preceding suffix
      # if odd, keep reading, if even, stop
      if @work.end_with?(suffix) && self.countEscaped(suffix).even?
        return
      end
      self.read()
    end
  end

  # this implements the ++ command incrementing the tape pointer
  # and growing the tape and marks arrays if required
  def increment()
    @cell += 1
    if @cell >= @size
      @tape.append(String.new)
      @marks.append(String.new)
      @size += 1
    end
  end

  # pop the first token from the stack into the workspace
  # Returns false when the stack is empty, true otherwise. The tape pointer
  # is moved back one cell unless it is already at cell 0.
  def pop()
    return false if @stack.length == 0
    @work = @stack.pop() + @work
    @cell -= 1 if @cell > 0
    return true
  end

  # push the first token from the workspace to the stack
  # Returns false for an empty workspace, true otherwise.
  def push()
    # dont increment the tape pointer on an empty push
    return false if @work == ""
    # need to get this from the delimiter.
    iFirst = @work.index(@delimiter)
    if iFirst.nil?
      # no delimiter: the whole workspace is pushed and the tape pointer
      # is left where it is
      @stack.push(@work)   # also @stack << @work
      @work = String.new
      return true
    end
    # s[i..j] means all chars from i to j
    # s[i,n] means n chars from i
    @stack.push(@work[0..iFirst])
    @work = @work[iFirst+1..-1]
    self.increment()
    return true
  end

  # Print the stack, workspace, peep and the counters to standard output.
  def printState()
    puts "Stack[#{@stack.join(', ')}] Work[#{@work}] Peep[#{@peep}]"
    puts "Acc:#{@counter} Esc:#{@escape} Delim:#{@delimiter} Chars:#{@charsRead}" +
         " Lines:#{@linesRead} Cell:#{@cell}"
  end

  # Move the tape pointer to the first cell whose mark equals +mark+.
  # Prints a message and exits the program when no such mark exists.
  def goToMark(mark)
    ii = @marks.find_index(mark)
    if ii.nil?
      print("bad mark '" + mark + "'!")
      exit
    end
    @cell = ii
  end

  # this is where the actual parsing/compiling code should go
  # so that it can be used by other ruby classes/objects. Also
  # should have a stream argument.
  # Currently an empty placeholder.
  def parse(s)
    # a reset or "setinput()" method would be useful to parse a
    # different string/file/stream, without creating a new
    # machine object.
    # could use code like this to check if input is string or file
    #if isinstance(s, file) print("")
    #  @reset(s)
    #  @reader = s
    #elseif isinstance(s, string)
    #f = StringIO.StringIO("test")
    #for line in f print(line)
    #else
    #  f = STDIN
    #end
    #puts "not implemented"
  end
end # end of Machine class definition

# NOTE(review): the remainder of this physical line is the start of the
# top-level lexer script. That script continues past this block, so its text
# is kept exactly as found (still collapsed onto one line).
# will become: # mm.parse(sys.stdin) or # mm.parse("abcdef") or # open f; mm.parse(f) # the restart flag, which allows .restart to work before the # parse label, in languages (like ruby) that dont have # labelled loops restart = false mm = Machine.new while !mm.eof do # lex block while true mm.read() # read # make char number relative to line, for error messages if (mm.work.match?(/^[\n]+$/)) then mm.charsRead = 0 # nochars end # newlines can separate commands in (gnu) sed so we will # just add a dummy ';' here. Also, no trailing ; is required if (mm.work.match?(/^[\n]+$/)) then mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += ";*" mm.push(); break end # ignore extraneous white-space? if (mm.work.match?(/^[[:space:]]+$/)) then mm.work = '' # clear if (mm.eof) then break end restart = true; break # restart end # comments, if (mm.work == "#") then mm.until("\n"); if (!mm.work.end_with?("\n")) then mm.work += "\n" end mm.tape[mm.cell] = mm.work # put mm.work = '' # clear # uncomment line below to include comments in output # and make new reductions # add "comment*"; push; .reparse end # literal tokens '{' and '}' are used to group commands in # sed, ';' is used to separate commands and ',' to separate line # ranges. ! 
is the postfix negation operator for ranges if (mm.work == "," || mm.work == "{" || mm.work == "}" || mm.work == ";" || mm.work == "!") then mm.tape[mm.cell] = mm.work # put mm.work += "*" mm.push(); break end # various actions: print, delete, swap if (mm.work == "=" || mm.work == "p" || mm.work == "P" || mm.work == "l" || mm.work == "d" || mm.work == "D" || mm.work == "F" || mm.work == "g" || mm.work == "G" || mm.work == "h" || mm.work == "H" || mm.work == "n" || mm.work == "N" || mm.work == "x" || mm.work == "z") then if (mm.work == "=") then # replace if mm.work.length > 0 then mm.work.gsub!("=", "=; # print line-number + \\n") end end if (mm.work == "d") then # replace if mm.work.length > 0 then mm.work.gsub!("d", "d; # delete pattern-space, restart") end end if (mm.work == "D") then # replace if mm.work.length > 0 then mm.work.gsub!("D", "D; # delete pattern-space to 1st \\n, restart") end end if (mm.work == "e") then # replace if mm.work.length > 0 then mm.work.gsub!("e", "e; # exec patt-space command and replace") end end if (mm.work == "F") then # replace if mm.work.length > 0 then mm.work.gsub!("F", "F; # print input filename + \\n") end end if (mm.work == "g") then # replace if mm.work.length > 0 then mm.work.gsub!("g", "g; # replace patt-space with hold-space") end end if (mm.work == "G") then # replace if mm.work.length > 0 then mm.work.gsub!("G", "G; # append hold-space to patt-space + \\n") end end if (mm.work == "h") then # replace if mm.work.length > 0 then mm.work.gsub!("h", "h; # replace hold-space with patt-space") end end if (mm.work == "H") then # replace if mm.work.length > 0 then mm.work.gsub!("H", "H; # append patt-space to hold-space + \\n") end end if (mm.work == "l") then # replace if mm.work.length > 0 then mm.work.gsub!("l", "l; # print pattern-space unambiguously") end end if (mm.work == "n") then # replace if mm.work.length > 0 then mm.work.gsub!("n", "n; # print patt-space, get next line into patt-space ") end end if (mm.work == "N") 
then # replace if mm.work.length > 0 then mm.work.gsub!("N", "N; # append next line to patt-space + \\n ") end end if (mm.work == "p") then # replace if mm.work.length > 0 then mm.work.gsub!("p", "p; # print pattern-space") end end if (mm.work == "P") then # replace if mm.work.length > 0 then mm.work.gsub!("P", "P; # print pattern-space up to 1st newline") end end if (mm.work == "x") then # replace if mm.work.length > 0 then mm.work.gsub!("x", "x; # swap pattern-space with hold-space") end end if (mm.work == "z") then # replace if mm.work.length > 0 then mm.work.gsub!("z", "z; # delete pattern-space, NO restart") end end mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "action*" mm.push(); break end # line numbers are also selectors if (mm.work.match?(/^[0-9]+$/)) then # while while /^[0-9]+$/.match?(mm.peep) if mm.eof then break end mm.read() end mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "number*" mm.push(); break end # $ is the last line of the file if (mm.work == "$") then mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "number*" mm.push(); break end # patterns - only execute commands if lines match if (mm.work == "/") then # save line/char number for error message mm.work = '' # clear mm.work += "near line " mm.work += mm.linesRead.to_s # lines mm.work += ", char " mm.work += mm.charsRead.to_s # chars mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.until("/"); if (!mm.work.end_with?("/")) then mm.work = '' # clear mm.work += "Missing '/' to terminate " mm.work += mm.tape[mm.cell] # get mm.work += "?\n" print mm.work # print exit end if !mm.work.empty? 
then # clip mm.work = mm.work[0..-2] # clip end mm.tape[mm.cell] = mm.work # put mm.work = '' # clear # add any delimiter for pattern here, or none mm.work += "/" mm.work += mm.tape[mm.cell] # get mm.work += "/" mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "pattern*" mm.push(); break end # read transliteration commands if (mm.work == "y") then # save line/char number for error message mm.work = '' # clear mm.work += "near line " mm.work += mm.linesRead.to_s # lines mm.work += ", char " mm.work += mm.charsRead.to_s # chars mm.tape[mm.cell] = mm.work # put mm.work = '' # clear # allow spaces between 'y' and '/' although gnu set doesn't mm.until("/"); if (!mm.work.end_with?("/") || !mm.work.match?(/^[ \/]+$/)) then mm.work = '' # clear mm.work += "Missing '/' after 'y' transliterate command\n" mm.work += "Or trailing characters " mm.work += mm.tape[mm.cell] # get mm.work += "\n" print mm.work # print exit end # save line/char number for error message mm.work = '' # clear mm.work += "near line " mm.work += mm.linesRead.to_s # lines mm.work += ", char " mm.work += mm.charsRead.to_s # chars mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.until("/"); if (!mm.work.end_with?("/")) then mm.work = '' # clear mm.work += "Missing 2nd '/' after 'y' transliterate command " mm.work += mm.tape[mm.cell] # get mm.work += "\n" print mm.work # print exit end if (mm.work == "/") then mm.work = '' # clear mm.work += "Sed syntax error? \n" mm.work += " Empty regex after 'y' transliterate command " mm.work += mm.tape[mm.cell] # get mm.work += "\n" print mm.work # print exit end # replace pattern found if !mm.work.empty? 
then # clip mm.work = mm.work[0..-2] # clip end mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "y/" mm.work += mm.tape[mm.cell] # get mm.tape[mm.cell] = mm.work # put mm.work = '' # clear # save line/char number for error message mm.work += "near line " mm.work += mm.linesRead.to_s # lines mm.work += ", char " mm.work += mm.charsRead.to_s # chars mm.increment() # ++ mm.tape[mm.cell] = mm.work # put if mm.cell > 0 then mm.cell -= 1; end # -- mm.work = '' # clear mm.until("/"); if (!mm.work.end_with?("/")) then mm.work = '' # clear mm.work += "Missing 3rd '/' after 'y' transliterate command " mm.work += mm.tape[mm.cell] # get mm.work += "\n" print mm.work # print exit end if !mm.work.empty? then # clip mm.work = mm.work[0..-2] # clip end mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work # swap mm.work += "/" mm.work += mm.tape[mm.cell] # get mm.work += "/" # y/// does not have modifiers (unlike s///) mm.tape[mm.cell] = mm.work # put mm.work = '' # clear mm.work += "action*" mm.push(); break end # various commands that have an option word parameter if (mm.work == "b" || mm.work == "e" || mm.work == "q" || mm.work == "Q" || mm.work == "t" || mm.work == "T") then # ignore intervening space if any mm.tape[mm.cell] = mm.work # put mm.work = '' # clear # while while /^[ ]+$/.match?(mm.peep) if mm.eof then break end mm.read() end mm.work = '' # clear # A bit more permissive that gnu-sed which doesn't allow # read to end in ';'. # whilenot while !/^[ ;}]+$/.match?(mm.peep) if mm.eof then break end mm.read() end # word parameters are optional to these commands # just add a space to separate command from parameter if (mm.work != "") then mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work # swap mm.work += " " mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work # swap end mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work # swap mm.work += mm.tape[mm.cell] # get if (mm.work.start_with?("b")) then mm.work += "; # branch to