#!/usr/bin/ruby
# code generated by "translate.ruby.pss" a pep script
# http://bumble.sf.net/books/pars/tr/
# require 'something'

# A small text-processing virtual machine: a workspace string, a one
# character look-ahead ("peep"), a stack of parse tokens and a tape of
# attribute strings indexed by the current cell.
class Machine
  attr_accessor :work, :charsRead, :linesRead, :escape, :delimiter,
                :counter, :stack, :tape, :cell, :marks, :eof, :peep

  # Make a new machine and prime the look-ahead character.
  #
  # input - any object responding to #getc that returns nil at end of
  #         stream (defaults to ARGF, i.e. files named in ARGV or stdin).
  def initialize(input = ARGF)
    @input = input
    @size = 300        # how many elements in tape/marks
    @charsRead = 0     # how many chars already read
    @linesRead = 1     # how many lines already read
    @escape = "\\"
    @delimiter = "*"   # push/pop delimiter (default "*")
    @counter = 0       # a counter for anything
    @work = ""         # the workspace
    @stack = []        # stack for parse tokens
    @cell = 0          # current tape cell
    @tape = Array.new(@size) { String.new }   # a list of attributes for tokens
    @marks = Array.new(@size) { String.new }  # marked tape cells
    # getc (unlike readchar) returns nil instead of raising EOFError at
    # end of stream, so an empty input simply starts in the eof state.
    @peep = @input.getc
    @eof = @peep.nil?  # end of stream reached?
  end

  # Placeholder: switching the input of an existing machine is not
  # implemented yet.
  def setInput(newInput)
    puts "to be implemented"
  end

  # Read one character from the input stream and update the machine:
  # the previous peep is appended to the workspace and a new peep is
  # fetched. Exits the program when called after end of stream.
  def read
    exit if @eof
    @charsRead += 1
    # increment lines
    @linesRead += 1 if @peep == "\n"
    # += (not <<) on purpose: a tape cell may hold the same String object
    # as the workspace and must not be modified through it.
    @work += @peep
    @peep = @input.getc
    @eof = true if @peep.nil?
  end

  # Test if all chars in workspace are in unicode category.
  # Not implemented: always returns nil.
  def isInCategory(cat)
    #for ch in @work
    # if not category(ch).start_with?(cat) then return false end
    #return True
  end

  # Remove the escape character in front of every occurrence of c in the
  # workspace (in place).
  # this needs to actually walk the string
  # eg "abcab\cabc"
  # not trivial
  def unescapeChar(c)
    # block form: the replacement is taken literally
    @work.gsub!("#{@escape}#{c}") { c }
  end

  # Put the escape character in front of every occurrence of c in the
  # workspace (in place).
  def escapeChar(c)
    # A replacement *string* would have backslash sequences such as \' or
    # \0 expanded as back-references; the block form inserts it verbatim.
    @work.gsub!(c) { @escape + c }
  end

  # A helper for the multiescape until bug: counts the escape characters
  # that directly precede suffix at the end of the workspace.
  def countEscaped(suffix)
    count = 0
    #s = @work.sub(/#{suffix}$/, "")
    s = @work.delete_suffix(suffix)
    while s.end_with?(@escape)
      count += 1
      s.delete_suffix!(@escape)
    end
    # puts "count=#{count}"
    count
  end

  # Reads the input stream until the workspace ends with suffix preceded
  # by an even number of escape characters, or until end of stream.
  def until(suffix)
    # read at least one character
    return if @eof
    read
    while true
      return if @eof
      # need to count the @escape chars preceding suffix
      # if odd, keep reading, if even, stop
      return if @work.end_with?(suffix) && countEscaped(suffix).even?
      read
    end
  end

  # this implements the ++ command incrementing the tape pointer
  # and growing the tape and marks arrays if required
  def increment()
    @cell += 1
    if @cell >= @size
      @tape.append("")
      @marks.append("")
      @size += 1
    end
  end

  # Pop the top token from the stack onto the front of the workspace and
  # move the tape pointer back one cell (never below 0).
  # Returns false if the stack was empty, true otherwise.
  def pop()
    return false if @stack.length == 0
    @work = @stack.pop() + @work
    @cell -= 1 if @cell > 0
    true
  end

  # Push the first token from the workspace to the stack.
  # A token runs up to and including the first delimiter; without a
  # delimiter the whole workspace is pushed and the tape pointer stays.
  # Returns false on an empty workspace, true otherwise.
  def push()
    # dont increment the tape pointer on an empty push
    return false if @work == ""
    # need to get this from the delimiter.
    iFirst = @work.index(@delimiter)
    if iFirst.nil?
      @stack.push(@work)   # also @stack << @work
      @work = ""
      return true
    end
    # s[i..j] means all chars from i to j
    # s[i,n] means n chars from i
    @stack.push(@work[0..iFirst])
    @work = @work[iFirst + 1..-1]
    increment()
    true
  end

  # Print the stack, workspace, peep and the scalar registers to stdout.
  def printState()
    puts "Stack[#{@stack.join(', ')}] Work[#{@work}] Peep[#{@peep}]"
    puts "Acc:#{@counter} Esc:#{@escape} Delim:#{@delimiter} Chars:#{@charsRead}" +
         " Lines:#{@linesRead} Cell:#{@cell}"
  end

  # Move the tape pointer to the first cell marked with mark; prints a
  # message and exits the program if no such mark exists.
  def goToMark(mark)
    ii = @marks.find_index(mark)
    if ii.nil?
      print("bad mark '" + mark + "'!")
      exit
    end
    @cell = ii
  end

  # this is where the actual parsing/compiling code should go
  # so that it can be used by other ruby classes/objects. Also
  # should have a stream argument.
  # Not implemented: currently does nothing and returns nil.
  def parse(s)
    # a reset or "setinput()" method would be useful to parse a
    # different string/file/stream, without creating a new
    # machine object.
    # could use code like this to check if input is string or file
    #if isinstance(s, file) print("")
    # @reset(s)
    # @reader = s
    #elseif isinstance(s, string)
    #f = StringIO.StringIO("test")
    #for line in f print(line)
    #else
    # f = STDIN
    #end
    #puts "not implemented"
  end
end # end of Machine class definition

# will become:
# mm.parse(sys.stdin) or
# mm.parse("abcdef") or
# open f; mm.parse(f)

# the restart flag, which allows .restart to work before the
# parse label, in languages (like ruby) that dont have
# labelled loops
restart = false
mm = Machine.new
# the empty recordset trick to simplify the grammar rules
mm.work += "recordset*"
mm.push()
while !mm.eof do
  # lex block
  while true
    mm.read()                       # read
    if mm.work.match?(/^[\n]+$/)
      # just to debug
      # lines; print;
      mm.work = ''                  # clear
    end
    # whilenot
    while !/^[\n]+$/.match?(mm.peep)
      if mm.eof then break end
      mm.read()
    end
    # ignore blank lines
    if mm.work == "" || mm.work.match?(/^[[:space:]]+$/)
      mm.work = ''                  # clear
      break
    end
    mm.tape[mm.cell] = mm.work      # put
    if mm.work.start_with?("#") && mm.work != "#"
      if mm.work.match?(/^[#0123456789]+$/)
        mm.work = ''                # clear
        mm.work += "timestamp*"
        mm.push()
        break
      end
      mm.work = ''                  # clear
      mm.work += "comment*"
      mm.push()
      break
    end
    # tag the command as trivial if it is
    # for later removal. If there is a comment above it we may keep it anyway
    # tag as trivial all commands less than 5 characters
    if !mm.work.empty?              # clip
      mm.work = mm.work[0..-2]
    end
    if !mm.work.empty?              # clip
      mm.work = mm.work[0..-2]
    end
    if !mm.work.empty?              # clip
      mm.work = mm.work[0..-2]
    end
    if !mm.work.empty?              # clip
      mm.work = mm.work[0..-2]
    end
    if mm.work == ""
      mm.work = ''                  # clear
      mm.work += "trivial*"
      mm.push()
      break
    end
    mm.work = ''                    # clear
    mm.work += mm.tape[mm.cell]     # get
    if mm.work.start_with?("df ") || mm.work == "df" ||
       mm.work.start_with?("du ") ||
       mm.work.start_with?("mv ") ||
       mm.work.start_with?("cp ") ||
       mm.work.start_with?("less ") ||
       mm.work.start_with?("vim ") ||
       mm.work.start_with?("rm ") ||
       mm.work.start_with?("mkdir ") ||
       mm.work.start_with?("find ") ||
       mm.work.start_with?("locate ") ||
       mm.work.start_with?("cd ") || mm.work == "cd" ||
       mm.work.start_with?("ls ") || mm.work == "ls" ||
       mm.work == "pwd" || mm.work == "hist" || mm.work == "books" ||
       mm.work == "bk" || mm.work == "ho" || mm.work == "updatedb" ||
       mm.work == "bashrc" || mm.work == "vimrc" || mm.work == "os" ||
       mm.work == "cos" || mm.work == "ccos" || mm.work == "make"
      mm.work = ''                  # clear
      mm.work += "trivial*"
      mm.push()
      break
    end
    mm.work = ''                    # clear
    mm.work += "command*"
    mm.push()
    break
  end
  if restart then restart = false; next; end

  # parse block
  while true
    # for debugging
    # add "line "; lines; add " char "; chars; add ": "; print; clear;
    #add "line "; lines; add ": "; print; clear;
    #unstack; print; stack; add "\n"; print; clear;
    # ----------------
    # 2 tokens
    mm.pop()
    mm.pop()
    # ignore duplicated timestamps.
    if mm.work == "timestamp*timestamp*"
      mm.work = ''                          # clear
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      mm.work = ''                          # clear
      mm.work += "timestamp*"
      mm.push()
      next
    end
    # handle multiline comments
    if mm.work == "comment*comment*"
      mm.work = ''                          # clear
      mm.work += mm.tape[mm.cell]           # get
      mm.work += "\n"
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      mm.work = ''                          # clear
      mm.work += "comment*"
      mm.push()
      next
    end
    # dont need because an initial recordset always exists
    #"record*record*","recordset*record*" {
    if mm.work == "recordset*record*"
      mm.work = ''                          # clear
      mm.work += mm.tape[mm.cell]           # get
      mm.work += "\n"
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      mm.work = ''                          # clear
      # debug code
      # a+; count; add " record!\n"; print; clear;
      mm.work += "recordset*"
      mm.push()
      next
    end
    # this will be compiled differently from r*r*
    if mm.work == "recordset*command*"
      mm.work = ''                          # clear
      mm.work += mm.tape[mm.cell]           # get
      mm.work += "\n"
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      mm.work = ''                          # clear
      mm.work += "recordset*"
      mm.push()
      next
    end
    if mm.work == "recordset*trivial*"
      mm.counter += 1                       # a+
      # count filtered commands
      mm.work = ''                          # clear
      mm.work += "recordset*"
      mm.push()
      next
    end
    if mm.eof
      # clean up trailing comments etc
      if mm.work == "recordset*timestamp*" || mm.work == "recordset*comment*"
        mm.work = ''                        # clear
        mm.work += "recordset*record*"
        mm.push()
        mm.push()
        next
      end
    end
    # 3 tokens
    mm.pop()
    # remove trivial commands without comments
    if mm.work == "recordset*timestamp*trivial*"
      mm.counter += 1                       # a+
      # count filtered commands
      mm.work = ''                          # clear
      mm.work += "recordset*"
      mm.push()
      next
    end
    # ignore duplicated timestamps.
    if mm.work == "timestamp*comment*timestamp*"
      mm.work = ''                          # clear
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      mm.work = ''                          # clear
      mm.increment()                        # ++
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.work = ''                          # clear
      mm.work += "comment*timestamp*"
      mm.push()
      mm.push()
      next
    end
    # amalgamate comments before and after the timestamp
    if mm.work == "comment*timestamp*comment*"
      mm.work = ''                          # clear
      mm.work += mm.tape[mm.cell]           # get
      mm.increment()                        # ++
      mm.increment()                        # ++
      mm.work += "\n"
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      mm.work = ''                          # clear
      mm.work += "comment*timestamp*"
      mm.push()
      mm.push()
      next
    end
    if mm.work == "comment*timestamp*command*" || mm.work == "comment*timestamp*trivial*"
      mm.work = ''                          # clear
      mm.work += mm.tape[mm.cell]           # get
      mm.work += "\n"
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      mm.work += "\n"
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      mm.work = ''                          # clear
      mm.work += "record*"
      mm.push()
      next
    end
    # dont remove trivial commands with comments
    if mm.work == "timestamp*comment*command*" || mm.work == "timestamp*comment*trivial*"
      mm.work = ''                          # clear
      # switch the order to make comment precede timestamp
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      mm.work += "\n"
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.work += mm.tape[mm.cell]           # get
      mm.work += "\n"
      mm.increment()                        # ++
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      mm.work = ''                          # clear
      mm.work += "record*"
      mm.push()
      next
    end
    if mm.work == "recordset*timestamp*command*"
      mm.work = ''                          # clear
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      mm.work += "\n"
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.work = ''                          # clear
      mm.work += "recordset*record*"
      mm.push()
      mm.push()
      next
    end
    # resolve commands and trivial command with comments
    if mm.work == "recordset*comment*command*" || mm.work == "recordset*comment*trivial*"
      mm.work = ''                          # clear
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      mm.work += "\n"
      mm.increment()                        # ++
      mm.work += mm.tape[mm.cell]           # get
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.tape[mm.cell] = mm.work            # put
      if mm.cell > 0 then mm.cell -= 1; end # --
      mm.work = ''                          # clear
      mm.work += "recordset*record*"
      mm.push()
      mm.push()
      next
    end
    mm.push()
    mm.push()
    mm.push()
    if mm.eof
      mm.pop()
      mm.pop()
      if mm.work != "recordset*"
        mm.push()
        mm.push()
        mm.work += "# History file did not parse well!\n"
        print mm.work                       # print
        mm.work = ''                        # clear
        mm.work += "# Parse stack was: "
        print mm.work                       # print
        mm.work = ''                        # clear
        while mm.pop() do next end          # unstack
        mm.work += "\n"
        print mm.work                       # print
        exit
      end
      if mm.work == "recordset*"
        mm.work = ''                        # clear
        mm.work += mm.tape[mm.cell]         # get
        mm.work += "\n# History file parsed and filtered by pars/eg/bash.history.pss \n"
        mm.work += "# "
        mm.work += mm.counter.to_s          # count
        mm.work += " trivial commands (without preceding comments) were removed.\n"
        print mm.work                       # print
      end
    end
    break
  end # parse
end
# end of generated code