#!/usr/bin/ruby
# code generated by "translate.ruby.pss" a pep script
# http://bumble.sf.net/books/pars/tr/

# require 'something'

# The parsing "virtual machine" used by the generated script below.
#
# It holds a workspace string (+work+), a stack of parse tokens (+stack+),
# a tape of token attributes (+tape+) addressed by +cell+, a one character
# look-ahead (+peep+) and a few counters. Tokens in the workspace are
# terminated by +delimiter+ ("*" by default).
class Machine
  attr_accessor :work, :charsRead, :linesRead, :escape, :delimiter,
                :counter, :stack, :tape, :cell, :marks
  attr_writer :eof

  # Make a new machine.
  #
  # input:: any object with a +getc+ method that returns the next character,
  #         or nil at end of stream (ARGF, an IO, a StringIO ...).
  #
  # Nothing is read from +input+ here: the look-ahead character is fetched
  # lazily, the first time +peep+, +eof+, +read+ or +until+ needs it.
  def initialize(input = ARGF)
    @input = input
    @primed = false     # has the look-ahead character been fetched yet?
    @size = 300         # how many elements in stack/tape/marks
    @eof = false        # end of stream reached?
    @charsRead = 0      # how many chars already read
    @linesRead = 1      # how many lines already read
    @escape = "\\"
    @delimiter = "*"    # push/pop delimiter (default "*")
    @counter = 0        # a counter for anything
    @work = ""          # the workspace
    @stack = []         # stack for parse tokens
    @cell = 0           # current tape cell
    @tape = Array.new(@size) { String.new }   # a list of attributes for tokens
    @marks = Array.new(@size) { String.new }  # marked tape cells
    @peep = nil         # next character of the input (look-ahead)
  end

  # The next, not yet consumed, character of the input (nil at end of input).
  def peep
    prime
    @peep
  end

  # Override the look-ahead character; the input is then not consulted
  # to fetch an initial one.
  def peep=(char)
    @primed = true
    @peep = char
  end

  # True when there is no more input to read.
  def eof
    prime
    @eof
  end

  # Print the "tape exhausted" diagnostic (see #push).
  def printSizeError()
    puts <<~MESSAGE
      Tape max size exceeded!
      tape maximum size = #{@size}
      tape cell (current) = #{@cell}
      You can increase the @size value in the ruby script
      but normally this error indicates an error in your parsing
      script. The only exception would be massively nested structures
      in the source data.
    MESSAGE
  end

  # TODO: still a stub. Until it is implemented, hand the stream to
  # Machine.new instead.
  def setInput(newInput)
    puts "to be implemented"
  end

  # Read one character from the input stream and update the machine:
  # the look-ahead character is appended to the workspace and a new
  # look-ahead character is fetched. Exits the program when called at
  # end of input.
  def read
    prime
    exit if @eof
    @charsRead += 1
    # increment lines
    @linesRead += 1 if @peep == "\n"
    # "+=" (not "<<") on purpose: tape cells may reference the old workspace
    # string and must not change when the workspace grows.
    @work += @peep
    # getc returns nil at end of stream (readchar would raise EOFError)
    @peep = @input.getc
    @eof = true if @peep.nil?
  end

  # Test if all chars in workspace are in a unicode category.
  # TODO: not implemented, always returns nil.
  def isInCategory(cat)
    # for ch in @work
    #   if not category(ch).start_with?(cat) then return false end
    # return True
  end

  # Remove the escape character in front of every occurrence of +c+.
  # NOTE: this should really walk the string (eg "abcab\cabc"), not trivial.
  # Returns the workspace, or nil when nothing was replaced (as gsub! does).
  def unescapeChar(c)
    # block form: the replacement is taken literally, never as a back-reference
    @work.gsub!("#{@escape}#{c}") { c }
  end

  # Put the escape character in front of every occurrence of +c+.
  # Returns the workspace, or nil when nothing was replaced (as gsub! does).
  def escapeChar(c)
    # block form: a string replacement such as "\\'" or "\\1" would be
    # interpreted by gsub as a back-reference and corrupt the workspace
    @work.gsub!(c) { @escape + c }
  end

  # A helper for #until: counts the escape characters which directly
  # precede +suffix+ at the end of the workspace. The workspace itself
  # is not modified.
  def countEscaped(suffix)
    rest = @work
    rest = rest[0, rest.length - suffix.length] if rest.end_with?(suffix)
    count = 0
    # an empty escape string would match forever
    until @escape.empty? || !rest.end_with?(@escape)
      count += 1
      rest = rest[0, rest.length - @escape.length]
    end
    count
  end

  # Reads the input stream until the workspace ends with +suffix+
  # (or until end of input). At least one character is always read.
  # A suffix preceded by an odd number of escape characters is escaped
  # and does not stop the reading.
  def until(suffix)
    # read at least one character
    return if eof
    read
    loop do
      return if @eof
      # count the @escape chars preceding suffix:
      # if odd, keep reading, if even, stop
      return if @work.end_with?(suffix) && countEscaped(suffix).even?
      read
    end
  end

  # Pop the last token from the stack onto the front of the workspace
  # and move the tape pointer back one cell (never below 0).
  # Returns false when the stack is empty, true otherwise.
  def pop()
    return false if @stack.empty?
    @work = @stack.pop + @work
    @cell -= 1 if @cell > 0
    true
  end

  # Push the first token of the workspace onto the stack.
  # A token is everything up to and including the first delimiter; a
  # workspace without a delimiter is pushed whole (the tape pointer is
  # not moved in that case). Returns false for an empty workspace.
  def push()
    # dont increment the tape pointer on an empty push
    return false if @work == ""
    iFirst = @work.index(@delimiter)
    if iFirst.nil?
      @stack.push(@work)
      @work = ""
      return true
    end
    # s[i,n] means n chars from i; works for delimiters of any length
    tokenEnd = iFirst + @delimiter.length
    @stack.push(@work[0, tokenEnd])
    @work = @work[tokenEnd..-1]
    if @cell < @size
      @cell += 1
    else
      printSizeError
      exit
    end
    true
  end

  # Print the state of the machine (for debugging).
  def printState()
    puts "Stack[#{@stack.join(', ')}] Work[#{@work}] Peep[#{peep}]"
    puts "Acc:#{@counter} Esc:#{@escape} Delim:#{@delimiter} Chars:#{@charsRead}" +
         " Lines:#{@linesRead} Cell:#{@cell}"
  end

  # This is where the actual parsing/compiling code should go
  # so that it can be used by other ruby classes/objects.
  # TODO: not implemented, does nothing with +s+.
  def parse(s)
    # a reset or "setinput()" method would be useful to parse a
    # different string/file/stream, without creating a new
    # machine object.
  end

  private

  # Fetch the first look-ahead character, once.
  def prime
    return if @primed
    @primed = true
    @peep = @input.getc
    @eof = true if @peep.nil?
  end
end # end of Machine class definition

# will become:
#   mm.parse(sys.stdin) or
#   mm.parse("abcdef") or
#   open f; mm.parse(f)

# the restart flag, which allows .restart to work before the
# parse label, in languages (like ruby) that dont have
# labelled loops
restart = false
mm = Machine.new
mm.work += ""
mm.work += "\n An attempt at basic natural language parsing. "
mm.work += "\n Use the following words in simple sentences: "
mm.work += "\n"
mm.work += "\n articles: the, this, her, his, a, one, some, "
mm.work += "\n preposition: up, in, at, on, with, under, to"
mm.work += "\n adjectives: simple, big, small, blue, beautiful, small,"
mm.work += "\n nouns: flower, tree, dog, house, horse, girl, fish, meat,"
mm.work += "\n verbs: runs, eats, sleeps, is, grows, digs, sings"
mm.work += "\n"
mm.work += "\n End the sentence with a full stop \".\""
mm.work += "\n eg: the small dog eats fish."
mm.work += "\n eg: the simple horse runs on the house ."
mm.work += "\n .\n"
print mm.work   # print
mm.work = ''    # clear

# The vocabulary: parse token => the words which produce that token.
word_tokens = {
  "article*"     => %w[the this her his a one some],
  "preposition*" => %w[up in at on with under to],
  "adjective*"   => %w[simple big small blue beautiful],
  "noun*"        => %w[flower tree dog house horse girl fish meat],
  "verb*"        => %w[runs eats sleeps is grows digs sings]
}

# The token sequences which make up a sentence.
short_sentences = [
  "noun*verb*dot*", "nounphrase*verb*dot*",
  "noun*verbphrase*dot*", "nounphrase*verbphrase*dot*"
]
long_sentences = [
  "nounphrase*verb*noun*dot*", "noun*verb*noun*dot*",
  "nounphrase*verb*nounphrase*dot*", "noun*verb*nounphrase*dot*",
  "nounphrase*verbphrase*nounphrase*dot*", "noun*verbphrase*nounphrase*dot*",
  "nounphrase*verbphrase*noun*dot*", "noun*verbphrase*noun*dot*"
]

# A grammar reduction: joins the text of +cells+ consecutive tape cells
# (starting at the current cell) with single spaces, stores the result in
# the current cell and replaces the popped tokens by the one new +token+.
reduce = lambda do |cells, token|
  phrase = ''
  cells.times do |offset|
    phrase += " " if offset > 0
    phrase += mm.tape[mm.cell + offset]   # get
  end
  mm.tape[mm.cell] = phrase               # put
  mm.work = ''                            # clear
  mm.work += token
  mm.push()
end

until mm.eof
  # lex block: turn the next word (or full stop) into a parse token
  loop do
    mm.read()   # read
    if mm.work.match?(/^[[:alpha:]]+$/)
      # while: read the rest of the word
      while /^[[:alpha:]]+$/.match?(mm.peep)
        break if mm.eof
        mm.read()
      end
      mm.tape[mm.cell] = mm.work   # put
      token, _words = word_tokens.find { |_, words| words.include?(mm.work) }
      if token
        mm.work = ''   # clear
        mm.work += token
        mm.push()
        break
      end
      mm.tape[mm.cell] = mm.work   # put
      mm.work = ''                 # clear
      mm.work += "<"
      mm.work += mm.tape[mm.cell]  # get
      mm.work += ">"
      mm.work += " Sorry, don't understand that word! \n"
      print mm.work                # print
      mm.work = ''                 # clear
      exit
    end
    # use a full-stop to complete sentence
    if mm.work == "."
      mm.tape[mm.cell] = mm.work   # put
      mm.work = ''                 # clear
      mm.work += "dot*"
      mm.push()
    end
    # ignore every thing else
    mm.work = ''   # clear
    break
  end

  if restart then restart = false; next; end

  # parse block: reduce the tokens on the stack until no rule applies
  loop do
    # 2 tokens
    mm.pop()
    mm.pop()
    if mm.work == "article*noun*"
      reduce.call(2, "nounphrase*")
      next
    end
    if mm.work == "verb*preposition*"
      reduce.call(2, "verbphrase*")
      next
    end

    # 3 tokens
    mm.pop()
    if short_sentences.include?(mm.work)
      # the text of the full stop is not included in the sentence
      reduce.call(2, "sentence*")
      next
    end
    if mm.work == "article*adjective*noun*"
      reduce.call(3, "nounphrase*")
      next
    end

    # 4 tokens
    mm.pop()
    if long_sentences.include?(mm.work)
      # the text of the full stop is not included in the sentence
      reduce.call(3, "sentence*")
      next
    end

    # no rule matched: put all the tokens back on the stack
    mm.push()
    mm.push()
    mm.push()
    mm.push()

    if mm.eof
      # end of input: report what was recognised
      mm.pop()
      mm.pop()
      if mm.work == "sentence*"
        mm.work = ''                 # clear
        mm.work += "It's an english sentence! \n("
        mm.work += mm.tape[mm.cell]  # get
        mm.work += ") \n"
        mm.work += "But it may not make sense! \n"
        print mm.work                # print
        mm.work = ''                 # clear
        exit
      end
      if mm.work == "nounphrase*"
        mm.work = ''                 # clear
        mm.work += "its a noun-phrase! ("
        mm.work += mm.tape[mm.cell]  # get
        mm.work += ") \n"
        print mm.work                # print
        mm.work = ''                 # clear
        exit
      end
      if mm.work == "verbphrase*"
        mm.work = ''                 # clear
        mm.work += "its a verb-phrase! ("
        mm.work += mm.tape[mm.cell]  # get
        mm.work += ") \n"
        print mm.work                # print
        mm.work = ''                 # clear
        exit
      end
      mm.push()
      mm.push()
      mm.work += "nope, not a sentence. \n"
      print mm.work   # print
      mm.work = ''    # clear
      mm.work += "The parse stack was: \n "
      print mm.work   # print
      mm.work = ''    # clear
      while mm.pop() do next end   # unstack
      mm.work += "\n"
      print mm.work   # print
      exit
    end
    break
  end # parse
end
# end of generated code