#!/usr/bin/ruby

# code generated by "translate.ruby.pss" a pep script
# http://bumble.sf.net/books/pars/tr/

# require 'something'
require 'stringio'

# The parsing "machine" driven by the script below: a workspace string, a
# stack of parse tokens, a tape of token attributes that moves in step with
# the stack, and a one-character lookahead ("peep") on the input stream.
class Machine
  attr_accessor :work, :charsRead, :linesRead, :escape, :delimiter,
                :counter, :stack, :tape, :cell, :marks, :eof, :peep

  # Make a new machine.
  #
  # input - the source of characters: any object responding to +getc+
  #         (returning nil at end of stream) or a String. Defaults to ARGF.
  def initialize(input = ARGF)
    @size = 300        # how many elements in stack/tape/marks
    @charsRead = 0     # how many chars already read
    @linesRead = 1     # how many lines already read
    @escape = "\\"
    @delimiter = "*"   # push/pop delimiter (default "*")
    @counter = 0       # a counter for anything
    @work = ""         # the workspace
    @stack = []        # stack for parse tokens
    @cell = 0          # current tape cell
    @tape = Array.new(@size) { String.new }   # a list of attributes for tokens
    @marks = Array.new(@size) { String.new }  # marked tape cells
    setInput(input)    # primes @peep and @eof
  end

  # Switch the machine to a new input source and prime the lookahead.
  # Only the input, @peep and @eof are touched; workspace, stack, tape and
  # the counters keep their current values.
  #
  # newInput - a String, or any object responding to +getc+.
  def setInput(newInput)
    @input = newInput.is_a?(String) ? StringIO.new(newInput) : newInput
    # getc returns nil at end of stream (readchar would raise EOFError)
    @peep = @input.getc
    @eof = @peep.nil?
  end

  # Read one character from the input stream and update the machine:
  # the lookahead is appended to the workspace and a new lookahead is
  # fetched. Exits the program when called after end of stream.
  def read
    exit if @eof
    @charsRead += 1
    # increment lines
    @linesRead += 1 if @peep == "\n"
    @work += @peep
    @peep = @input.getc
    @eof = true if @peep.nil?
  end

  # test if all chars in workspace are in unicode category
  # (not implemented: always returns nil)
  def isInCategory(cat)
    #for ch in @work
    #  if not category(ch).start_with?(cat) then return false end
    #return True
  end

  # Remove the escape character in front of every escaped occurrence of c
  # in the workspace. The string is walked in escape+character pairs, so an
  # escaped escape character followed by c (eg "\\c") is left alone.
  # Returns the workspace, or nil when it contains no escape pair at all.
  def unescapeChar(c)
    pair = /#{Regexp.escape(@escape)}(#{Regexp.escape(c)}|.)/m
    @work.gsub!(pair) do
      Regexp.last_match(1) == c ? c : Regexp.last_match(0)
    end
  end

  # Put the escape character in front of every occurrence of c in the
  # workspace. The block form of gsub! is used because a replacement
  # *string* would have its backslash sequences (\0, \', \\ ...) expanded.
  # Returns the workspace, or nil when c does not occur.
  def escapeChar(c)
    @work.gsub!(c) { @escape + c }
  end

  # a helper for the multiescape until bug: counts the escape characters
  # that directly precede a trailing +suffix+ in the workspace.
  def countEscaped(suffix)
    count = 0
    s = @work.delete_suffix(suffix)   # a copy, the workspace is not changed
    while s.end_with?(@escape)
      count += 1
      s.delete_suffix!(@escape)
    end
    count
  end

  # Reads the input stream until the workspace ends with +suffix+ preceded
  # by an even number of escape characters, or until end of stream.
  # At least one character is read unless the stream is already exhausted.
  def until(suffix)
    return if @eof
    read
    loop do
      return if @eof
      # need to count the @escape chars preceding suffix
      # if odd, keep reading, if even, stop
      return if @work.end_with?(suffix) && countEscaped(suffix).even?
      read
    end
  end

  # this implements the ++ command incrementing the tape pointer
  # and growing the tape and marks arrays if required
  def increment
    @cell += 1
    if @cell >= @size
      @tape.append("")
      @marks.append("")
      @size += 1
    end
  end

  # Pop the top token from the stack onto the front of the workspace and
  # step the tape pointer back (never below zero).
  # Returns false when the stack is empty, true otherwise.
  def pop
    return false if @stack.empty?
    @work = @stack.pop + @work
    @cell -= 1 if @cell > 0
    true
  end

  # Push the first token of the workspace (everything up to and including
  # the first delimiter) onto the stack and advance the tape pointer.
  # Returns false, without moving the tape pointer, on an empty workspace.
  def push
    # dont increment the tape pointer on an empty push
    return false if @work == ""
    iFirst = @work.index(@delimiter)
    if iFirst.nil?
      # no delimiter: the whole workspace is the token (tape pointer stays)
      @stack.push(@work)
      @work = ""
      return true
    end
    # s[i..j] means all chars from i to j
    @stack.push(@work[0..iFirst])
    @work = @work[iFirst + 1..-1]
    increment
    true
  end

  # Print the stack, workspace, lookahead and registers, for debugging.
  def printState
    puts "Stack[#{@stack.join(', ')}] Work[#{@work}] Peep[#{@peep}]"
    puts "Acc:#{@counter} Esc:#{@escape} Delim:#{@delimiter} Chars:#{@charsRead}" +
         " Lines:#{@linesRead} Cell:#{@cell}"
  end

  # Move the tape pointer to the first cell marked with +mark+.
  # Prints a message and exits the program when no cell has that mark.
  def goToMark(mark)
    ii = @marks.find_index(mark)
    if ii.nil?
      print("bad mark '" + mark + "'!")
      exit
    end
    @cell = ii
  end

  # this is where the actual parsing/compiling code should go
  # so that it can be used by other ruby classes/objects. Also
  # should have a stream argument.
  # (not implemented: does nothing and returns nil)
  def parse(s)
    # a reset or "setinput()" method would be useful to parse a
    # different string/file/stream, without creating a new
    # machine object.
    #puts "not implemented"
  end
end # end of Machine class definition

# Print +message+ on standard output and terminate the script.
def abort_parse(message)
  print message
  exit
end

# will become:
#   mm.parse(sys.stdin)  or
#   mm.parse("abcdef")   or
#   open f; mm.parse(f)

# the restart flag, which allows .restart to work before the
# parse label, in languages (like ruby) that dont have
# labelled loops
restart = false
mm = Machine.new
while !mm.eof do
  # lex block
  while true
    mm.read   # read

    # Unlike Crockfords grammar, I will just completely ignore whitespace,
    # but this may not be acceptable in a rigorous application. Also, I
    # am just using the ctype.h definition of whitespace, whatever that
    # may be.
    if mm.work.match?(/^[[:space:]]+$/)
      mm.work = ''   # clear
      break
    end

    if mm.work.match?(/^[0-9]+$/)
      # while
      while /^[0-9]+$/.match?(mm.peep)
        break if mm.eof
        mm.read
      end
      mm.tape[mm.cell] = mm.work   # put
      mm.work = "integer*"
      mm.push
      break
    end

    if mm.work.match?(/^[a-z]+$/) || mm.work.match?(/^[A-Z]+$/)
      # while
      while /^[a-z]+$/.match?(mm.peep)
        break if mm.eof
        mm.read
      end
      unless ["true", "false", "null", "e", "E"].include?(mm.work)
        # handle error
        abort_parse("Unknown value '#{mm.work}' at line #{mm.linesRead}" \
                    " (character #{mm.charsRead}).\n")
      end
      mm.tape[mm.cell] = mm.work   # put
      mm.work = (mm.work == "e" || mm.work == "E") ? "E*" : "value*"
      mm.push
      break
    end

    if mm.work == "\""
      # save line number for error message
      mm.tape[mm.cell] = mm.linesRead.to_s
      mm.work = ''   # clear
      mm.until("\"")
      # until() also returns at end of stream, so look at the workspace:
      # a closing quote that is the very last input character is fine.
      closed = mm.work.end_with?("\"") && mm.countEscaped("\"").even?
      unless closed
        abort_parse("Unterminated quote (\") char, starting at line " \
                    "#{mm.tape[mm.cell]}\n")
      end
      mm.work = mm.work[0..-2] unless mm.work.empty?   # clip
      mm.tape[mm.cell] = mm.work   # put
      mm.work = "string*"
      mm.push
      break
    end

    # literal tokens
    if [".", ",", ":", "-", "+", "[", "]", "{", "}"].include?(mm.work)
      mm.tape[mm.cell] = mm.work   # put
      mm.work += "*"               # += makes a new string, tape cell is kept
      mm.push
      break
    end

    # here check if the workspace is empty. If not it is an error.
    if mm.work != ""
      abort_parse("JSON syntax error at line #{mm.linesRead}, char #{mm.charsRead}" \
                  ": unquoted '#{mm.work}' character.\n")
    end
    break
  end

  if restart then restart = false; next; end

  # parse block
  while true
    # This is for visualising stack reductions when debugging
    #unstack; add "\n"; print; clip; stack;

    # nothing is read inside the parse block, so the position is fixed here
    line = mm.linesRead
    char = mm.charsRead

    # The parse/compile phase
    # --------------
    # 2 tokens
    mm.pop
    mm.pop

    #-----------
    # Two token errors (not necessarily a complete list)

    # comma errors
    if ["{*,*", ",*}*", "[*,*", ",*,*", ",*]*"].include?(mm.work)
      abort_parse("JSON syntax error at line #{line}, char #{char}" \
                  " (extra or misplaced ',' comma)\n")
    end

    # exponent errors (e/E must be followed by an int or signed int)
    if mm.work != "E*" && mm.work.start_with?("E*") &&
       !mm.work.end_with?("integer*") && !mm.work.end_with?("-*") &&
       !mm.work.end_with?("+*") && !mm.work.end_with?("number*")
      abort_parse("JSON syntax error at line #{line} (char #{char})" \
                  ": misplaced exponent 'e' or 'E' \n" \
                  "In JSON syntax, e/E may only precede an int or signed int.\n" \
                  "for example: 33e+01 \n")
    end

    # exponent errors (e/E must be preceded by an int, signed int or decimal)
    if mm.work != "E*" && mm.work.end_with?("E*") &&
       !mm.work.start_with?("integer*") &&
       !mm.work.start_with?("sign.integer*") &&
       !mm.work.start_with?("decimal*")
      abort_parse("JSON syntax error at line #{line} (char #{char})" \
                  ": misplaced exponent 'e' or 'E' \n" \
                  "In JSON syntax, e/E may only be preceded by an int, signed int.\n" \
                  "or decimal eg: 33e+01 \n")
    end

    # sign errors (+/- must be followed by an integer)
    if mm.work != "-*" && mm.work.start_with?("-*") &&
       !mm.work.end_with?("integer*")
      abort_parse("Json syntax error at line #{line} (char #{char})" \
                  ": misplaced negative '-' sign\n" \
                  "In JSON syntax, - may only precede a number \n" \
                  "for example: -33.01 \n")
    end

    # sign errors (+/- must be followed by an integer)
    if mm.work != "+*" && mm.work.start_with?("+*") &&
       !mm.work.end_with?("integer*")
      abort_parse("Json syntax error at line #{line} (char #{char})" \
                  ": misplaced positive '+' sign\n" \
                  "In JSON syntax, + may only precede a number \n" \
                  "for example: +33.01 \n")
    end

    # dot errors (. must be followed by an integer)
    if mm.work != ".*" && mm.work.start_with?(".*") &&
       !mm.work.end_with?("integer*")
      abort_parse("Json syntax error at line #{line} (char #{char})" \
                  ": misplaced dot '.' sign\n" \
                  "In JSON syntax, dots may only be used in decimal numbers \n" \
                  "for example: -33.01 \n")
    end

    # dot errors (. must be preceded by an integer or signed integer)
    if mm.work != ".*" && mm.work.end_with?(".*") &&
       !mm.work.start_with?("integer*") &&
       !mm.work.start_with?("sign.integer*")
      abort_parse("JSON syntax error at line #{line} (char #{char})" \
                  ": misplaced dot '.' sign\n" \
                  "In JSON syntax, dots may only be used in decimal numbers \n" \
                  "for example: -33.01, but .44 is not a legal JSON number \n")
    end

    # eg errors "items*:*","members*:*",",*:*","[*:*","{*:*"
    # A colon must be preceded by a string. Using logic
    if mm.work.end_with?(":*") && mm.work != ":*" &&
       !mm.work.start_with?("string*")
      abort_parse("Json syntax error near line #{line}, char #{char}" \
                  " (misplaced colon ':') \n" \
                  "A ':' can only occur after a string key in an object structure \n" \
                  "Example: {\"cancelled\":true} \n")
    end

    # more colon errors
    if mm.work != ":*" && mm.work.start_with?(":*") &&
       (mm.work.end_with?("}*") || mm.work.end_with?(",*") ||
        mm.work.end_with?("]*"))
      abort_parse("JSON syntax error near line #{line}, char #{char}" \
                  " (misplaced colon ':' or missing value?) \n" \
                  "A ':' only occur as part of an object member \n" \
                  "Example: {\"cancelled\":true} \n")
    end

    # catch object member errors
    # also need to check that not only 1 token in on the stack
    # hence the !"member*" construct
    if (mm.work.start_with?("member*") || mm.work.start_with?("members*")) &&
       mm.work != "member*" && mm.work != "members*" &&
       !mm.work.end_with?(",*") && !mm.work.end_with?("}*")
      abort_parse("JSON syntax error after object member near line #{line}" \
                  " (char #{char})\n")
    end

    # catch array errors
    if mm.work.start_with?("items*") && mm.work != "items*" &&
       !mm.work.end_with?(",*") && !mm.work.end_with?("]*")
      abort_parse("Error after an array item near line #{line}" \
                  " (char #{char})\n")
    end

    if (mm.work.start_with?("array*") || mm.work.start_with?("object*")) &&
       mm.work != "array*" && mm.work != "object*" &&
       !mm.work.end_with?(",*") && !mm.work.end_with?("}*") &&
       !mm.work.end_with?("]*")
      abort_parse("JSON syntax error near line #{line} (char #{char})\n")
    end

    # invalid string sequence
    if mm.work.start_with?("string*") && mm.work != "string*" &&
       !mm.work.end_with?(",*") && !mm.work.end_with?("]*") &&
       !mm.work.end_with?("}*") && !mm.work.end_with?(":*")
      abort_parse("JSON syntax error after a string near line #{line}" \
                  " (char #{char})\n")
    end

    # transmogrify into array item, start array
    if ["[*number*", "[*string*", "[*value*", "[*array*",
        "[*object*"].include?(mm.work)
      mm.work = "[*items*"
      mm.push
      mm.push
      next
    end

    # exponents (e-403, E+120, E04), this slightly simplifies number parsing
    if mm.work == "E*sign.integer*" || mm.work == "E*integer*"
      mm.work = "^"
      mm.increment                      # ++
      mm.work += mm.tape[mm.cell]       # get
      mm.cell -= 1 if mm.cell > 0       # --
      mm.tape[mm.cell] = mm.work        # put
      mm.work = "exponent*"
      mm.push
      next
    end

    # JSON scientific format (23e-10, -201E+33)
    if mm.work == "integer*exponent*" || mm.work == "sign.integer*exponent*"
      number = mm.tape[mm.cell]         # get
      # enforce multidigit zero rules
      # But is "0e44" legal JSON number syntax? That would seem odd
      # if it is.
      if number.start_with?("+")
        abort_parse("JSON syntax error at line #{line} (char #{char}): \n" \
                    "In JSON syntax, the number part may not have a positive sign \n" \
                    "eg: +0.12e34 (error!) \n" \
                    "eg: 0.12e+34 (OK!) \n")
      end
      digits = number.start_with?("-") ? number[1..-1] : number   # clop
      if digits != "0" && digits.start_with?("0")
        abort_parse("Json syntax error at line #{line} (char #{char}): \n" \
                    "In JSON syntax, multidigit numbers must begin with 1-9 \n" \
                    "eg: -0234.01E+9 (error) \n")
      end
      mm.work = mm.tape[mm.cell]        # get
      mm.increment                      # ++
      mm.work += mm.tape[mm.cell]       # get
      mm.cell -= 1 if mm.cell > 0       # --
      mm.tape[mm.cell] = mm.work        # put
      mm.work = "number*"
      mm.push
      next
    end

    # JSON scientific format (-0.23e10, 10.2E+33)
    if mm.work == "decimal*exponent*"
      mm.work = mm.tape[mm.cell]        # get
      mm.increment                      # ++
      mm.work += mm.tape[mm.cell]       # get
      mm.cell -= 1 if mm.cell > 0       # --
      mm.tape[mm.cell] = mm.work        # put
      mm.work = "number*"
      mm.push
      next
    end

    # where does a number terminate, this is the problem
    # It terminates at the tokens ,* }* ]* and maybe space but
    # this script doesnt have a space* token.
    if mm.work == "sign.integer*,*" || mm.work == "integer*,*"
      mm.work = "number*,*"
      mm.push
      mm.push
      next
    end

    # transmog
    if mm.work == "sign.integer*]*" || mm.work == "integer*]*"
      mm.work = "items*]*"
      mm.push
      mm.push
      next
    end

    if mm.work == "sign.integer*}*" || mm.work == "integer*}*"
      mm.work = "number*}*"
      mm.push
      mm.push
      next
    end

    # convert decimals to numbers with token lookahead
    if ["decimal*}*", "decimal*]*", "decimal*,*"].include?(mm.work)
      mm.work = mm.work.gsub("decimal*", "number*")   # replace
      mm.push
      mm.push
      next
    end

    # signed numbers
    if mm.work == "-*integer*" || mm.work == "+*integer*"
      mm.work = mm.tape[mm.cell]        # get
      mm.increment                      # ++
      mm.work += mm.tape[mm.cell]       # get
      mm.cell -= 1 if mm.cell > 0       # --
      mm.tape[mm.cell] = mm.work        # put
      mm.work = "sign.integer*"
      mm.push
      next
    end

    # empty arrays are legal json
    if mm.work == "[*]*"
      mm.work = "array*"
      mm.push
      next
    end

    # empty objects are legal json
    if mm.work == "{*}*"
      mm.work = "object*"
      mm.push
      next
    end

    # --------------
    # 3 tokens
    mm.pop

    #---------------
    # Some three token errors

    # Object errors
    # A negative logic doesnt work because of the lookahead required for numbers
    if ["{*string*}*", "{*integer*}*", "{*sign.integer*}*", "{*array*}*",
        "{*object*}*", "{*value*}*", "{*decimal*}*"].include?(mm.work)
      abort_parse("Json syntax error near line #{line}, char #{char}" \
                  " (misplaced brace '}' or bad object) \n" \
                  "A '}' can only occur to terminate an object structure \n" \
                  "Example: {\"hour\":21.00, \"cancelled\":true} \n")
    end

    # transmogrify number into array item
    if mm.work == "[*number*,*"
      mm.work = "[*items*,*"
      mm.push
      mm.push
      mm.push
      next
    end

    # decimal numbers eg -4.334 or +4.3 or 0.1
    if mm.work == "sign.integer*.*integer*"
      number = mm.tape[mm.cell]         # get
      if number.start_with?("+")
        # error, no positive signed decimals in JSON
        abort_parse("Json syntax error at line #{line} (char #{char})" \
                    ": misplaced positive '+' sign\n" \
                    "In JSON syntax, decimal numbers are not positively signed\n" \
                    "eg: +33.01 (error) \n")
      end
      if number.start_with?("-0") && number != "-0"
        abort_parse("Json syntax error at line #{line} (char #{char}): \n" \
                    "In JSON syntax, multidigit numbers must begin with 1-9 \n" \
                    "eg: -0234.01E+9 (error) \n")
      end
      mm.work = "decimal*"
      mm.push
      next
    end

    # decimal numbers eg 4.334 or 0.1
    if mm.work == "integer*.*integer*"
      mm.work = "decimal*"
      mm.push
      next
    end

    # arrays,
    if mm.work == "[*items*]*" || mm.work == "[*number*]*"
      mm.work = "array*"
      mm.push
      next
    end

    # array items
    if ["items*,*string*", "items*,*value*", "items*,*array*",
        "items*,*object*", "items*,*number*"].include?(mm.work)
      mm.work = "items*"
      mm.push
      next
    end

    # object members
    #"string*:*integer*",
    if ["string*:*number*", "string*:*string*", "string*:*value*",
        "string*:*object*", "string*:*array*"].include?(mm.work)
      mm.work = "member*"
      mm.push
      next
    end

    # multiple elements of an object
    if mm.work == "member*,*member*" || mm.work == "members*,*member*"
      mm.work = "members*"
      mm.push
      next
    end

    # objects
    if mm.work == "{*members*}*" || mm.work == "{*member*}*"
      mm.work = "object*"
      mm.push
      next
    end

    # --------------
    # 4 tokens
    mm.pop

    if mm.work == "items*,*items*,*" || mm.work == "items*,*number*,*"
      mm.work = "items*,*"
      mm.push
      mm.push
      next
    end

    # numbers require a lookahead token, unfortunately
    if mm.work == "string*:*number*,*"
      mm.work = "member*,*"
      mm.push
      mm.push
      next
    end

    # numbers require a lookahead token, unfortunately
    if mm.work == "string*:*number*}*"
      mm.work = "member*}*"
      mm.push
      mm.push
      next
    end

    # multiple elements of an object with lookahead
    if mm.work == "member*,*member*,*" || mm.work == "members*,*member*,*"
      mm.work = "members*,*"
      mm.push
      mm.push
      next
    end

    # multiple elements of an object with lookahead
    if mm.work == "member*,*member*}*" || mm.work == "members*,*member*}*"
      mm.work = "members*}*"
      mm.push
      mm.push
      next
    end

    # --------------
    # 5 tokens
    mm.pop

    # need this clumsy rule for numbers which get resolved when
    # a ] is seen. This is the lookahead
    if mm.work == "[*items*,*items*]*" || mm.work == "[*items*,*number*]*"
      mm.work = "array*"
      mm.push
      next
    end

    # no rule matched: put the (up to five) tokens back on the stack
    mm.push
    mm.push
    mm.push
    mm.push
    mm.push

    if mm.eof
      loop { break unless mm.pop }    # unstack
      if ["object*", "array*", "value*", "string*", "integer*", "decimal*",
          "number*"].include?(mm.work)
        loop { break unless mm.push } # stack
        mm.work += "(Appears to be) valid JSON syntax. Top level structure was '"
        print mm.work                 # print
        mm.work = ''                  # clear
        mm.pop
        mm.work = mm.work[0..-2] unless mm.work.empty?   # clip
        mm.work += "'\n"
        print mm.work                 # print
        mm.work = ''                  # clear
        exit
      end
      loop { break unless mm.push }   # stack
      mm.work += "(maybe) Invalid JSON \n"
      mm.work += "The parse stack was \n"
      print mm.work                   # print
      mm.work = ''                    # clear
      loop { break unless mm.pop }    # unstack
      mm.work += "\n"
      print mm.work                   # print
    end
    break
  end # parse
end

# end of generated code