// code generated by "translate.go.pss" a pep script // http://bumble.sf.net/books/pars/tr/ // s.HasPrefix can be used instead of strings.HasPrefix package main import ( "fmt" "bufio" "strings" "strconv" "unicode" "io" "os" "unicode/utf8" ) // an alias for Println for brevity var pr = fmt.Println /* a machine for parsing */ type machine struct { SIZE int // how many elements in stack/tape/marks eof bool charsRead int linesRead int escape rune delimiter rune counter int work string stack []string cell int tape []string marks []string peep rune reader *bufio.Reader } // there is no special init for structures func newMachine(size int) *machine { mm := machine{SIZE: size} mm.eof = false // end of stream reached? mm.charsRead = 0 // how many chars already read mm.linesRead = 1 // how many lines already read mm.escape = '\\' mm.delimiter = '*' // push/pop delimiter (default "*") mm.counter = 0 // a counter for anything mm.work = "" // the workspace mm.stack = make([]string, 0, mm.SIZE) // stack for parse tokens mm.cell = 0 // current tape cell // slices not arrays mm.tape = make([]string, mm.SIZE, mm.SIZE) // a list of attribute for tokens mm.marks = make([]string, mm.SIZE, mm.SIZE) // marked tape cells // or dont initialse peep until "parse()" calls "setInput()" // check! this is not so simple mm.reader = bufio.NewReader(os.Stdin) var err error mm.peep, _, err = mm.reader.ReadRune() if err == io.EOF { mm.eof = true } else if err != nil { fmt.Fprintln(os.Stderr, "error:", err) os.Exit(1) } return &mm } // method syntax. // func (v * vertex) abs() float64 { ... } // multiline strings are ok ? func (mm *machine) setInput(newInput string) { print("to be implemented") } // read one utf8 character from the input stream and // update the machine. func (mm *machine) read() { var err error if mm.eof { os.Exit(0) } mm.charsRead += 1 // increment lines if mm.peep == '\n' { mm.linesRead += 1 } mm.work += string(mm.peep) // check! mm.peep, _, err = mm.reader.ReadRune() if err == io.EOF { mm.eof = true } else if err != nil { fmt.Fprintln(os.Stderr, "error:", err) os.Exit(1) } } // remove escape character: trivial method ? // check the python code for this, and the c code in machine.interp.c func (mm *machine) unescapeChar(c string) { // if mm.work = "" { return } mm.work = strings.Replace(mm.work, "\\"+c, c, -1) } // add escape character : trivial func (mm *machine) escapeChar(c string) { mm.work = strings.Replace(mm.work, c, "\\"+c, -1) } /** a helper function to count trailing escapes */ func (mm *machine) countEscapes(suffix string) int { count := 0 ss := "" if strings.HasSuffix(mm.work, suffix) { ss = strings.TrimSuffix(mm.work, suffix) } for (strings.HasSuffix(ss, string(mm.escape))) { ss = strings.TrimSuffix(ss, string(mm.escape)) count++ } return count } // reads the input stream until the workspace ends with the // given character or text, ignoring escaped characters func (mm *machine) until(suffix string) { if mm.eof { return; } // read at least one character mm.read() for true { if mm.eof { return; } // we need to count the mm.Escape chars preceding suffix // if odd, keep reading, if even, stop if strings.HasSuffix(mm.work, suffix) { if (mm.countEscapes(suffix) % 2 == 0) { return } } mm.read() } } /* increment the tape pointer (command ++) and grow the tape and marks arrays if necessary */ func (mm *machine) increment() { mm.cell++ if mm.cell >= len(mm.tape) { mm.tape = append(mm.tape, "") mm.marks = append(mm.marks, "") mm.SIZE++ } } /* pop the last token from the stack into the workspace */ func (mm *machine) pop() bool { if len(mm.stack) == 0 { return false } // no, get last element of stack // a[len(a)-1] mm.work = mm.stack[len(mm.stack)-1] + mm.work // a = a[:len(a)-1] mm.stack = mm.stack[:len(mm.stack)-1] if mm.cell > 0 { mm.cell -= 1 } return true } // push the first token from the workspace to the stack func (mm *machine) push() bool { // dont increment the tape pointer on an empty push if mm.work == "" { return false } // push first token, or else whole string if no delimiter aa := strings.SplitN(mm.work, string(mm.delimiter), 2) if len(aa) == 1 { mm.stack = append(mm.stack, mm.work) mm.work = "" } else { mm.stack = append(mm.stack, aa[0]+string(mm.delimiter)) mm.work = aa[1] } mm.increment() return true } // func (mm *machine) printState() { fmt.Printf("Stack %v Work[%s] Peep[%c] \n", mm.stack, mm.work, mm.peep) fmt.Printf("Acc:%v Esc:%c Delim:%c Chars:%v", mm.counter, mm.escape, mm.delimiter, mm.charsRead) fmt.Printf(" Lines:%v Cell:%v EOF:%v \n", mm.linesRead, mm.cell, mm.eof) for ii, vv := range mm.tape { fmt.Printf("%v [%s] \n", ii, vv) if ii > 4 { return; } } } // this is where the actual parsing/compiling code should go // so that it can be used by other go classes/objects. Also // should have a stream argument. func (mm *machine) parse(s string) { } /* adapt for clop and clip */ func trimLastChar(s string) string { r, size := utf8.DecodeLastRuneInString(s) if r == utf8.RuneError && (size == 0 || size == 1) { size = 0 } return s[:len(s)-size] } func (mm *machine) clip() { cc, _ := utf8.DecodeLastRuneInString(mm.work) mm.work = strings.TrimSuffix(mm.work, string(cc)) } func (mm *machine) clop() { _, size := utf8.DecodeRuneInString(mm.work) mm.work = mm.work[size:] } type fn func(rune) bool // eg unicode.IsLetter('x') /* check whether the string s only contains runes of type determined by the typeFn function */ func isInClass(typeFn fn, s string) bool { if s == "" { return false; } for _, rr := range s { //if !unicode.IsLetter(rr) { if !typeFn(rr) { return false } } return true } /* range in format 'a,z' */ func isInRange(start rune, end rune, s string) bool { if s == "" { return false; } for _, rr := range s { if (rr < start) || (rr > end) { return false } } return true } /* list of runes (unicode chars ) */ func isInList(list string, s string) bool { return strings.ContainsAny(s, list) } func main() { // This size needs to be big for some applications. Eg // calculating big palindromes. Really // it should be dynamically allocated. var size = 30000 var mm = newMachine(size); var restart = false; // the go compiler complains when modules are imported but // not used, also if vars are not used. if restart {}; unicode.IsDigit('0'); strconv.Itoa(0); for !mm.eof { /* lex block */ for true { mm.read() /* read */ //-------------- if (isInClass(unicode.IsSpace, mm.work)) { mm.work = "" // clear break } //--------------- // We can ellide all these single character tests, because // the stack token is just the character itself with a * // Braces {} are used for blocks of commands, ',' and '.' for concatenating // tests with OR or AND logic. 'B' and 'E' for begin and end // tests, '!' is used for negation, ';' is used to terminate a // command. if (mm.work == "{" || mm.work == "}" || mm.work == ";" || mm.work == "," || mm.work == "." || mm.work == "!" || mm.work == "B" || mm.work == "E") { mm.tape[mm.cell] = mm.work /* put */ mm.work += "*" mm.push(); break } //--------------- // format: "text" if (mm.work == "\"") { // save the start line number (for error messages) in case // there is no terminating quote character. mm.work = "" // clear mm.work += "line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ") " mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "\"" mm.until("\""); if (!strings.HasSuffix(mm.work,"\"")) { mm.work = "" // clear mm.work += "Unterminated quote character (\") starting at " mm.work += mm.tape[mm.cell] /* get */ mm.work += " !\n" fmt.Printf("%s", mm.work) // print os.Exit(0) } mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "quote*" mm.push(); break } //--------------- // format: 'text', single quotes are converted to double quotes // but we must escape embedded double quotes. if (mm.work == "'") { // save the start line number (for error messages) in case // there is no terminating quote character. mm.work = "" // clear mm.work += "line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ") " mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.until("'"); if (!strings.HasSuffix(mm.work,"'")) { mm.work = "" // clear mm.work += "Unterminated quote (') starting at " mm.work += mm.tape[mm.cell] /* get */ mm.work += "!\n" fmt.Printf("%s", mm.work) // print os.Exit(0) } // empty quotes '' may be legal, for example as the second arg // to replace. mm.clip() mm.work = strings.Replace(mm.work, string('"'), string(mm.escape)+string('"'), -1) mm.work = strings.Replace(mm.work, string(mm.escape)+string('\''), string('\''), -1) mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "\"" mm.work += mm.tape[mm.cell] /* get */ mm.work += "\"" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "quote*" mm.push(); break } //--------------- // formats: [:space:] [a-z] [abcd] [:alpha:] etc // should class tests really be multiline??! if (mm.work == "[") { // save the start line number (for error messages) in case // there is no terminating bracket character. mm.work = "" // clear mm.work += "line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ") " mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "[" mm.until("]"); if (mm.work == "[]") { mm.work = "" // clear mm.work += "pep script error at line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += "): \n" mm.work += " empty character class [] \n" fmt.Printf("%s", mm.work) // print os.Exit(0) } if (!strings.HasSuffix(mm.work,"]")) { mm.work = "" // clear mm.work += "Unterminated class text ([...]) starting at " mm.work += mm.tape[mm.cell] /* get */ mm.work += "" mm.work += "\n class text can be used in tests or with the 'while' and " mm.work += "\n 'whilenot' commands. For example: " mm.work += "\n [:alpha:] { while [:alpha:]; print; clear; }" mm.work += "\n " fmt.Printf("%s", mm.work) // print os.Exit(0) } // need to escape quotes? mm.work = strings.Replace(mm.work, string('"'), string(mm.escape)+string('"'), -1) // the caret is not a negation operator in pep char classes // but dont have to escape caret because not using regexs // replace "^" "\\^"; // save the class on the tape mm.tape[mm.cell] = mm.work /* put */ mm.clop() mm.clop() if (!strings.HasPrefix(mm.work,"-")) { // not a range class, eg [a-z] but dont need to escape '-' chars // because not using regexs //clear; get; replace '-' '\\-'; put; /* nop eliminated */ } if (strings.HasPrefix(mm.work, "-")) { // a range class, eg [a-z], check if it is correct mm.clip() mm.clip() if (mm.work != "-") { mm.work = "" // clear mm.work += "Error in pep script at line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += "): \n" mm.work += " Incorrect character range class " mm.work += mm.tape[mm.cell] /* get */ mm.work += "" mm.work += "\n For example:" mm.work += "\n [a-g] # correct" mm.work += "\n [f-gh] # error! \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } // correct format, eg: [a-z] now translate to a // format that can be used by a go function mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ /* replace */ mm.work = strings.Replace(mm.work, "[", "'", -1) /* replace */ mm.work = strings.Replace(mm.work, "]", "'", -1) /* replace */ mm.work = strings.Replace(mm.work, "-", "','", -1) // now='a','z' mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "isInRange(" mm.work += mm.tape[mm.cell] /* get */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "class*" mm.push(); break } mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ // restore class text if (strings.HasPrefix(mm.work, "[:") && !strings.HasSuffix(mm.work,":]")) { mm.work = "" // clear mm.work += "malformed character class starting at " mm.work += mm.tape[mm.cell] /* get */ mm.work += "!\n" fmt.Printf("%s", mm.work) // print os.Exit(0) } // class in the form [:digit:] if (strings.HasPrefix(mm.work, "[:") && mm.work != "[:]") { mm.clip() mm.clip() mm.clop() mm.clop() // unicode posix character classes // Also, abbreviations (not implemented in pep.c yet.) // classes like [[:alpha:]] are only ascii in golang, but // see also unicode.IsLower('x'); // fix! if (mm.work == "alnum" || mm.work == "N") { mm.work = "" // clear mm.work += "isInClass(unicode.IsLetter" } //"alpha","A" { clear; add "[[:alpha:]]"; } if (mm.work == "alpha" || mm.work == "A") { mm.work = "" // clear mm.work += "isInClass(unicode.IsLetter" } // check! // non-standard posix class 'word' and ascii // check! if (mm.work == "ascii" || mm.work == "I") { mm.work = "" // clear mm.work += "isInRange(rune(0), rune(unicode.MaxASCII) " } if (mm.work == "word" || mm.work == "W") { mm.work = "" // clear mm.work += "isInClass(unicode.IsLetter" } // fix! if (mm.work == "blank" || mm.work == "B") { mm.work = "" // clear mm.work += "isInClass(unicode.IsSpace" } if (mm.work == "cntrl" || mm.work == "C") { mm.work = "" // clear mm.work += "isInClass(unicode.IsControl" } if (mm.work == "digit" || mm.work == "D") { mm.work = "" // clear mm.work += "isInClass(unicode.IsDigit" } if (mm.work == "graph" || mm.work == "G") { mm.work = "" // clear mm.work += "isInClass(unicode.IsGraphic" } if (mm.work == "lower" || mm.work == "L") { mm.work = "" // clear mm.work += "isInClass(unicode.IsLower" } if (mm.work == "print" || mm.work == "P") { mm.work = "" // clear mm.work += "isInClass(unicode.IsPrint" } if (mm.work == "punct" || mm.work == "T") { mm.work = "" // clear mm.work += "isInClass(unicode.IsPunct" } if (mm.work == "space" || mm.work == "S") { mm.work = "" // clear mm.work += "isInClass(unicode.IsSpace" } if (mm.work == "upper" || mm.work == "U") { mm.work = "" // clear mm.work += "isInClass(unicode.IsUpper" } if (mm.work == "xdigit" || mm.work == "X") { mm.work = "" // clear mm.work += "isInList(\"0123456789abcdefABCDEF\"" } if (!strings.HasPrefix(mm.work,"isIn") && !strings.HasPrefix(mm.work,"[")) { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "pep script error at line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += "): \n" mm.work += "Unknown character class '" mm.work += mm.tape[mm.cell] /* get */ mm.work += "'\n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "class*" mm.push(); break } // must be a list eg [abcdefg] mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ /* replace */ mm.work = strings.Replace(mm.work, "[", "\"", -1) /* replace */ mm.work = strings.Replace(mm.work, "]", "\"", -1) mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "isInList(" mm.work += mm.tape[mm.cell] /* get */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "class*" mm.push(); break } //--------------- // formats: (eof) (EOF) (==) etc. if (mm.work == "(") { mm.work = "" // clear mm.until(")"); mm.clip() mm.tape[mm.cell] = mm.work /* put */ if (mm.work == "eof" || mm.work == "EOF") { mm.work = "" // clear mm.work += "eof*" mm.push(); break } if (mm.work == "==") { mm.work = "" // clear mm.work += "tapetest*" mm.push(); break } mm.work += " << unknown test near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " of script.\n" mm.work += " bracket () tests are \n" mm.work += " (eof) test if end of stream reached. \n" mm.work += " (==) test if workspace is same as current tape cell \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } //--------------- // multiline and single line comments, eg #... and #* ... *# if (mm.work == "#") { mm.work = "" // clear mm.read() /* read */ if (mm.work == "\n") { mm.work = "" // clear break } // checking for multiline comments of the form "#* \n\n\n *#" // these are just ignored at the moment (deleted) if (mm.work == "*") { // save the line number for possible error message later mm.work = "" // clear mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.until("*#"); if (strings.HasSuffix(mm.work, "*#")) { // convert to go comments (/*...*/ and //) // or just one multiline mm.clip() mm.clip() /* replace */ mm.work = strings.Replace(mm.work, "\n", "\n//", -1) mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear // create a "comment" parse token // comment-out this line to remove multiline comments from the // translated golang code // add "comment*"; push; break } // make an unterminated multiline comment an error // to ease debugging of scripts. mm.work = "" // clear mm.work += "unterminated multiline comment #* ... *# \n" mm.work += "stating at line number " mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } // single line comments. some will get lost. mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "//" mm.work += mm.tape[mm.cell] /* get */ mm.until("\n"); mm.clip() mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear // comment out this below to remove single line comments // from the output mm.work += "comment*" mm.push(); break } //---------------------------------- // parse command words (and abbreviations) // legal characters for keywords (commands) if (!isInList("abcdefghijklmnopqrstuvwxyzBEKGPRUWS+-<>0^", mm.work)) { // error message about a misplaced character mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "!! Misplaced character '" mm.work += mm.tape[mm.cell] /* get */ mm.work += "' in script near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ") \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } // my testclass implementation cannot handle complex lists // eg [a-z+-] this is why I have to write out the whole alphabet /* while */ for isInList("abcdefghijklmnopqrstuvwxyzBEOFKGPRUWS+-<>0^", string(mm.peep)) { if mm.eof { break } mm.read() } //---------------------------------- // KEYWORDS // here we can test for all the keywords (command words) and their // abbreviated one letter versions (eg: clip k, clop K etc). Then // we can print an error message and abort if the word is not a // legal keyword for the parse-edit language // make ll an alias for "lines" and cc an alias for chars if (mm.work == "ll") { mm.work = "" // clear mm.work += "lines" } if (mm.work == "cc") { mm.work = "" // clear mm.work += "chars" } // one letter command abbreviations if (mm.work == "a") { mm.work = "" // clear mm.work += "add" } if (mm.work == "k") { mm.work = "" // clear mm.work += "clip" } if (mm.work == "K") { mm.work = "" // clear mm.work += "clop" } if (mm.work == "D") { mm.work = "" // clear mm.work += "replace" } if (mm.work == "d") { mm.work = "" // clear mm.work += "clear" } if (mm.work == "t") { mm.work = "" // clear mm.work += "print" } if (mm.work == "p") { mm.work = "" // clear mm.work += "pop" } if (mm.work == "P") { mm.work = "" // clear mm.work += "push" } if (mm.work == "u") { mm.work = "" // clear mm.work += "unstack" } if (mm.work == "U") { mm.work = "" // clear mm.work += "stack" } if (mm.work == "G") { mm.work = "" // clear mm.work += "put" } if (mm.work == "g") { mm.work = "" // clear mm.work += "get" } if (mm.work == "x") { mm.work = "" // clear mm.work += "swap" } if (mm.work == ">") { mm.work = "" // clear mm.work += "++" } if (mm.work == "<") { mm.work = "" // clear mm.work += "--" } if (mm.work == "m") { mm.work = "" // clear mm.work += "mark" } if (mm.work == "M") { mm.work = "" // clear mm.work += "go" } if (mm.work == "r") { mm.work = "" // clear mm.work += "read" } if (mm.work == "R") { mm.work = "" // clear mm.work += "until" } if (mm.work == "w") { mm.work = "" // clear mm.work += "while" } if (mm.work == "W") { mm.work = "" // clear mm.work += "whilenot" } if (mm.work == "n") { mm.work = "" // clear mm.work += "count" } if (mm.work == "+") { mm.work = "" // clear mm.work += "a+" } if (mm.work == "-") { mm.work = "" // clear mm.work += "a-" } if (mm.work == "0") { mm.work = "" // clear mm.work += "zero" } if (mm.work == "c") { mm.work = "" // clear mm.work += "chars" } if (mm.work == "l") { mm.work = "" // clear mm.work += "lines" } if (mm.work == "^") { mm.work = "" // clear mm.work += "escape" } if (mm.work == "v") { mm.work = "" // clear mm.work += "unescape" } if (mm.work == "z") { mm.work = "" // clear mm.work += "delim" } if (mm.work == "S") { mm.work = "" // clear mm.work += "state" } if (mm.work == "q") { mm.work = "" // clear mm.work += "quit" } if (mm.work == "s") { mm.work = "" // clear mm.work += "write" } if (mm.work == "o") { mm.work = "" // clear mm.work += "nop" } if (mm.work == "rs") { mm.work = "" // clear mm.work += "restart" } if (mm.work == "rp") { mm.work = "" // clear mm.work += "reparse" } // some extra syntax for testeof and testtape if (mm.work == "" || mm.work == "") { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "eof*" mm.push(); break } if (mm.work == "<==>") { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "tapetest*" mm.push(); break } if (mm.work == "jump" || mm.work == "jumptrue" || mm.work == "jumpfalse" || mm.work == "testis" || mm.work == "testclass" || mm.work == "testbegins" || mm.work == "testends" || mm.work == "testeof" || mm.work == "testtape") { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "The instruction '" mm.work += mm.tape[mm.cell] /* get */ mm.work += "' near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")\n" mm.work += "can be used in pep assembly code but not scripts. \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } // show information if these "deprecated" commands are used if (mm.work == "Q" || mm.work == "bail") { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "The instruction '" mm.work += mm.tape[mm.cell] /* get */ mm.work += "' near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")\n" mm.work += "is no longer part of the pep language. \n" mm.work += "use 'quit' instead of 'bail'' \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "add" || mm.work == "clip" || mm.work == "clop" || mm.work == "replace" || mm.work == "upper" || mm.work == "lower" || mm.work == "cap" || mm.work == "clear" || mm.work == "print" || mm.work == "state" || mm.work == "pop" || mm.work == "push" || mm.work == "unstack" || mm.work == "stack" || mm.work == "put" || mm.work == "get" || mm.work == "swap" || mm.work == "++" || mm.work == "--" || mm.work == "mark" || mm.work == "go" || mm.work == "read" || mm.work == "until" || mm.work == "while" || mm.work == "whilenot" || mm.work == "count" || mm.work == "a+" || mm.work == "a-" || mm.work == "zero" || mm.work == "chars" || mm.work == "lines" || mm.work == "nochars" || mm.work == "nolines" || mm.work == "escape" || mm.work == "unescape" || mm.work == "delim" || mm.work == "quit" || mm.work == "write" || mm.work == "nop" || mm.work == "reparse" || mm.work == "restart") { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "word*" mm.push(); break } //------------ // the .reparse command and "parse label" is a simple way to // make sure that all shift-reductions occur. It should be used inside // a block test, so as not to create an infinite loop. There is // no "goto" in go so we need to use labelled loops to // implement .reparse/parse> if (mm.work == "parse>") { mm.work = "" // clear mm.work += strconv.Itoa(mm.counter) /* count */ if (mm.work != "0") { mm.work = "" // clear mm.work += "script error:\n" mm.work += " extra parse> label at line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += ".\n" fmt.Printf("%s", mm.work) // print os.Exit(0) } mm.work = "" // clear mm.work += "# parse> parse label" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "parse>*" mm.push(); // use accumulator to indicate after parse> label mm.counter++ /* a+ */ break } // -------------------- // implement "begin-blocks", which are only executed // once, at the beginning of the script (similar to awk's BEGIN {} rules) if (mm.work == "begin") { mm.tape[mm.cell] = mm.work /* put */ mm.work += "*" mm.push(); break } mm.work += " << unknown command on line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of source file. \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) // ---------------------------------- // PARSING PHASE: // Below is the parse/compile phase of the script. Here we pop tokens off the // stack and check for sequences of tokens eg "word*semicolon*". If we find a // valid series of tokens, we "shift-reduce" or "resolve" the token series eg // word*semicolon* --> command* // At the same time, we manipulate (transform) the attributes on the tape, as // required. break } if restart { restart = false; continue; } // parse block for true { //------------------------------------- // 2 tokens //------------------------------------- mm.pop(); mm.pop(); // All of the patterns below are currently errors, but may not // be in the future if we expand the syntax of the parse // language. Also consider: // begintext* endtext* quoteset* notclass*, !* ,* ;* B* E* // It is nice to trap the errors here because we can emit some // (hopefully not very cryptic) error messages with a line number. // Otherwise the script writer has to debug with // pep -a asm.pp -I scriptfile if (mm.work == "word*word*" || mm.work == "word*}*" || mm.work == "word*begintext*" || mm.work == "word*endtext*" || mm.work == "word*!*" || mm.work == "word*,*" || mm.work == "quote*word*" || mm.work == "quote*class*" || mm.work == "quote*state*" || mm.work == "quote*}*" || mm.work == "quote*begintext*" || mm.work == "quote*endtext*" || mm.work == "class*word*" || mm.work == "class*quote*" || mm.work == "class*class*" || mm.work == "class*state*" || mm.work == "class*}*" || mm.work == "class*begintext*" || mm.work == "class*endtext*" || mm.work == "class*!*" || mm.work == "notclass*word*" || mm.work == "notclass*quote*" || mm.work == "notclass*class*" || mm.work == "notclass*state*" || mm.work == "notclass*}*") { mm.work += " (Token stack) \nValue: \n" mm.work += mm.tape[mm.cell] /* get */ mm.work += "\nValue: \n" mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += "\n" mm.work += "Error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of pep script (missing semicolon?) \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "{*;*" || mm.work == ";*;*" || mm.work == "}*;*") { mm.push(); mm.push(); mm.work += "Error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of pep script: misplaced semi-colon? ; \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == ",*{*") { mm.push(); mm.push(); mm.work += "Error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of script: extra comma in list? \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "command*;*" || mm.work == "commandset*;*") { mm.push(); mm.push(); mm.work += "Error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of script: extra semi-colon? \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "!*!*") { mm.push(); mm.push(); mm.work += "error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of script: \n double negation '!!' is not implemented \n" mm.work += " and probably won't be, because what would be the point? \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "!*{*" || mm.work == "!*;*") { mm.push(); mm.push(); mm.work += "error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of script: misplaced negation operator (!)? \n" mm.work += " The negation operator precedes tests, for example: \n" mm.work += " !B'abc'{ ... } or !(eof),!'abc'{ ... } \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == ",*command*") { mm.push(); mm.push(); mm.work += "error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of script: misplaced comma? \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "!*command*") { mm.push(); mm.push(); mm.work += "error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (at char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ") \n" mm.work += " The negation operator (!) cannot precede a command \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == ";*{*" || mm.work == "command*{*" || mm.work == "commandset*{*") { mm.push(); mm.push(); mm.work += "error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of script: no test for brace block? \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "{*}*") { mm.push(); mm.push(); mm.work += "error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " of script: empty braces {}. \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "B*class*" || mm.work == "E*class*") { mm.push(); mm.push(); mm.work += "error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " of script:\n classes ([a-z], [:space:] etc). \n" mm.work += " cannot use the 'begin' or 'end' modifiers (B/E) \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "comment*{*") { mm.push(); mm.push(); mm.work += "error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " of script: comments cannot occur between \n" mm.work += " a test and a brace ({). \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "}*command*") { mm.push(); mm.push(); mm.work += "error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " of script: extra closing brace '}' ?. \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } //------------ // The .restart command jumps to the first instruction after the // begin block (if there is a begin block), or the first instruction // of the script. if (mm.work == ".*word*") { mm.work = "" // clear mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ if (mm.work == "restart") { mm.work = "" // clear mm.work += strconv.Itoa(mm.counter) /* count */ // this is the opposite of .reparse, using run-once loops // cant do next before label, infinite loop // need to set flag variable. I think go has labelled loops // before the parse> label if (mm.work == "0") { mm.work = "" // clear mm.work += "restart = true; continue // restart" } if (mm.work == "1") { mm.work = "" // clear mm.work += "break" } // after the parse> label mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } if (mm.work == "reparse") { mm.work = "" // clear mm.work += strconv.Itoa(mm.counter) /* count */ // check accumulator to see if we are in the "lex" block // or the "parse" block and adjust the .reparse compilation // accordingly. if (mm.work == "0") { mm.work = "" // clear mm.work += "break" } if (mm.work == "1") { mm.work = "" // clear mm.work += "continue" } mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } mm.push(); mm.push(); mm.work += "error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of script: \n" mm.work += " misplaced dot '.' (use for AND logic or in .reparse/.restart \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } //--------------------------------- // Compiling comments so as to transfer them to the java if (mm.work == "comment*command*" || mm.work == "command*comment*" || mm.work == "commandset*comment*") { mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } if (mm.work == "comment*comment*") { mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "comment*" mm.push(); continue } // ----------------------- // negated tokens. // This is a new more elegant way to negate a whole set of // tests (tokens) where the negation logic is stored on the // stack, not in the current tape cell. We just add "not" to // the stack token. // eg: ![:alpha:] ![a-z] ![abcd] !"abc" !B"abc" !E"xyz" // This format is used to indicate a negative test for // a brace block. eg: ![aeiou] { add "< not a vowel"; print; clear; } if (mm.work == "!*quote*" || mm.work == "!*class*" || mm.work == "!*begintext*" || mm.work == "!*endtext*" || mm.work == "!*eof*" || mm.work == "!*tapetest*") { // a simplification: store the token name "quote*/class*/..." // in the tape cell corresponding to the "!*" token. /* replace */ mm.work = strings.Replace(mm.work, "!*", "not", -1) mm.push(); // this was a bug?? a missing ++; ?? // now get the token-value mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.increment() /* ++ */ mm.work = "" // clear continue } //----------------------------------------- // format: E"text" or E'text' // This format is used to indicate a "workspace-ends-with" text before // a brace block. if (mm.work == "E*quote*") { mm.work = "" // clear mm.work += "endtext*" mm.push(); mm.work += mm.tape[mm.cell] /* get */ if (mm.work == "\"\"") { // empty argument is an error mm.work = "" // clear mm.work += "pep script error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += "): \n" mm.work += " empty argument for end-test (E\"\") \n" fmt.Printf("%s", mm.work) // print os.Exit(0) } if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.increment() /* ++ */ mm.work = "" // clear continue } //----------------------------------------- // format: B"sometext" or B'sometext' // A 'B' preceding some quoted text is used to indicate a // 'workspace-begins-with' test, before a brace block. if (mm.work == "B*quote*") { mm.work = "" // clear mm.work += "begintext*" mm.push(); mm.work += mm.tape[mm.cell] /* get */ if (mm.work == "\"\"") { // empty argument is an error mm.work = "" // clear mm.work += "pep script error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += "): \n" mm.work += " empty argument for begin-test (B\"\") \n" fmt.Printf("%s", mm.work) // print os.Exit(0) } if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.increment() /* ++ */ mm.work = "" // clear continue } //-------------------------------------------- // ebnf: command := word, ';' ; // formats: "pop; push; clear; print; " etc // all commands need to end with a semi-colon except for // .reparse and .restart if (mm.work == "word*;*") { mm.work = "" // clear // check if command requires parameter mm.work += mm.tape[mm.cell] /* get */ if (mm.work == "add" || mm.work == "until" || mm.work == "while" || mm.work == "whilenot" || mm.work == "mark" || mm.work == "go" || mm.work == "escape" || mm.work == "unescape" || mm.work == "delim" || mm.work == "replace") { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "'" mm.work += mm.tape[mm.cell] /* get */ mm.work += "'" mm.work += " << command needs an argument, on line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " of script.\n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "clip") { mm.work = "" // clear mm.work += "mm.clip()" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "clop") { mm.work = "" // clear mm.work += "mm.clop()" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "clear") { mm.work = "" // clear mm.work += "mm.work = \"\" // clear" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "upper") { mm.work = "" // clear mm.work += "mm.work = strings.ToUpper(mm.work) /* upper */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "lower") { mm.work = "" // clear mm.work += "mm.work = strings.ToLower(mm.work) /* lower */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "cap") { mm.work = "" // clear mm.work += "mm.work = strings.Title(strings.ToLower(mm.work)) // capital" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "print") { mm.work = "" // clear mm.work += "fmt.Printf(\"%s\", mm.work) // print" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "state") { mm.work = "" // clear mm.work += "mm.printState() // state" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "pop") { mm.work = "" // clear mm.work += "mm.pop();" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "push") { mm.work = "" // clear mm.work += "mm.push();" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "unstack") { mm.work = "" // clear mm.work += "for mm.pop() {} /* unstack */ " mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "stack") { mm.work = "" // clear mm.work += "for mm.push() {} /* stack */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "put") { mm.work = "" // clear mm.work += "mm.tape[mm.cell] = mm.work /* put */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "get") { mm.work = "" // clear mm.work += "mm.work += mm.tape[mm.cell] /* get */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "swap") { mm.work = "" // clear mm.work += "mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work /* swap */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "++") { mm.work = "" // clear mm.work += "mm.increment() /* ++ */ \n" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "--") { mm.work = "" // clear mm.work += "if mm.cell > 0 { mm.cell-- } /* -- */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "read") { mm.work = "" // clear mm.work += "mm.read() /* read */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "count") { mm.work = "" // clear mm.work += "mm.work += strconv.Itoa(mm.counter) /* count */ " mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "a+") { mm.work = "" // clear mm.work += "mm.counter++ /* a+ */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "a-") { mm.work = "" // clear mm.work += "mm.counter-- /* a- */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "zero") { mm.work = "" // clear mm.work += "mm.counter = 0 /* zero */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "chars") { mm.work = "" // clear mm.work += "mm.work += strconv.Itoa(mm.charsRead) /* chars */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "lines") { mm.work = "" // clear mm.work += "mm.work += strconv.Itoa(mm.linesRead) /* lines */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "nochars") { mm.work = "" // clear mm.work += "mm.charsRead = 0 /* nochars */" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "nolines") { mm.work = "" // clear mm.work += "mm.linesRead = 0 /* nolines */" mm.tape[mm.cell] = mm.work /* put */ } // use a labelled loop to quit script. if (mm.work == "quit") { mm.work = "" // clear mm.work += "os.Exit(0)" mm.tape[mm.cell] = mm.work /* put */ } // inline this? if (mm.work == "write") { mm.work = "" // clear // go syntax mm.work += "/* write */\n" mm.work += "f, err := os.Create(\"sav.pp\")\n" mm.work += "if err != nil { panic(err) }\n" mm.work += "defer f.Close()\n" mm.work += "_, err = f.WriteString(mm.work)\n" mm.work += "if err != nil { panic(err) }\n" mm.work += "f.Sync()" mm.tape[mm.cell] = mm.work /* put */ } if (mm.work == "nop") { mm.work = "" // clear mm.work += "/* nop eliminated */" mm.tape[mm.cell] = mm.work /* put */ } mm.work = "" // clear mm.work += "command*" mm.push(); continue } //----------------------------------------- // ebnf: commandset := command , command ; if (mm.work == "command*command*" || mm.work == "commandset*command*") { mm.work = "" // clear mm.work += "commandset*" mm.push(); // format the tape attributes. Add the next command on a newline if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.increment() /* ++ */ mm.work = "" // clear continue } //------------------- // here we begin to parse "test*" and "ortestset*" and "andtestset*" // //------------------- // eg: B"abc" {} or E"xyz" {} // transform and markup the different test types if (mm.work == "begintext*,*" || mm.work == "endtext*,*" || mm.work == "quote*,*" || mm.work == "class*,*" || mm.work == "eof*,*" || mm.work == "tapetest*,*" || mm.work == "begintext*.*" || mm.work == "endtext*.*" || mm.work == "quote*.*" || mm.work == "class*.*" || mm.work == "eof*.*" || mm.work == "tapetest*.*" || mm.work == "begintext*{*" || mm.work == "endtext*{*" || mm.work == "quote*{*" || mm.work == "class*{*" || mm.work == "eof*{*" || mm.work == "tapetest*{*") { if (strings.HasPrefix(mm.work, "begin")) { mm.work = "" // clear mm.work += "strings.HasPrefix(mm.work, " mm.work += mm.tape[mm.cell] /* get */ mm.work += ")" } if (strings.HasPrefix(mm.work, "end")) { mm.work = "" // clear mm.work += "strings.HasSuffix(mm.work, " mm.work += mm.tape[mm.cell] /* get */ mm.work += ")" } if (strings.HasPrefix(mm.work, "quote")) { mm.work = "" // clear mm.work += "mm.work == " mm.work += mm.tape[mm.cell] /* get */ } if (strings.HasPrefix(mm.work, "class")) { // go condition syntax // use helper function isInClass mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ mm.work += ", mm.work)" } // clear the tapecell for testeof and testtape because // they take no arguments. if (strings.HasPrefix(mm.work, "eof")) { mm.work = "" // clear mm.tape[mm.cell] = mm.work /* put */ mm.work += "mm.eof" } if (strings.HasPrefix(mm.work, "tapetest")) { mm.work = "" // clear mm.tape[mm.cell] = mm.work /* put */ mm.work += "mm.work == mm.tape[mm.cell]" } mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "test*" mm.push(); // the trick below pushes the right token back on the stack. mm.work += mm.tape[mm.cell] /* get */ mm.work += "*" mm.push(); continue } //------------------- // negated tests // eg: !B"xyz {} !(eof) {} !(==) {} // !E"xyz" {} // !"abc" {} // ![a-z] {} if (mm.work == "notbegintext*,*" || mm.work == "notendtext*,*" || mm.work == "notquote*,*" || mm.work == "notclass*,*" || mm.work == "noteof*,*" || mm.work == "nottapetest*,*" || mm.work == "notbegintext*.*" || mm.work == "notendtext*.*" || mm.work == "notquote*.*" || mm.work == "notclass*.*" || mm.work == "noteof*.*" || mm.work == "nottapetest*.*" || mm.work == "notbegintext*{*" || mm.work == "notendtext*{*" || mm.work == "notquote*{*" || mm.work == "notclass*{*" || mm.work == "noteof*{*" || mm.work == "nottapetest*{*") { if (strings.HasPrefix(mm.work, "notbegin")) { mm.work = "" // clear mm.work += "!strings.HasPrefix(mm.work," mm.work += mm.tape[mm.cell] /* get */ mm.work += ")" } if (strings.HasPrefix(mm.work, "notend")) { mm.work = "" // clear mm.work += "!strings.HasSuffix(mm.work," mm.work += mm.tape[mm.cell] /* get */ mm.work += ")" } if (strings.HasPrefix(mm.work, "notquote")) { mm.work = "" // clear mm.work += "mm.work != " mm.work += mm.tape[mm.cell] /* get */ } if (strings.HasPrefix(mm.work, "notclass")) { // produces !isInClass(.. or !isInList(.. or !isInRange(.. mm.work = "" // clear mm.work += "!" mm.work += mm.tape[mm.cell] /* get */ mm.work += ", mm.work)" } // clear the tapecell for testeof and testtape because // they take no arguments. if (strings.HasPrefix(mm.work, "noteof")) { mm.work = "" // clear mm.tape[mm.cell] = mm.work /* put */ mm.work += "!mm.eof" } if (strings.HasPrefix(mm.work, "nottapetest")) { mm.work = "" // clear mm.tape[mm.cell] = mm.work /* put */ mm.work += "mm.work != mm.tape[mm.cell]" } mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "test*" mm.push(); // the trick below pushes the right token back on the stack. mm.work += mm.tape[mm.cell] /* get */ mm.work += "*" mm.push(); continue } //------------------- // 3 tokens //------------------- mm.pop(); //----------------------------- // some 3 token errors!!! // not a comprehensive list if (mm.work == "{*quote*;*" || mm.work == "{*begintext*;*" || mm.work == "{*endtext*;*" || mm.work == "{*class*;*" || mm.work == "commandset*quote*;*" || mm.work == "command*quote*;*") { mm.push(); mm.push(); mm.push(); mm.work += "[pep error]\n invalid syntax near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ")" mm.work += " of script (misplaced semicolon?) \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } // to simplify subsequent tests, transmogrify a single command // to a commandset (multiple commands). if (mm.work == "{*command*}*") { mm.work = "" // clear mm.work += "{*commandset*}*" mm.push(); mm.push(); mm.push(); continue } // errors! mixing AND and OR concatenation if (mm.work == ",*andtestset*{*" || mm.work == ".*ortestset*{*") { // push the tokens back to make debugging easier mm.push(); mm.push(); mm.push(); mm.work += " error: mixing AND (.) and OR (,) concatenation in \n" mm.work += " in pep script near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += ") \n" mm.work += " " mm.work += "\n For example:" mm.work += "\n B\".\".!E\"/\".[abcd./] { print; } # Correct!" mm.work += "\n B\".\".!E\"/\",[abcd./] { print; } # Error! \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } //-------------------------------------------- // ebnf: command := keyword , quoted-text , ";" ; // format: add "text"; if (mm.work == "word*quote*;*") { mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ if (mm.work == "replace") { // error mm.work += "< command requires 2 parameters, not 1 \n" mm.work += "near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " of script. \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } // disable "while " syntax since it is not necessary if (mm.work == "while" || mm.work == "whilenot") { mm.work += "[error] while/whilenot should not have quoted \n" mm.work += "single character argument. Use eg: while [x] instead\n" mm.work += "near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " of script. \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } // check whether argument is single character, otherwise // throw an error. Also, convert to single quotes for go // which is if (mm.work == "delim" || mm.work == "escape" || mm.work == "unescape") { // This is trickier than I thought it would be. mm.work = "" // clear mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ // check that arg not empty, (but an empty quote is ok // for the second arg of 'replace' if (mm.work == "\"\"") { mm.work = "" // clear mm.work += "[pep error] near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (or char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += "): \n" mm.work += " command '" if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += mm.tape[mm.cell] /* get */ mm.increment() /* ++ */ mm.work += "' " mm.work += "cannot have an empty argument (\"\") \n" fmt.Printf("%s", mm.work) // print os.Exit(0) } // quoted text has the quotes still around it. // also handle escape characters like \n \r etc // Also, unicode escape sequences like \u0x2222 mm.clip() mm.clop() mm.clip() if (mm.work != "" && !strings.HasPrefix(mm.work,"\\")) { mm.work = "" // clear mm.work += "[pep error] Pep script error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += "): \n" mm.work += " command '" mm.work += mm.tape[mm.cell] /* get */ mm.work += "' takes only a single character argument. \n" fmt.Printf("%s", mm.work) // print os.Exit(0) } if (strings.HasPrefix(mm.work, "\\")) { mm.clip() if (mm.work != "") { mm.work = "" // clear mm.work += "[pep error] Pep script error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += "): \n" mm.work += " command '" if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += mm.tape[mm.cell] /* get */ mm.work += "' takes only a single character argument or \n" mm.work += " and escaped single char eg: \n \t \f etc" fmt.Printf("%s", mm.work) // print os.Exit(0) } } // replace double quotes with single for argument mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ mm.work = strings.Replace(mm.work, string('\''), string(mm.escape)+string('\''), -1) mm.work = strings.Replace(mm.work, string(mm.escape)+string('"'), string('"'), -1) mm.clip() mm.clop() mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "'" mm.work += mm.tape[mm.cell] /* get */ mm.work += "'" mm.tape[mm.cell] = mm.work /* put */ // re-get the command name if mm.cell > 0 { mm.cell-- } /* -- */ mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ } if (mm.work == "mark") { mm.work = "" // clear mm.work += "mm.marks[mm.cell] = " mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += " /* mark */" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } if (mm.work == "go") { mm.work = "" // clear // convert to go mm.work += "/* go to mark */\n" mm.work += "for ii := range mm.marks {\n" mm.work += " if mm.marks[ii] == " mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += " {\n" mm.work += " mm.cell = ii; break; \n" mm.work += " }\n" mm.work += "}" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } if (mm.work == "delim") { mm.work = "" // clear // the delimiter should be a single character, no? mm.work += "mm.delimiter = " mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += " /* delim */ " mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } if (mm.work == "add") { mm.work = "" // clear mm.work += "mm.work += " mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ // handle multiline text check this! \\n or \n /* replace */ mm.work = strings.Replace(mm.work, "\n", "\"\nmm.work += \"\\n", -1) mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } // not used now if (mm.work == "while") { mm.work = "" // clear mm.work += "/* while */\n" mm.work += "for mm.peep == " mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += " {\n" mm.work += " if mm.eof { break }\n mm.read()\n" mm.work += "}" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } // not used now if (mm.work == "whilenot") { mm.work = "" // clear mm.work += "/* whilenot */\n" mm.work += "for mm.peep != " mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += " {\n" mm.work += " if mm.eof { break }\n mm.read()\n}" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } if (mm.work == "until") { mm.work = "" // clear mm.work += "mm.until(" mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ // error until cannot have empty argument if (mm.work == "mm.until(\"\"") { mm.work = "" // clear mm.work += "Pep script error near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += "): \n" mm.work += " empty argument for 'until' \n" mm.work += " " mm.work += "\n For example:" mm.work += "\n until '.txt'; until \">\"; # correct " mm.work += "\n until ''; until \"\"; # errors! \n" fmt.Printf("%s", mm.work) // print os.Exit(0) } // handle multiline argument /* replace */ mm.work = strings.Replace(mm.work, "\n", "\\n", -1) mm.work += ");" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } if (mm.work == "escape") { mm.work = "" // clear mm.increment() /* ++ */ // argument still has quotes around it // it should be a single character since this has been previously // checked. mm.work += "mm.work = strings.Replace(mm.work, string(" mm.work += mm.tape[mm.cell] /* get */ mm.work += "), string(mm.escape)+string(" mm.work += mm.tape[mm.cell] /* get */ mm.work += "), -1)" if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } // replace \n with n for example (only 1 character) if (mm.work == "unescape") { mm.work = "" // clear mm.increment() /* ++ */ // use the machine escape char mm.work += "mm.work = strings.Replace(mm.work, string(mm.escape)+string(" mm.work += mm.tape[mm.cell] /* get */ mm.work += "), string(" mm.work += mm.tape[mm.cell] /* get */ mm.work += "), -1)" if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } // error, superfluous argument mm.work += ": command does not take an argument \n" mm.work += "near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " of script. \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear //state os.Exit(0) } //---------------------------------- // format: "while [:alpha:] ;" or whilenot [a-z] ; if (mm.work == "word*class*;*") { mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ if (mm.work == "while") { mm.work = "" // clear mm.work += "/* while */\n" mm.work += "for " mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += ", string(mm.peep)) {\n" mm.work += " if mm.eof { break }\n mm.read()\n}" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } if (mm.work == "whilenot") { mm.work = "" // clear mm.work += "/* whilenot */\n" mm.work += "for !" mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += ", string(mm.peep)) {\n" mm.work += " if mm.eof { break; }\n" mm.work += " mm.read()\n}" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } // error mm.work += " < command cannot have a class argument \n" mm.work += "line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += ": error in script \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } // arrange the parse> label loops if (mm.eof) { if (mm.work == "commandset*parse>*commandset*" || mm.work == "command*parse>*commandset*" || mm.work == "commandset*parse>*command*" || mm.work == "command*parse>*command*") { mm.work = "" // clear // indent both code blocks mm.work += " " mm.work += mm.tape[mm.cell] /* get */ /* replace */ mm.work = strings.Replace(mm.work, "\n", "\n ", -1) // go has labelled loops, but complains if the label // is not used. So we have to use the flag technique // to make restart with before/after/without the parse> label /* replace */ mm.work = strings.Replace(mm.work, "continue // restart", "break // restart", -1) mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.increment() /* ++ */ mm.increment() /* ++ */ mm.work += " " mm.work += mm.tape[mm.cell] /* get */ /* replace */ mm.work = strings.Replace(mm.work, "\n", "\n ", -1) mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear if mm.cell > 0 { mm.cell-- } /* -- */ if mm.cell > 0 { mm.cell-- } /* -- */ // add a block so that .reparse works before the parse> label. // it appears that go has labelled loops mm.work += "\n/* lex block */\n" mm.work += "for true { \n" mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n break \n}\n" mm.increment() /* ++ */ mm.increment() /* ++ */ mm.work += "if restart { restart = false; continue; }" // indent code block // add " "; get; replace "\n" "\n "; put; clear; // using flag technique mm.work += "\n// parse block \n" mm.work += "for true {\n" mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n break \n} // parse\n" if mm.cell > 0 { mm.cell-- } /* -- */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "commandset*" mm.push(); continue } } // ------------------------------- // 4 tokens // ------------------------------- mm.pop(); //------------------------------------- // bnf: command := replace , quote , quote , ";" ; // example: replace "and" "AND" ; if (mm.work == "word*quote*quote*;*") { mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ // check! go replace syntax // not used here // match1, err := regexp.MatchString("geeks", str) if (mm.work == "replace") { //--------------------------- // a command plus 2 arguments, eg replace "this" "that" mm.work = "" // clear mm.work += "/* replace */\n" // add 'if mm.work != "" { \n'; mm.work += "mm.work = strings.Replace(mm.work, " mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ mm.work += ", " mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ mm.work += ", -1)\n" if mm.cell > 0 { mm.cell-- } /* -- */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); continue } mm.work += "Pep script error on line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += " (character " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.work += "): \n" mm.work += " command does not take 2 quoted arguments. \n" fmt.Printf("%s", mm.work) // print os.Exit(0) } //------------------------------------- // format: begin { #* commands *# } // "begin" blocks which are only executed once (they // will are assembled before the "start:" label. They must come before // all other commands. // "begin*{*command*}*", if (mm.work == "begin*{*commandset*}*") { mm.work = "" // clear mm.increment() /* ++ */ mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "beginblock*" mm.push(); continue } // ------------- // parses and compiles concatenated tests // eg: 'a',B'b',E'c',[def],[:space:],[g-k] { ... // these 2 tests should be all that is necessary if (mm.work == "test*,*ortestset*{*" || mm.work == "test*,*test*{*") { mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ mm.work += " || " mm.increment() /* ++ */ mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "ortestset*{*" mm.push(); mm.push(); continue } // dont mix AND and OR concatenations // ------------- // AND logic // parses and compiles concatenated AND tests // eg: 'a',B'b',E'c',[def],[:space:],[g-k] { ... // it is possible to elide this block with the negated block // for compactness but maybe readability is not as good. // negated tests can be chained with non negated tests. // eg: B'http' . !E'.txt' { ... } if (mm.work == "test*.*andtestset*{*" || mm.work == "test*.*test*{*") { mm.work = "" // clear mm.work += mm.tape[mm.cell] /* get */ mm.work += " && " mm.increment() /* ++ */ mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "andtestset*{*" mm.push(); mm.push(); continue } //------------------------------------- // we should not have to check for the {*command*}* pattern // because that has already been transformed to {*commandset*}* if (mm.work == "test*{*commandset*}*" || mm.work == "andtestset*{*commandset*}*" || mm.work == "ortestset*{*commandset*}*") { mm.work = "" // clear // indent the code for readability mm.increment() /* ++ */ mm.increment() /* ++ */ mm.work += " " mm.work += mm.tape[mm.cell] /* get */ /* replace */ mm.work = strings.Replace(mm.work, "\n", "\n ", -1) mm.tape[mm.cell] = mm.work /* put */ if mm.cell > 0 { mm.cell-- } /* -- */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work = "" // clear mm.work += "if (" mm.work += mm.tape[mm.cell] /* get */ mm.work += ") {\n" mm.increment() /* ++ */ mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ // block end required mm.work += "\n}" if mm.cell > 0 { mm.cell-- } /* -- */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "command*" mm.push(); // always reparse/compile continue } // ------------- // multi-token end-of-stream errors // not a comprehensive list of errors... if (mm.eof) { if (strings.HasSuffix(mm.work, "begintext*") || strings.HasSuffix(mm.work, "endtext*") || strings.HasSuffix(mm.work, "test*") || strings.HasSuffix(mm.work, "ortestset*") || strings.HasSuffix(mm.work, "andtestset*")) { mm.work += " Error near end of script at line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += ". Test with no brace block? \n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (strings.HasSuffix(mm.work, "quote*") || strings.HasSuffix(mm.work, "class*") || strings.HasSuffix(mm.work, "word*")) { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "Error at end of pep script near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += ": missing semi-colon? \n" mm.work += "Parse stack: " mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (strings.HasSuffix(mm.work, "{*") || strings.HasSuffix(mm.work, "}*") || strings.HasSuffix(mm.work, ";*") || strings.HasSuffix(mm.work, ",*") || strings.HasSuffix(mm.work, ".*") || strings.HasSuffix(mm.work, "!*") || strings.HasSuffix(mm.work, "B*") || strings.HasSuffix(mm.work, "E*")) { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "Error: misplaced terminal character at end of script! (line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += "). \n" mm.work += "Parse stack: " mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } } // put the 4 (or less) tokens back on the stack mm.push(); mm.push(); mm.push(); mm.push(); if (mm.eof) { fmt.Printf("%s", mm.work) // print mm.work = "" // clear // create the virtual machine object code and save it // somewhere on the tape. mm.work += "" mm.work += "\n// code generated by \"translate.go.pss\" a pep script" mm.work += "\n// http://bumble.sf.net/books/pars/tr/" mm.work += "\n" mm.work += "\n" mm.work += "\n// s.HasPrefix can be used instead of strings.HasPrefix" mm.work += "\npackage main" mm.work += "\nimport (" mm.work += "\n \"fmt\"" mm.work += "\n \"bufio\" " mm.work += "\n \"strings\"" mm.work += "\n \"strconv\"" mm.work += "\n \"unicode\"" mm.work += "\n \"io\" " mm.work += "\n \"os\"" mm.work += "\n \"unicode/utf8\"" mm.work += "\n)" mm.work += "\n" mm.work += "\n// an alias for Println for brevity" mm.work += "\nvar pr = fmt.Println" mm.work += "\n" mm.work += "\n /* a machine for parsing */" mm.work += "\n type machine struct {" mm.work += "\n SIZE int // how many elements in stack/tape/marks" mm.work += "\n eof bool" mm.work += "\n charsRead int" mm.work += "\n linesRead int" mm.work += "\n escape rune " mm.work += "\n delimiter rune" mm.work += "\n counter int" mm.work += "\n work string" mm.work += "\n stack []string" mm.work += "\n cell int" mm.work += "\n tape []string" mm.work += "\n marks []string" mm.work += "\n peep rune" mm.work += "\n reader *bufio.Reader" mm.work += "\n }" mm.work += "\n" mm.work += "\n // there is no special init for structures" mm.work += "\n func newMachine(size int) *machine { " mm.work += "\n mm := machine{SIZE: size}" mm.work += "\n mm.eof = false // end of stream reached?" mm.work += "\n mm.charsRead = 0 // how many chars already read" mm.work += "\n mm.linesRead = 1 // how many lines already read" mm.work += "\n mm.escape = '\\\\'" mm.work += "\n mm.delimiter = '*' // push/pop delimiter (default \"*\")" mm.work += "\n mm.counter = 0 // a counter for anything" mm.work += "\n mm.work = \"\" // the workspace" mm.work += "\n mm.stack = make([]string, 0, mm.SIZE) // stack for parse tokens " mm.work += "\n mm.cell = 0 // current tape cell" mm.work += "\n // slices not arrays" mm.work += "\n mm.tape = make([]string, mm.SIZE, mm.SIZE) // a list of attribute for tokens " mm.work += "\n mm.marks = make([]string, mm.SIZE, mm.SIZE) // marked tape cells" mm.work += "\n // or dont initialse peep until \"parse()\" calls \"setInput()\"" mm.work += "\n // check! this is not so simple" mm.work += "\n mm.reader = bufio.NewReader(os.Stdin)" mm.work += "\n var err error" mm.work += "\n mm.peep, _, err = mm.reader.ReadRune()" mm.work += "\n if err == io.EOF { " mm.work += "\n mm.eof = true " mm.work += "\n } else if err != nil {" mm.work += "\n fmt.Fprintln(os.Stderr, \"error:\", err)" mm.work += "\n os.Exit(1)" mm.work += "\n }" mm.work += "\n return &mm" mm.work += "\n }" mm.work += "\n" mm.work += "\n // method syntax." mm.work += "\n // func (v * vertex) abs() float64 { ... }" mm.work += "\n // multiline strings are ok ?" mm.work += "\n" mm.work += "\n func (mm *machine) setInput(newInput string) {" mm.work += "\n print(\"to be implemented\")" mm.work += "\n }" mm.work += "\n" mm.work += "\n // read one utf8 character from the input stream and " mm.work += "\n // update the machine." mm.work += "\n func (mm *machine) read() { " mm.work += "\n var err error" mm.work += "\n if mm.eof { os.Exit(0) }" mm.work += "\n mm.charsRead += 1" mm.work += "\n // increment lines" mm.work += "\n if mm.peep == '\\n' { mm.linesRead += 1 }" mm.work += "\n mm.work += string(mm.peep)" mm.work += "\n // check!" mm.work += "\n mm.peep, _, err = mm.reader.ReadRune()" mm.work += "\n if err == io.EOF { " mm.work += "\n mm.eof = true " mm.work += "\n } else if err != nil {" mm.work += "\n fmt.Fprintln(os.Stderr, \"error:\", err)" mm.work += "\n os.Exit(1)" mm.work += "\n }" mm.work += "\n }" mm.work += "\n" mm.work += "\n // remove escape character: trivial method ?" mm.work += "\n // check the python code for this, and the c code in machine.interp.c" mm.work += "\n func (mm *machine) unescapeChar(c string) {" mm.work += "\n // if mm.work = \"\" { return }" mm.work += "\n mm.work = strings.Replace(mm.work, \"\\\\\"+c, c, -1)" mm.work += "\n }" mm.work += "\n" mm.work += "\n // add escape character : trivial" mm.work += "\n func (mm *machine) escapeChar(c string) {" mm.work += "\n mm.work = strings.Replace(mm.work, c, \"\\\\\"+c, -1)" mm.work += "\n }" mm.work += "\n" mm.work += "\n /** a helper function to count trailing escapes */" mm.work += "\n func (mm *machine) countEscapes(suffix string) int {" mm.work += "\n count := 0" mm.work += "\n ss := \"\"" mm.work += "\n if strings.HasSuffix(mm.work, suffix) {" mm.work += "\n ss = strings.TrimSuffix(mm.work, suffix)" mm.work += "\n }" mm.work += "\n for (strings.HasSuffix(ss, string(mm.escape))) { " mm.work += "\n ss = strings.TrimSuffix(ss, string(mm.escape))" mm.work += "\n count++" mm.work += "\n }" mm.work += "\n return count" mm.work += "\n }" mm.work += "\n" mm.work += "\n // reads the input stream until the workspace ends with the" mm.work += "\n // given character or text, ignoring escaped characters" mm.work += "\n func (mm *machine) until(suffix string) {" mm.work += "\n if mm.eof { return; }" mm.work += "\n // read at least one character" mm.work += "\n mm.read()" mm.work += "\n for true { " mm.work += "\n if mm.eof { return; }" mm.work += "\n // we need to count the mm.Escape chars preceding suffix" mm.work += "\n // if odd, keep reading, if even, stop" mm.work += "\n if strings.HasSuffix(mm.work, suffix) {" mm.work += "\n if (mm.countEscapes(suffix) % 2 == 0) { return }" mm.work += "\n }" mm.work += "\n mm.read()" mm.work += "\n }" mm.work += "\n } " mm.work += "\n" mm.work += "\n /* increment the tape pointer (command ++) and grow the " mm.work += "\n tape and marks arrays if necessary */" mm.work += "\n func (mm *machine) increment() { " mm.work += "\n mm.cell++" mm.work += "\n if mm.cell >= len(mm.tape) {" mm.work += "\n mm.tape = append(mm.tape, \"\")" mm.work += "\n mm.marks = append(mm.marks, \"\")" mm.work += "\n mm.SIZE++" mm.work += "\n }" mm.work += "\n }" mm.work += "\n" mm.work += "\n /* pop the last token from the stack into the workspace */" mm.work += "\n func (mm *machine) pop() bool { " mm.work += "\n if len(mm.stack) == 0 { return false }" mm.work += "\n // no, get last element of stack" mm.work += "\n // a[len(a)-1]" mm.work += "\n mm.work = mm.stack[len(mm.stack)-1] + mm.work" mm.work += "\n // a = a[:len(a)-1]" mm.work += "\n mm.stack = mm.stack[:len(mm.stack)-1]" mm.work += "\n if mm.cell > 0 { mm.cell -= 1 }" mm.work += "\n return true" mm.work += "\n }" mm.work += "\n" mm.work += "\n // push the first token from the workspace to the stack " mm.work += "\n func (mm *machine) push() bool { " mm.work += "\n // dont increment the tape pointer on an empty push" mm.work += "\n if mm.work == \"\" { return false }" mm.work += "\n // push first token, or else whole string if no delimiter" mm.work += "\n aa := strings.SplitN(mm.work, string(mm.delimiter), 2)" mm.work += "\n if len(aa) == 1 {" mm.work += "\n mm.stack = append(mm.stack, mm.work)" mm.work += "\n mm.work = \"\"" mm.work += "\n } else {" mm.work += "\n mm.stack = append(mm.stack, aa[0]+string(mm.delimiter))" mm.work += "\n mm.work = aa[1]" mm.work += "\n }" mm.work += "\n mm.increment()" mm.work += "\n return true" mm.work += "\n }" mm.work += "\n" mm.work += "\n // " mm.work += "\n func (mm *machine) printState() { " mm.work += "\n fmt.Printf(\"Stack %v Work[%s] Peep[%c] \\n\", mm.stack, mm.work, mm.peep)" mm.work += "\n fmt.Printf(\"Acc:%v Esc:%c Delim:%c Chars:%v\", " mm.work += "\n mm.counter, mm.escape, mm.delimiter, mm.charsRead)" mm.work += "\n fmt.Printf(\" Lines:%v Cell:%v EOF:%v \\n\", mm.linesRead, mm.cell, mm.eof)" mm.work += "\n for ii, vv := range mm.tape {" mm.work += "\n fmt.Printf(\"%v [%s] \\n\", ii, vv)" mm.work += "\n if ii > 4 { return; }" mm.work += "\n }" mm.work += "\n } " mm.work += "\n" mm.work += "\n // this is where the actual parsing/compiling code should go" mm.work += "\n // so that it can be used by other go classes/objects. Also" mm.work += "\n // should have a stream argument." mm.work += "\n func (mm *machine) parse(s string) {" mm.work += "\n } " mm.work += "\n" mm.work += "\n /* adapt for clop and clip */" mm.work += "\n func trimLastChar(s string) string {" mm.work += "\n r, size := utf8.DecodeLastRuneInString(s)" mm.work += "\n if r == utf8.RuneError && (size == 0 || size == 1) {" mm.work += "\n size = 0" mm.work += "\n }" mm.work += "\n return s[:len(s)-size]" mm.work += "\n }" mm.work += "\n" mm.work += "\n func (mm *machine) clip() {" mm.work += "\n cc, _ := utf8.DecodeLastRuneInString(mm.work)" mm.work += "\n mm.work = strings.TrimSuffix(mm.work, string(cc)) " mm.work += "\n }" mm.work += "\n" mm.work += "\n func (mm *machine) clop() {" mm.work += "\n _, size := utf8.DecodeRuneInString(mm.work) " mm.work += "\n mm.work = mm.work[size:] " mm.work += "\n }" mm.work += "\n" mm.work += "\n type fn func(rune) bool" mm.work += "\n // eg unicode.IsLetter('x')" mm.work += "\n /* check whether the string s only contains runes of type" mm.work += "\n determined by the typeFn function */" mm.work += "\n" mm.work += "\n func isInClass(typeFn fn, s string) bool {" mm.work += "\n if s == \"\" { return false; }" mm.work += "\n for _, rr := range s {" mm.work += "\n //if !unicode.IsLetter(rr) {" mm.work += "\n if !typeFn(rr) { return false }" mm.work += "\n }" mm.work += "\n return true" mm.work += "\n }" mm.work += "\n" mm.work += "\n /* range in format 'a,z' */" mm.work += "\n func isInRange(start rune, end rune, s string) bool {" mm.work += "\n if s == \"\" { return false; }" mm.work += "\n for _, rr := range s {" mm.work += "\n if (rr < start) || (rr > end) { return false }" mm.work += "\n }" mm.work += "\n return true" mm.work += "\n }" mm.work += "\n" mm.work += "\n /* list of runes (unicode chars ) */" mm.work += "\n func isInList(list string, s string) bool {" mm.work += "\n return strings.ContainsAny(s, list)" mm.work += "\n }" mm.work += "\n" mm.work += "\nfunc main() {" mm.work += "\n // This size needs to be big for some applications. Eg " mm.work += "\n // calculating big palindromes. Really " mm.work += "\n // it should be dynamically allocated." mm.work += "\n var size = 30000" mm.work += "\n var mm = newMachine(size);" mm.work += "\n var restart = false; " mm.work += "\n // the go compiler complains when modules are imported but" mm.work += "\n // not used, also if vars are not used." mm.work += "\n if restart {}; unicode.IsDigit('0'); strconv.Itoa(0);" mm.work += "\n " // save the code in the current tape cell mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear //--------------------- // check if the script correctly parsed (there should only // be one token on the stack, namely "commandset*" or "command*"). mm.pop(); mm.pop(); if (mm.work == "commandset*" || mm.work == "command*") { mm.work = "" // clear // indent generated code for readability. mm.work += " " mm.work += mm.tape[mm.cell] /* get */ /* replace */ mm.work = strings.Replace(mm.work, "\n", "\n ", -1) mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear // restore the go preamble from the tape mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ //add 'script: \n'; mm.work += "for !mm.eof { \n" mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n }\n" mm.work += "}\n" mm.work += "\n\n// end of generated 'go' code\n" // put a copy of the final compilation into the tapecell // so it can be inspected interactively. mm.tape[mm.cell] = mm.work /* put */ fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } if (mm.work == "beginblock*commandset*" || mm.work == "beginblock*command*") { mm.work = "" // clear // indentation not needed here //add ""; get; //replace "\n" "\n"; put; clear; // indent main code for readability. mm.increment() /* ++ */ mm.work += " " mm.work += mm.tape[mm.cell] /* get */ /* replace */ mm.work = strings.Replace(mm.work, "\n", "\n ", -1) mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear if mm.cell > 0 { mm.cell-- } /* -- */ // get go preamble (Machine object definition) from tape mm.increment() /* ++ */ mm.increment() /* ++ */ mm.work += mm.tape[mm.cell] /* get */ if mm.cell > 0 { mm.cell-- } /* -- */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" mm.increment() /* ++ */ // a labelled loop for "quit" (but quit can just exit?) //add "script: \n"; mm.work += "for !mm.eof { \n" mm.work += mm.tape[mm.cell] /* get */ // end block marker required in 'go' mm.work += "\n }\n" mm.work += "}\n" mm.work += "\n\n// end of generated golang code\n" // put a copy of the final compilation into the tapecell // for interactive debugging. mm.tape[mm.cell] = mm.work /* put */ fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } mm.push(); mm.push(); // try to explain some more errors for mm.pop() {} /* unstack */ if (strings.HasPrefix(mm.work, "parse>")) { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "[error] pep syntax error:\n" mm.work += " The parse> label cannot be the 1st item \n" mm.work += " of a script \n" fmt.Printf("%s", mm.work) // print os.Exit(0) } mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work = "" // clear mm.work += "[error] After compiling with 'translate.go.pss' (at EOF): \n " mm.work += " parse error in input script. \n " fmt.Printf("%s", mm.work) // print mm.work = "" // clear for mm.pop() {} /* unstack */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "Parse stack: " mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" mm.work += " * debug script " mm.work += " >> pep -If script -i 'some input' \n " mm.work += " * debug compilation. \n " mm.work += " >> pep -Ia asm.pp script' \n " fmt.Printf("%s", mm.work) // print mm.work = "" // clear os.Exit(0) } // not eof // there is an implicit .restart command here (jump start) break } // parse } } // end of generated 'go' code