// code generated by "translate.go.pss" a pep script
// http://bumble.sf.net/books/pars/tr/

// s.HasPrefix can be used instead of strings.HasPrefix
package main

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"
)

// strconv, unicode and utf8 are not used by the machine methods in this part
// of the file. Referencing them here keeps the import block valid no matter
// which of them the rest of the file ends up using (the go compiler rejects
// imports that are never used).
var (
	_ = strconv.Itoa
	_ = unicode.IsSpace
	_ = utf8.RuneError
)

// pr is an alias for fmt.Println, for brevity.
var pr = fmt.Println

// machine is a machine for parsing: a workspace string, a stack of tokens,
// a tape of attributes with a parallel slice of marks, and a one-rune
// look-ahead ("peep") over a buffered input reader.
type machine struct {
	SIZE      int           // how many elements in stack/tape/marks
	eof       bool          // end of stream reached?
	charsRead int           // how many chars already read
	linesRead int           // how many lines already read
	escape    rune          // escape character used by until/escapeChar/unescapeChar
	delimiter rune          // push/pop delimiter (default "*")
	counter   int           // a counter for anything
	work      string        // the workspace
	stack     []string      // stack for parse tokens
	cell      int           // current tape cell
	tape      []string      // a list of attributes for tokens
	marks     []string      // marked tape cells
	peep      rune          // next rune of input, already read from reader
	reader    *bufio.Reader // input stream
}

// newMachine returns a machine with size tape cells and marks, a stack with
// capacity size, escape '\\', delimiter '*', and linesRead starting at 1.
// Input is taken from os.Stdin and the first rune is read immediately into
// peep, so eof is already true if standard input is empty. A read error
// other than io.EOF is reported on stderr and terminates the process with
// status 1.
func newMachine(size int) *machine {
	mm := &machine{
		SIZE:      size,
		linesRead: 1,
		escape:    '\\',
		delimiter: '*',
		stack:     make([]string, 0, size),
		tape:      make([]string, size),
		marks:     make([]string, size),
		reader:    bufio.NewReader(os.Stdin),
	}
	mm.readPeep()
	return mm
}

// readPeep reads the next rune from the input into peep. It sets eof when
// the stream is exhausted; any other read error is printed to stderr and the
// process exits with status 1.
func (mm *machine) readPeep() {
	r, _, err := mm.reader.ReadRune()
	mm.peep = r
	switch {
	case err == io.EOF:
		mm.eof = true
	case err != nil:
		fmt.Fprintln(os.Stderr, "error:", err)
		os.Exit(1)
	}
}

// setInput makes the machine read from newInput instead of its current
// reader. The eof flag is cleared and peep is primed with the first rune of
// newInput (eof becomes true straight away when newInput is empty). The
// workspace, stack, tape and the chars/lines counters are left untouched.
func (mm *machine) setInput(newInput string) {
	mm.reader = bufio.NewReader(strings.NewReader(newInput))
	mm.eof = false
	mm.readPeep()
}

// read appends one utf8 character (the current peep) to the workspace,
// updates the character and line counters, and reads the following rune
// into peep. Calling read once eof has been reached exits the program with
// status 0.
func (mm *machine) read() {
	if mm.eof {
		os.Exit(0)
	}
	mm.charsRead++
	// the line count goes up when the newline itself is consumed
	if mm.peep == '\n' {
		mm.linesRead++
	}
	mm.work += string(mm.peep)
	mm.readPeep()
}

// unescapeChar removes the machine's escape character in front of every
// occurrence of c in the workspace. An empty c leaves the workspace
// unchanged.
func (mm *machine) unescapeChar(c string) {
	if c == "" {
		return
	}
	mm.work = strings.Replace(mm.work, string(mm.escape)+c, c, -1)
}

// escapeChar puts the machine's escape character in front of every
// occurrence of c in the workspace. An empty c leaves the workspace
// unchanged.
func (mm *machine) escapeChar(c string) {
	if c == "" {
		return
	}
	mm.work = strings.Replace(mm.work, c, string(mm.escape)+c, -1)
}

// countEscapes is a helper that counts the escape characters immediately
// preceding a trailing suffix in the workspace. It returns 0 when the
// workspace does not end with suffix.
func (mm *machine) countEscapes(suffix string) int {
	if !strings.HasSuffix(mm.work, suffix) {
		return 0
	}
	esc := string(mm.escape)
	rest := strings.TrimSuffix(mm.work, suffix)
	count := 0
	for strings.HasSuffix(rest, esc) {
		rest = strings.TrimSuffix(rest, esc)
		count++
	}
	return count
}

// until reads the input stream until the workspace ends with the given
// character or text, ignoring escaped occurrences. At least one character is
// read unless the input is already exhausted, and reading stops at end of
// input even if suffix was never seen.
func (mm *machine) until(suffix string) {
	if mm.eof {
		return
	}
	// read at least one character
	mm.read()
	for !mm.eof {
		// an odd number of escape chars before suffix means the suffix
		// itself is escaped: keep reading. An even number means stop.
		if strings.HasSuffix(mm.work, suffix) && mm.countEscapes(suffix)%2 == 0 {
			return
		}
		mm.read()
	}
}

// increment advances the tape pointer (command ++) and grows the tape and
// marks slices by one cell, bumping SIZE, when the pointer moves past the
// end of the tape.
func (mm *machine) increment() {
	mm.cell++
	if mm.cell >= len(mm.tape) {
		mm.tape = append(mm.tape, "")
		mm.marks = append(mm.marks, "")
		mm.SIZE++
	}
}

// pop moves the last token on the stack to the front of the workspace and
// steps the tape pointer back by one (never below 0). It returns false,
// changing nothing, when the stack is empty.
func (mm *machine) pop() bool {
	last := len(mm.stack) - 1
	if last < 0 {
		return false
	}
	mm.work = mm.stack[last] + mm.work
	mm.stack = mm.stack[:last]
	if mm.cell > 0 {
		mm.cell--
	}
	return true
}

// push moves the first token of the workspace (everything up to and
// including the first delimiter, or the whole workspace if it contains no
// delimiter) onto the stack and increments the tape pointer. It returns
// false, without moving the tape pointer, when the workspace is empty.
func (mm *machine) push() bool {
	// dont increment the tape pointer on an empty push
	if mm.work == "" {
		return false
	}
	delim := string(mm.delimiter)
	if ii := strings.Index(mm.work, delim); ii >= 0 {
		end := ii + len(delim)
		mm.stack = append(mm.stack, mm.work[:end])
		mm.work = mm.work[end:]
	} else {
		mm.stack = append(mm.stack, mm.work)
		mm.work = ""
	}
	mm.increment()
	return true
}

// printState writes the stack, workspace, peep, the counters and flags, and
// tape cells 0 to 5 (fewer if the tape is shorter) to standard output.
func (mm *machine) printState() {
	fmt.Printf("Stack %v Work[%s] Peep[%c] \n", mm.stack, mm.work, mm.peep)
	fmt.Printf("Acc:%v Esc:%c Delim:%c Chars:%v", mm.counter, mm.escape, mm.delimiter, mm.charsRead)
	fmt.Printf(" Lines:%v Cell:%v EOF:%v \n", mm.linesRead, mm.cell, mm.eof)
	for ii, vv := range mm.tape {
		fmt.Printf("%v [%s] \n", ii, vv)
		if ii > 4 {
			return
		}
	}
}

// goToMark moves the tape pointer to the first cell whose mark equals mark.
// If no cell carries that mark it prints "badmark '<mark>'" to standard
// output and exits the program with status 1.
func (mm *machine) goToMark(mark string) {
	for ii, name := range mm.marks {
		if name == mark {
			mm.cell = ii
			return
		}
	}
	fmt.Printf("badmark '%s'", mark)
	os.Exit(1)
}

// this is where the actual parsing/compiling code should go
// so that it can be used by other go classes/objects. Also it
// should have a stream argument.
func (mm *machine) parse(s string) { } /* adapt for clop and clip */ func trimLastChar(s string) string { r, size := utf8.DecodeLastRuneInString(s) if r == utf8.RuneError && (size == 0 || size == 1) { size = 0 } return s[:len(s)-size] } func (mm *machine) clip() { cc, _ := utf8.DecodeLastRuneInString(mm.work) mm.work = strings.TrimSuffix(mm.work, string(cc)) } func (mm *machine) clop() { _, size := utf8.DecodeRuneInString(mm.work) mm.work = mm.work[size:] } type fn func(rune) bool // eg unicode.IsLetter('x') /* check whether the string s only contains runes of type determined by the typeFn function */ func isInClass(typeFn fn, s string) bool { if s == "" { return false; } for _, rr := range s { //if !unicode.IsLetter(rr) { if !typeFn(rr) { return false } } return true } /* range in format 'a,z' */ func isInRange(start rune, end rune, s string) bool { if s == "" { return false; } for _, rr := range s { if (rr < start) || (rr > end) { return false } } return true } /* list of runes (unicode chars ) */ func isInList(list string, s string) bool { return strings.ContainsAny(s, list) } func main() { // This size needs to be big for some applications. Eg // calculating big palindromes. Really // it should be dynamically allocated. var size = 30000 var mm = newMachine(size); var restart = false; // the go compiler complains when modules are imported but // not used, also if vars are not used. if restart {}; unicode.IsDigit('0'); strconv.Itoa(0); for !mm.eof { /* lex block */ for true { mm.read() /* read */ // make char number relative to line, for error messages if (isInList("\n", mm.work)) { mm.charsRead = 0 /* nochars */ } // newlines can separate commands in (gnu) sed so we will // just add a dummy ';' here. Also, no trailing ; is required if (isInList("\n", mm.work)) { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += ";*" mm.push(); break } // ignore extraneous white-space? 
if (isInClass(unicode.IsSpace, mm.work)) { mm.work = "" // clear if (mm.eof) { break } restart = true; break // restart } // comments, convert to java comments if (mm.work == "#") { mm.work = "" // clear mm.work += "/* " mm.until("\n"); if (strings.HasSuffix(mm.work, "\n")) { mm.clip() } mm.work += " */\n" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear // uncomment line below to include comments in output // add "comment*"; push; .reparse } // literal tokens '{' and '}' are used to group commands in // sed, ';' is used to separate commands and ',' to separate line // ranges. ! is the postfix negation operator for ranges if (mm.work == "," || mm.work == "{" || mm.work == "}" || mm.work == ";" || mm.work == "!") { mm.tape[mm.cell] = mm.work /* put */ mm.work += "*" mm.push(); break } // various actions: print, delete, swap if (mm.work == "=" || mm.work == "p" || mm.work == "P" || mm.work == "l" || mm.work == "d" || mm.work == "D" || mm.work == "F" || mm.work == "g" || mm.work == "G" || mm.work == "h" || mm.work == "H" || mm.work == "n" || mm.work == "N" || mm.work == "x" || mm.work == "z") { if (mm.work == "=") { mm.work = "" // clear // print line-number + newline mm.work += "System.out.println(mm.linesRead); /* '=' */" } if (mm.work == "d") { mm.work = "" // clear // 'd' delete pattern-space, restart // the if true trick is necessary to avoid 'unreachable statement' // java compile errors (when multiple 'd' commands are given) mm.work += "if (true) { mm.patternSpace.setLength(0); continue; } /* 'd' */" } if (mm.work == "D") { mm.work = "" // clear // add "/* 'D' delete pattern-space to 1st \\n, restart */"; mm.work += "if (mm.patternSpace.indexOf(\"\\n\") > -1) {\n" mm.work += " mm.patternSpace.delete(0, mm.patternSpace.indexOf(\"\\n\"));\n" mm.work += " mm.readNext = false; if (true) continue; \n" mm.work += "} else { mm.patternSpace.setLength(0); continue; } /* 'd' */" } if (mm.work == "F") { // F: print input filename + newline // maybe unsupported 
in java mm.work = "" // clear mm.work += "System.out.println(\"\"); /* F */" } if (mm.work == "g") { // g: replace patt-space with hold-space mm.work = "" // clear mm.work += "mm.patternSpace.setLength(0); \n" mm.work += "mm.patternSpace.append(mm.holdSpace); /* 'g' */" } if (mm.work == "G") { // G; append hold-space to patt-space + \\n" mm.work = "" // clear mm.work += "mm.patternSpace.append(\"\\n\" + mm.holdSpace); /* 'G' */" } if (mm.work == "h") { // h: replace hold-space with patt-space mm.work = "" // clear mm.work += "mm.holdSpace.setLength(0); \n" mm.work += "mm.holdSpace.append(mm.patternSpace); /* 'h' */" } if (mm.work == "H") { // H: append patt-space to hold-space + newline mm.work = "" // clear mm.work += "mm.holdSpace.append(\"\\n\" + mm.patternSpace); /* 'H' */" } if (mm.work == "l") { // print pattern-space unambiguously, synonym for p ? mm.work = "" // clear mm.work += "System.out.println(mm.patternSpace); /* 'l' */" } if (mm.work == "n") { // n: print patt-space, get next line into patt-space mm.work = "" // clear mm.work += "if (mm.autoPrint) { System.out.println(mm.patternSpace); }\n" mm.work += "mm.patternSpace.setLength(0);\n" mm.work += "mm.readLine(); /* 'n' */" } if (mm.work == "N") { // N: append next line to patt-space + newline mm.work = "" // clear mm.work += "mm.patternSpace.append('\\n'); " mm.work += "mm.readLine(); /* 'N' */" } if (mm.work == "p") { mm.work = "" // clear mm.work += "System.out.println(mm.patternSpace); /* 'p' */" } if (mm.work == "P") { // P: print pattern-space up to 1st newline" mm.work = "" // clear mm.work += "if (mm.patternSpace.indexOf(\"\\n\") > -1) {\n" mm.work += " System.out.println(\n" mm.work += " mm.patternSpace.substring(0, mm.patternSpace.indexOf(\"\\n\")));\n" mm.work += "} else { System.out.println(mm.patternSpace); }" } if (mm.work == "x") { // x: # swap pattern-space with hold-space mm.work = "" // clear mm.work += "mm.swap(); /* x */" } if (mm.work == "z") { // z: delete pattern-space, NO 
restart mm.work = "" // clear mm.work += "mm.patternSpace.setLenth(0); /* z */" } mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "action*" mm.push(); break } // line numbers are also selectors if (isInRange('0','9', mm.work)) { /* while */ for isInRange('0','9', string(mm.peep)) { if mm.eof { break } mm.read() } mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "number*" mm.push(); break } // $ is the last line of the file if (mm.work == "$") { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "number*" mm.push(); break } // patterns - only execute commands if lines match if (mm.work == "/") { // save line/char number for error message mm.work = "" // clear mm.work += "near line/char " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += ":" mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.until("/"); if (!strings.HasSuffix(mm.work,"/")) { mm.work = "" // clear mm.work += "Missing '/' to terminate " mm.work += mm.tape[mm.cell] /* get */ mm.work += "?\n" fmt.Printf("%s", mm.work) // print os.Exit(0) } mm.clip() // java .matches method matches whole string not substring // so we need to add .* at beginning and end, but not if regex // begins with ^ or ends with $. 
complicated hey if (!strings.HasSuffix(mm.work,"$")) { mm.work += ".*$" } if (!strings.HasPrefix(mm.work,"^")) { mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "^.*" mm.work += mm.tape[mm.cell] /* get */ } mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear // add any delimiter for pattern here, or none mm.work += "\"" mm.work += mm.tape[mm.cell] /* get */ mm.work += "\"" mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "pattern*" mm.push(); break } // read transliteration commands if (mm.work == "y") { // save line/char number for error message mm.work = "" // clear mm.work += "near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += ", char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear // allow spaces between 'y' and '/' although gnu set doesn't mm.until("/"); if (!strings.HasSuffix(mm.work,"/") || !isInList(" /", mm.work)) { mm.work = "" // clear mm.work += "Missing '/' after 'y' transliterate command\n" mm.work += "Or trailing characters " mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" fmt.Printf("%s", mm.work) // print os.Exit(0) } // save line/char number for error message mm.work = "" // clear mm.work += "near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += ", char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.until("/"); if (!strings.HasSuffix(mm.work,"/")) { mm.work = "" // clear mm.work += "Missing 2nd '/' after 'y' transliterate command " mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" fmt.Printf("%s", mm.work) // print os.Exit(0) } if (mm.work == "/") { mm.work = "" // clear mm.work += "Sed syntax error? 
\n" mm.work += " Empty regex after 'y' transliterate command " mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" fmt.Printf("%s", mm.work) // print os.Exit(0) } // replace pattern found mm.clip() mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "y/" mm.work += mm.tape[mm.cell] /* get */ mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear // save line/char number for error message mm.work += "near line " mm.work += strconv.Itoa(mm.linesRead) /* lines */ mm.work += ", char " mm.work += strconv.Itoa(mm.charsRead) /* chars */ mm.increment() /* ++ */ mm.tape[mm.cell] = mm.work /* put */ if mm.cell > 0 { mm.cell-- } /* -- */ mm.work = "" // clear mm.until("/"); if (!strings.HasSuffix(mm.work,"/")) { mm.work = "" // clear mm.work += "Missing 3rd '/' after 'y' transliterate command " mm.work += mm.tape[mm.cell] /* get */ mm.work += "\n" fmt.Printf("%s", mm.work) // print os.Exit(0) } mm.clip() mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work /* swap */ mm.work += "/" mm.work += mm.tape[mm.cell] /* get */ mm.work += "/" // y/// does not have modifiers (unlike s///) mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear mm.work += "action*" mm.push(); break } // various commands that have an option word parameter // e has two variants // "e" { replace "e" "e; # exec patt-space command and replace"; } if (mm.work == "b" || mm.work == "e" || mm.work == "q" || mm.work == "Q" || mm.work == "t" || mm.work == "T") { // ignore intervening space if any mm.tape[mm.cell] = mm.work /* put */ mm.work = "" // clear /* while */ for isInList(" ", string(mm.peep)) { if mm.eof { break } mm.read() } mm.work = "" // clear // A bit more permissive that gnu-sed which doesn't allow // read to end in ';'. 
/* whilenot */ for !isInList(" ;}", string(mm.peep)) { if mm.eof { break; } mm.read() } // word parameters are optional to these commands // just add a space to separate command from parameter if (mm.work != "") { mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work /* swap */ mm.work += " " mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work /* swap */ } mm.work, mm.tape[mm.cell] = mm.tape[mm.cell], mm.work /* swap */ mm.work += mm.tape[mm.cell] /* get */ // hard to implement because java has no goto ? if (strings.HasPrefix(mm.work, "b")) { mm.work += "; # branch to