#*

 A script to translate the "Brainf***" language into plain c.
 This version will try to compact strings of +++--- and >>><<< etc
 into 1 c statement, which should make the c code much more readable.

 * Valid brainf commands
 >>
   ">"   p++
   "<"   p--
   "+"   data[pp]++
   "-"   data[pp]--
   "[]"  while data[pp] > 0
   "."   putchar
   ","   getchar
 >>

 "#" print state (optional command).

STATUS

 not working

TESTING

 * translate a Brainf*** program to c, compile and execute
 ----
   pep -f ../brainf.toclang.pss -i ',.+.+.' > test.c;
   gcc test.c -o test; ./test
 ,,,,

NOTES

 This implementation of the brain compiler still doesn't pass the
 test completely in eg/brain/inout.b since it outputs "bL" instead
 of "LB". Not sure why.

 Other things to think about include array bounds checking and
 handling of EOF.

 This script demonstrates pep/nom parsing and translating one of the
 simplest possible formal languages. The script could be greatly
 reduced but some error messages are printed with line and character
 number.

 Hardly any parsing is required, just for "[" and "]".

 I put a line:char number message into the attribute for each [ or ],
 and then use that message if the brackets are unbalanced. This works
 because we don't need that tape-cell (token attribute) for anything
 else. This technique could be useful for other languages.

HISTORY

 30 july 2022
   Started to adapt from brain.1to1.c.pss

*#

  read;

  # NOTE(review): the rule on the next line, which would turn > < + -
  # into the tokens used by the run-compaction rules after parse>,
  # is commented out. As written those four characters fall through
  # to the catch-all rule further down and are discarded, and the
  # nested tests for them inside the ',' '.' block can never match.
  # Confirm whether it should be re-enabled.
  #">","<","+","-" { put; add "*"; push; .reparse }

  # commands that translate 1 to 1 into a c statement
  ",","." {
    # if this is 1st ><+- of list, then zero the accumulator
    ">" { clear; add "pp++;"; }
    "<" { clear; add "pp--;"; }
    "+" {
      clear;
      # bounds checking: data[] has SIZE cells so the valid indices
      # are 0 to SIZE-1, hence the ">=" test
      add "if ((pp < 0) || (pp >= SIZE)) {\n";
      add ' printf("data pointer out of bounds\\n"); \n';
      add " exit(1);\n";
      add "}\n";
      add "data[pp] += 1;";
    }
    "-" {
      # also do bounds checking here
      clear; add "data[pp]--;";
    }
    "." { clear; add "putc(data[pp], stdout);"; }
    "," {
      clear;
      # Exit cleanly if a previous read already hit end-of-file, and
      # exit with an error if a previous read failed. The stream
      # error flag is tested, not the cell value, because a cell
      # can legitimately hold -1 (eg after "-" on a zero cell).
      add "if (feof(stdin)) { exit(0); } \n";
      add "if (ferror(stdin)) { exit(1); } \n";
      add "data[pp] = getc(stdin);";
    }
    # save the c code as the token attribute and push a command token
    put; clear; add "com*"; push; .reparse
  }

  # Make a bracket token, and store an "unbalanced bracket" message
  # (with the current line and character number) as its attribute.
  # The message is only printed if the bracket turns out to be
  # unmatched at end of input.
  "[","]" {
    put; add "*"; push;
    add "unbalanced bracket '"; --; get; add "' ";
    add "near line:char "; lines; add ":"; chars;
    put; ++; clear;
    .reparse
  }

  # Ignore any other character. The disabled code below would have
  # parsed it as a dummy command so that the ignored character ended
  # up in the output as a c comment.
  !"" {
    #*
    replace "\n" "\\n"; replace "\t" "\\t";
    replace "\f" "\\f"; replace "\r" "\\r";
    put; clear;
    add "// ignored character '"; get; add "'";
    put; clear;
    add "com*"; push;
    *#
    clear;
  }

parse>

  # for debugging, add a c-style comment showing the position
  # in the input and the current contents of the stack
  add "// line "; lines; add " char "; chars; add ": "; print; clear;
  unstack; print; stack; add "\n"; print; clear;

  # ------------
  # 2 tokens
  pop; pop;

  # brackets with nothing in them is really an error because
  # can create an infinite loop and does nothing anyway.
  "[*]*" {
    clear;
    add "[Brainf syntax error]\n Empty bracket '[]' ";
    add "near line:char "; lines; add ":"; chars; add "\n";
    print; quit;
  }

  # opposite commands cancel each other
  ">*<*","<*>*" { clear; .reparse }
  "+*-*","-*+*" { clear; .reparse }

  # NOTE(review): there are no "ilist*+*" / "ilist*-*" rules to match
  # the "mlist*>*" / "mlist*<*" rules below, so a run of more than
  # two + or - is not folded into the ilist token. Confirm intent.

  "+*+*" {
    # use the accumulator to generate the cell increment code
    zero; a+; a+; clear;
    add "data[pp] += "; count; add ";";
    # store the statement as the attribute before naming the token,
    # in the same way as the "-*-*" rule
    put; clear;
    add "ilist*"; push; .reparse
  }

  "-*-*" {
    # use the accumulator to generate the cell decrement code
    # (the count is negative, so the c code is "+= -2")
    zero; a-; a-; clear;
    add "data[pp] += "; count; add ";";
    put; clear;
    add "ilist*"; push; .reparse
  }

  ">*>*" {
    # use the accumulator to generate the pointer translation code
    zero; a+; a+; clear;
    add "pp += "; count; add ";";
    # store the statement as the attribute before naming the token,
    # in the same way as the "<*<*" rule
    put; clear;
    add "mlist*"; push; .reparse
  }

  "<*<*" {
    # use the accumulator to generate the pointer translation code
    zero; a-; a-; clear;
    add "pp += "; count; add ";";
    put; clear;
    add "mlist*"; push; .reparse
  }

  "mlist*>*" {
    # use the accumulator to keep track of how many >s and <s
    # and generate 1 c statement instead of multiple
    a+; clear; add "pp += "; count; add ";";
    put; clear; add "mlist*"; push; .reparse
  }

  "mlist*<*" {
    a-; clear; add "pp += "; count; add ";";
    put; clear; add "mlist*"; push; .reparse
  }

  # join 2 commands into a command set, one c statement per line
  "com*com*","comset*com*" {
    clear; get; add "\n"; ++; get; --; put; clear;
    add "comset*"; push; .reparse
  }

  # change the ilist (+/-) token to ordinary command when it is
  # not followed by +/-
  B"ilist*".!"ilist*".!E"+*".!E"-*" {
    replace "ilist*" "com*"; push; push; .reparse
  }

  # cannot resolve +/-/>/< until we have seen the last one
  # in the series. So we need some kind of "look-ahead" parsing
  B"-*".!"-*".!E"-*".!E"+*" {
    replace "-*" "com*"; push; push; .reparse
  }

  B"+*".!"+*".!E"-*".!E"+*" {
    replace "+*" "com*"; push; push; .reparse
  }

  B">*".!">*".!E">*".!E"<*" {
    replace ">*" "com*"; push; push; .reparse
  }

  B"<*".!"<*".!E"<*".!E">*" {
    replace "<*" "com*"; push; push; .reparse
  }

  # also handle (eof) cases
  (eof) {
    # There may be 1 or 2 tokens, it doesnt matter.
    E">*",E"<*",E"-*",E"+*" {
      replace ">" "com"; replace "<" "com";
      replace "-" "com"; replace "+" "com";
      push; push; .reparse
    }
  }

  # same logic as ilist: all four conditions must hold, so they are
  # joined with "." (and) exactly as in the ilist test above
  B"mlist*".!"mlist*".!E">*".!E"<*" {
    replace "mlist*" "com*"; push; push; .reparse
  }

  # ----------
  # 3 tokens
  pop;

  # NOTE(review): the generated loop test is "> 0", as listed in the
  # header; cells are ints and can go negative, so a loop entered on
  # a negative cell is skipped. Confirm this is intended.
  "[*com*]*","[*comset*]*" {
    # indent the braced code
    clear; add " "; ++; get; replace "\n" "\n "; put;
    # wrap it in a c while loop and store that in the 1st tape cell
    clear; add "while (data[pp] > 0) {\n"; get; --; add "\n}";
    put; clear; add "com*"; push; .reparse
  }

  (eof) {

    # success: everything reduced to one command or command set
    "com*","comset*" {
      clear;
      # indent the generated code
      add " "; get; replace "\n" "\n "; put; clear;
      add "/* Code translated from the 'Brainf***' esoteric language \n";
      add " by the pep/nom parser with the script: \n";
      add " bumble.sf.net/books/pars/eg/brain.c.pss */\n\n";
      # stdio.h for printf/putc/getc/feof/ferror, stdlib.h for exit
      add "#include <stdio.h>\n";
      add "#include <stdlib.h>\n";
      add "#define SIZE 65000 \n";
      add "int main(int argc, char *argv[]) {\n";
      add " int data[SIZE] = {0};\n";
      add " int pp = 0;\n";
      get; add "\n}\n"; print; quit;
    }

    "[*comset*","[*com*",B"[*" {
      # get the saved error message from the [ token cell
      clear; get; add "\n"; print; quit;
    }

    "comset*]*","com*]*",E"]*" {
      # get the saved error message from the ] token cell
      clear; ++; get; add "\n"; print; quit;
    }

    # Anything else at end of input is a parse failure.
    # NOTE(review): the workspace is not cleared after "put", so the
    # leftover tokens appear to be printed both before the message
    # and after it. Confirm whether a "clear" is missing here.
    # The message is built from consecutive adds; the text is the
    # same as a single long string.
    put;
    add " The 'Brainf' code didnt parse well:";
    add " Legal Brainf commands are";
    add " > increment pointer";
    add " < decrement pointer";
    add " + increment value at pointer";
    add " - [ ] . ,";
    add " (optional command) '#' print state";
    add " Everything else is ignored.";
    add " The input parsed as tokens:\n ";
    print; clear; get; add "\n"; print; quit;
  }

  # no reduction was possible: restore the tokens and read more input
  push; push; push;