#*

ABOUT

  A script to translate the "brainf**k" language into the rust
  language. This is also an exercise to learn something about rust.

BRAINFORK SYNTAX

  The brainf**k language is an esoteric (not considered useful for
  practical programming) language with an extremely simple syntax.

  * Valid brainfork commands and equivalent c commands
  ----
    ">"   pp++
    "<"   pp--
    "+"   data[pp]++
    "-"   data[pp]--
    "[]"  while data[pp] > 0
    "."   putchar
    ","   getchar
    "#"   print state (optional command).
  ,,,,

STATUS

  14 april 2025
    just started based on the script /eg/brainfork.c.pss

TESTING

  * translate a brainfork program to rust, compile and execute
  ----
    pep -f ../brainfork.torust.pss -i ',.+.+.' > test.rs
    rustc test.rs
    ./test
  ,,,,

TOKENS

  literal   [] +-><.,#
  ilist*    a list of increment decrement instructions
  mlist*    a list of >>< pointer movement instructions
  com*      a command or block of commands.

NOTES

  Brainfork is my name for the brainf**k language.

  Could include an error check block and help block here as a
  demonstration.

  This implementation of the brain compiler still doesn't pass the
  test completely in eg/brain/inout.b since it outputs "bL" instead
  of "LB". Not sure why.

  Other things to think about, include array bounds checking, and
  handling of EOF.

  This script demonstrates pep/nom parsing and translating one of
  the simplest possible formal languages.

  Put a line/char number in the open bracket token in case the
  brackets are not balanced. This allows a much better error message
  to be given. This is a technique that can be used in many languages
  for error messages.

  I also use parsing here to reduce strings of ++---- commands to
  one compiled statement. And the same for <<<<>> strings

HISTORY

  14 april
    eliminated the comset* token which represented a list of
    commands. So I use com* for one command or a block of commands
    or a list of commands.

  12 april 2025
    Still need to fix the grammar. Perhaps it would be better to
    just have a com* token instead of comset* and com* this seems
    to simplify greatly the grammar.
having another look at this. It seems like an interesting example of
    trying to 'optimize' the compiled c code. lists of similar commands
    are being reduced to one command.

  30 july 2022
    Started to adapt from brain.1to1.c.pss

*#

  # ------------------------------------------------------------------
  # OVERVIEW OF THIS SCRIPT
  #
  # Lexing phase (before the parse label): one character is read per
  # cycle. Each command character is turned into a parse token on the
  # stack, and the Rust text for that command is stored in the tape
  # cell that belongs to the token.
  #
  # Parse phase (after the parse label): token sequences on the stack
  # are reduced. Runs of + and - become one "ilist*" token, runs of
  # > and < become one "mlist*" token, and everything eventually
  # becomes a single "com*" token whose tape cell holds the complete
  # translated program body.
  #
  # The generated program uses:
  #   data   an array of SIZE 32 bit signed cells
  #   pp     the index of the current cell
  # ------------------------------------------------------------------

  read;

  #">","<","+","-" { put; add "*"; push; .reparse }

  # Pointer movement and cell increment/decrement. The character itself
  # becomes the token (eg ">*") and the Rust statement is the attribute.
  ">","<","+","-" {
    # save the command character while the translation is built
    put;
    # no explicit bounds checking is generated
    ">" { clear; add "pp += 1;"; }
    # this must decrement (it used to emit the same text as ">")
    "<" { clear; add "pp -= 1;"; }
    "+" { clear; add "data[pp] += 1;"; }
    "-" { clear; add "data[pp] -= 1;"; }
    # after the swap the workspace holds the command character and
    # the tape cell holds the translation; now mark the parse token
    swap; add "*"; push; .reparse
  }

  # Input and output commands are complete commands straight away.
  ",","." {
    "." {
      clear;
      # write the low byte of the current cell to stdout
      add "io::stdout().write_all(&[data[pp] as u8]).unwrap();";
    }
    "," {
      clear;
      # read one byte; end of input exits with 0, a read error with 1
      add "{\n";
      add "  let mut byte = [0u8; 1];\n";
      add "  match io::stdin().read(&mut byte) {\n";
      add "    Ok(1) => data[pp] = byte[0] as i32,\n";
      add "    Ok(_) => std::process::exit(0),\n";
      add "    Err(_) => std::process::exit(1),\n";
      add "  }\n";
      add "}";
    }
    put; clear; add "com*"; push; .reparse
  }

  # Brackets become "[*" and "]*" tokens. The tape cell of the token
  # holds a ready-made error message containing the position, which
  # is printed later if the brackets turn out to be unbalanced.
  "[","]" {
    put; add "*"; push;
    # make an error message for later maybe
    add "unbalanced bracket '"; --; get; add "' ";
    add "near line:char "; lines; add ":"; chars;
    put; ++; clear; .reparse
  }

  # Any other character is ignored (just cleared). The disabled code
  # below would instead keep it as a dummy command so that the ignored
  # character ends up in the output as a comment.
  !"" {
    #*
    replace "\n" "\\n"; replace "\t" "\\t";
    replace "\f" "\\f"; replace "\r" "\\r";
    put; clear; add "// ignored character '"; get; add "'";
    put; clear; add "com*"; push;
    *#
    clear;
  }

parse>

  # for debugging, print the token stack as a line comment in the
  # generated output
  add "// line "; lines; add " char "; chars; add ": "; print; clear;
  unstack; print; stack;
  add "\n"; print; clear;

  # ------------
  # 2 tokens
  pop; pop;

  # brackets with nothing in them is really an error because
  # can create an infinite loop and does nothing anyway.
  "[*]*" {
    clear;
    add "[Brainf syntax error]\n Empty bracket '[]' ";
    add "near line:char "; lines; add ":"; chars; add "\n";
    print; quit;
  }

  # redundant commands or inverse
  ">*<*","<*>*" { clear; .reparse }
  "+*-*","-*+*" { clear; .reparse }

  # The accumulator keeps the running total of a list of +s and -s
  # (or of >s and <s) so that one statement is generated instead of
  # many. If this is the 1st command of a list, then the accumulator
  # is zeroed and the first command is counted as well.
  # A negative total would be written as "+= -n", which does not
  # compile for the unsigned index pp, so it is rewritten as "-= n".

  "ilist*-*","-*-*" {
    B"-*" { zero; a-; }
    a-; clear;
    add "data[pp] += "; count; add ";";
    replace "+= -" "-= ";
    put; clear; add "ilist*"; push; .reparse
  }

  "ilist*+*","+*+*" {
    B"+*" { zero; a+; }
    a+; clear;
    add "data[pp] += "; count; add ";";
    replace "+= -" "-= ";
    put; clear; add "ilist*"; push; .reparse
  }

  "mlist*>*",">*>*" {
    B">*" { zero; a+; }
    a+; clear;
    add "pp += "; count; add ";";
    replace "+= -" "-= ";
    put; clear; add "mlist*"; push; .reparse
  }

  "mlist*<*","<*<*" {
    B"<*" { zero; a-; }
    a-; clear;
    add "pp += "; count; add ";";
    replace "+= -" "-= ";
    put; clear; add "mlist*"; push; .reparse
  }

  # making commands into commandsets (disabled)
  #*
  E"com*".!B"com*".!B"comset*".!"com*" {
    replace "com*" "comset*"; push; push; reparse;
  }
  *#

  # join two commands into one, separated by a newline.
  # we have to reduce mlists later
  "com*com*" {
    clear; get; add "\n"; ++; get; --; put;
    clear; add "com*"; push; .reparse
  }

  # change the ilist (+/-) token to ordinary command when it is
  # not followed by +/-
  B"ilist*".!"ilist*".!E"+*".!E"-*" {
    replace "ilist*" "com*"; push; push; .reparse
  }

  # cannot resolve +/-/>/< until we have seen the last one
  # in the series. So we need some kind of "look-ahead" parsing
  B"-*".!"-*".!E"-*".!E"+*" {
    replace "-*" "com*"; push; push; .reparse
  }
  B"+*".!"+*".!E"-*".!E"+*" {
    replace "+*" "com*"; push; push; .reparse
  }
  B">*".!">*".!E">*".!E"<*" {
    replace ">*" "com*"; push; push; .reparse
  }
  B"<*".!"<*".!E"<*".!E">*" {
    replace "<*" "com*"; push; push; .reparse
  }

  # same logic as ilist
  B"mlist*".!"mlist*".!E">*".!E"<*" {
    replace "mlist*" "com*"; push; push; .reparse
  }

  # also handle (eof) cases: there is no look-ahead token left, so a
  # trailing list or single command is resolved straight away.
  (eof) {
    # There may be 1 or 2 tokens, it doesnt matter.
    E"mlist*",E"ilist*",E">*",E"<*",E"-*",E"+*" {
      replace ">" "com"; replace "<" "com";
      replace "-" "com"; replace "+" "com";
      replace "mlist" "com"; replace "ilist" "com";
      push; push; .reparse
    }
  }

  # ----------
  # 3 tokens
  pop;

  # two commands followed by some look-ahead token: join the two
  # commands and keep the look-ahead token with its attribute.
  B"com*com*".!"com*com*" {
    replace "com*com*" "com*"; push; push;
    # join the attributes of the two commands in the first cell
    --; --; get; add "\n"; ++; get; --; put; ++; ++;
    # transfer unknown token attrib
    clear; get; --; put; ++; clear; .reparse
  }

  # a bracketed block becomes a while loop
  "[*com*]*" {
    # indent the braced code
    clear; add "  "; ++; get; replace "\n" "\n  "; put;
    clear; add "while data[pp] > 0 {\n"; get; --; add "\n}"; put;
    clear; add "com*"; push; .reparse
  }

  (eof) {
    # the whole input reduced to one command: print the program
    "com*","ilist*","mlist*",".*",",*" {
      clear;
      # indent the generated code
      add "  "; get; replace "\n" "\n  "; put;
      clear;
      add "/* Code translated from the 'brainf***' esoteric language \n";
      add "   by the pep/nom parser with the script: \n";
      add "   bumble.sf.net/books/pars/eg/brainfork.torust.pss */\n\n";
      # Read/Write are only needed when , or . occur in the input
      add "#[allow(unused_imports)]\n";
      add "use std::io::{self, Read, Write};\n";
      add "const SIZE: usize = 65000; \n";
      # data or pp may never be modified by a very short program
      add "#[allow(unused_mut)]\n";
      add "fn main() {\n";
      add "  let mut data = [0i32; SIZE];\n";
      add "  let mut pp: usize = 0;\n";
      get;
      add "\n}\n";
      print; quit;
    }

    "[*com*",B"[*" {
      # get the saved error message from the [ token cell
      clear; get; add "\n"; print; quit;
    }

    "com*]*",E"]*" {
      # get the saved error message from the ] token cell
      clear; ++; get; add "\n"; print; quit;
    }

    # Anything else did not parse. Save the token string, then clear
    # the workspace so that the help text is printed on its own,
    # followed by the tokens (they used to be printed twice).
    put; clear;
    add "The 'Brainfork' code didnt parse well:\n";
    add "  Legal Brainfork commands are \n";
    add "    > increment pointer \n";
    add "    < decrement pointer \n";
    add "    + increment value at pointer \n";
    add "    - decrement value at pointer \n";
    add "    [ ] do while value at pointer > 0 \n";
    add "    . print value of pointer \n";
    add "    , get one character from stdin \n";
    add "    (optional command) '#' print state \n";
    add "  Everything else is ignored.\n";
    add "  The input parsed as tokens:\n ";
    print; clear; get; add "\n"; print; quit;
  }

  # no reduction applied: put the tokens back and read more input
  push; push; push;