#*

  This version, bash.history.comments.pss, only prints those commands in
  the .bash_history file which have a comment (or several comment lines
  starting with a hash) above the 'timestamp' or between the
  timestamp and the bash command.

  This is an example of a task that becomes quite tricky in [sed]
  
  Parse a bash history file which has some explanatory 
  comments above or below the timestamp for commands. This is a format 
  that I use in my bash history file to remind me of something that I did
  a while ago, and also to make it easier to search for the command.
  
  Appears to be more or less working, and only takes about 1 second for a 35000
  line history file eg:
  >> pep -f pars/eg/history.pss ~/.bash_history

TESTING

  use the helper functions in helpers.pars.sh to translate to other 
  languages and run.

  * translate to python and run
  ---
    pep -f tr/translate.py.pss eg/bash.history.pss > eg/py/bash.history.pss
    chmod a+x eg/py/bash.history.pss
    cat ~/.bash_history | eg/py/bash.history.pss > test1.txt
    # now compare with the output from the interpreted script
    pep -f eg/bash.history.pss ~/.bash_history > test2.txt
    vimdiff test1.txt test2.txt
  ,,,

NOTES

  The [pep] accumulator register is used to count how many 
  commands are filtered. Trivial commands, like 'cd' or 'ls', are 
  only filtered out if they don't have a comment.

HISTORY

  1 march 2025
    started to adapt this from bash.history.pss  
    appears to be working. Had a segmentation fault because 
    I forgot to clear the pep://workspace buffer before creating
    a new token, and the stack must have got too big? or my 
    c code is dodgy.

*#

  # Start-up block: seed the parse stack with an empty "recordset*" token.
  # Because a record set always exists, the grammar never needs a separate
  # rule for the very first record.
  begin {
    add "recordset*";
    push;
  }
  # --- lexing: turn one line of the history file into one parse token ---
  # Read one character. When it is the newline that ended the previous line,
  # discard it so the workspace starts empty for the new line.
  read; 
  [\n] { 
    # just to debug
    # lines; print;
    clear; 
  }
  # Read the rest of the line, stopping before the next newline.
  whilenot [\n]; 
  # Ignore blank and whitespace-only lines: no token is produced, but control
  # still goes to the parse section, where the end-of-input checks live.
  "",[:space:] { clear; .reparse }
  # Save the line text in the current tape cell so that the token pushed
  # below carries the text as its attribute.
  put;
  # Lines beginning with "#" (but not consisting of a lone "#") are either
  # timestamps or comments.
  B"#".!"#" { 
    # Only "#" and digits: a bash history timestamp line.
    [#0123456789] {
      clear; add "timestamp*"; push; .reparse
    }
    # Any other "#..." line is an explanatory comment.
    clear; add "comment*"; push; .reparse
  }

  # Anything reaching this point is a command line. Classify it as
  # "trivial*" (a routine command, a candidate for removal) or "command*".
  # The parse rules decide later whether it is kept: a command with a
  # comment attached may be kept even when it is trivial.

  # Commands shorter than 5 characters are trivial: clipping 4 characters
  # off the end leaves an empty workspace.
  clip;
  clip;
  clip;
  clip;
  "" {
    clear;
    add "trivial*";
    push;
    .reparse
  }

  # Restore the full command text from the tape, then compare it with the
  # list of routine commands (prefix tests first, exact matches after).
  clear;
  get;
  B"blog.",B"aa.",B"ardu.",B"df ",B"du ",B"mv ",B"cp ",B"less ",
  B"vim ",B"rm ",B"mkdir ",B"find ",B"locate ",B"cd ",B"ls ",
  "df","cd","ls","pwd","hist","books","bk","ho",
  "updatedb","bashrc","vimrc","os","cos","ccos","make" {
    clear;
    add "trivial*";
    push;
    .reparse
  }

  # Everything else is an ordinary command.
  clear;
  add "command*";
  push;

parse>
  # Parse phase: a shift-reduce grammar over the tokens recordset*, record*,
  # comment*, timestamp*, command* and trivial*. Every rule that matches
  # rewrites the top of the stack and jumps back here with .reparse.

  # for debugging
  # add "line "; lines; add " char "; chars; add ": "; print; clear; 
  # add "line "; lines; add ": "; print; clear; 
  # unstack; print; stack; add "\n"; print; clear;

  # ----------------
  # 2 tokens
  # Bring the top two stack tokens into the workspace. The tape pointer then
  # sits on the cell holding the text of the first of them.
  pop; pop; 

  # ignore duplicated timestamps. 
  # Only the text of the second (later) timestamp is kept.
  "timestamp*timestamp*" {
    clear; ++; get; --; put; clear;
    add "timestamp*"; push; .reparse
  }

  # handle multiline comments
  # Consecutive comment lines are joined with a newline into one comment.
  "comment*comment*" {
    clear; get; add "\n"; ++; get; --; put; clear;
    add "comment*"; push; .reparse
  }

  # A "record*record*" rule is not needed because an initial recordset
  # always exists (see the begin block).
  #"record*record*","recordset*record*" {
  # Append a finished record to the accumulated output text.
  "recordset*record*" {
    # double space for readability
    clear; get; add "\n\n"; ++; get; --; put; clear;
    # debug code
    # a+; count; add " record!\n"; print; clear;
    add "recordset*"; push; .reparse
  }

  # this will be compiled differently from r*r*
  # A command directly after the record set has no comment and no
  # timestamp: drop it and leave the record set text untouched.
  "recordset*command*" {
    # clear; get; add "\n"; ++; get; --; put; clear;
    # just ignore commands with no comments
    a+; # count filtered commands
    clear; add "recordset*"; push; .reparse
  }

  # Same treatment for an uncommented trivial command.
  "recordset*trivial*" {
    a+; # count filtered commands
    clear; add "recordset*"; push; .reparse
  }

  # At end of input, a timestamp or comment left over directly after the
  # record set has no command to attach to. Relabel it as a record (its text
  # stays in the same tape cell) so that the recordset*record* rule appends
  # it to the output.
  (eof) {
    # clean up trailing comments etc
    "recordset*timestamp*","recordset*comment*" {
      clear; add "recordset*record*"; push; push; .reparse 
    }
  }

  # -------------
  # 3 parse tokens
  # Pop one more token so the workspace holds the top three stack tokens.
  # The tape pointer then sits on the cell of the first of the three.
  pop;

  # remove trivial commands without comments
  "recordset*timestamp*trivial*" {
    a+; # count filtered commands
    clear; add "recordset*"; push; .reparse
  }

  # ignore duplicated timestamps. 
  # The comment moves into the first cell and the later timestamp into the
  # second, so the stack becomes comment* timestamp*.
  "timestamp*comment*timestamp*" {
    clear; ++; get; --; put; clear; ++; ++; get; --; put; --; clear;
    add "comment*timestamp*"; push; push;  .reparse
  }

  # amalgamate comments before and after the timestamp
  # The timestamp text stays in the middle cell; both comments are joined
  # with a newline in the first cell.
  "comment*timestamp*comment*" {
    clear; 
    get; ++; ++; add "\n"; get; --; --; put; clear;
    add "comment*timestamp*"; push; push; .reparse
  }

  # A commented command: build the record as comment, timestamp, command,
  # one per line. Trivial commands are kept too because they have a comment.
  "comment*timestamp*command*","comment*timestamp*trivial*" {
    clear; get; add "\n"; ++; get; add "\n"; ++; get; --; --; put; clear;
    add "record*"; push; .reparse
  }

  # dont remove trivial commands with comments
  "timestamp*comment*command*","timestamp*comment*trivial*" {
    clear; 
    # switch the order to make comment precede timestamp
    ++; get; add "\n"; --; get; add "\n"; 
    ++; ++; get; --; --; put; clear;
    add "record*"; push; .reparse
  }

  "recordset*timestamp*command*" {
    # clear; ++; get; add "\n"; ++; get; --; put; --; clear;

    # just ignore commands with no comments
    a+; # count filtered
    clear; add "recordset*"; push; .reparse
  }

  # resolve commands and trivial command with comments
  # (a comment followed directly by a command, with no timestamp)
  # NOTE(review): comment and command are joined with "\n\n" here, while the
  # other record rules use a single "\n" - confirm the blank line is wanted.
  "recordset*comment*command*","recordset*comment*trivial*" {
    clear; ++; get; add "\n\n"; ++; get; --; put; --; clear;
    add "recordset*record*"; push; push; .reparse
  }

  # End of input with both a comment and a timestamp still waiting for a
  # command that never arrives. Without these rules the leftover pair would
  # stay on the stack and the final check would report a parse failure.
  # The pair is folded into one record (comment first, then timestamp),
  # in the same way a lone trailing comment or timestamp is kept.
  (eof) {
    "recordset*comment*timestamp*" {
      clear; ++; get; add "\n"; ++; get; --; put; --; clear;
      add "recordset*record*"; push; push; .reparse
    }
    "recordset*timestamp*comment*" {
      # switch the order to make comment precede timestamp
      clear; ++; ++; get; add "\n"; --; get; put; --; clear;
      add "recordset*record*"; push; push; .reparse
    }
  }

  # No rule matched: put the tokens back on the stack unchanged.
  push; push; push;

  # Final step at end of input, reached only when no grammar rule matched
  # (every rule leaves through .reparse). A successful parse leaves the
  # single token "recordset*" on the stack; its tape cell holds the output.
  (eof) {
     pop; pop;
     # Anything other than a lone record set means the grammar could not
     # reduce the input: restore the stack, report it and stop.
     !"recordset*" {
       push; push; add "# History file did not parse well!\n"; print; clear;
       add "# Parse stack was: "; print; clear; unstack; add "\n"; print;
       quit;
     }
     # Success: print the collected records followed by a summary line.
     # The accumulator counts every command dropped for lack of a comment,
     # trivial or not, so the summary does not call them "trivial".
     "recordset*" { 
       clear; get; 
       add "\n# History file parsed and filtered by pars/eg/bash.history.pss \n"; 
       add "# "; count; add " commands (without preceding comments) were removed.\n"; 
       print;
     }
  }