#*
 
 This script attempts to solve the rosettacode (www.rosettacode.org)
 challenge of converting a basic CSV (comma separated values) file 
 into an html table. The CSV in the challenge is limited: no commas in
 field values, and a newline ends each row. But this script allows a
 more realistic CSV format.

 The first row of the data is the field names of the data and should
 be marked up with <thead> etc. The fields may contain special html 
 chars such as < and > 

NOTES

  examples here have a file name prefix of "ro." if they are the 
  solution to a rosetta code problem.

  There is no start token for a record. 

TOKENS
  nl* - new line, end of line
  field* - one csv field
  fieldset* - a number of fields
  record* - one record (line) or a set of csv fields
  recordset* - all of the records parsed so far

HISTORY

  2 sept 2021
    continued development. The script seems verbose but handles
    quite a lot. eg quoted text as a csv field, escaped quotes,
    empty fields. Found a way to eliminate the trailing nl* token
    dilemma.

  31 aug 2021
    script begun

*#

  # using the empty recordset trick:
  # Before any input is read, seed the parse stack with a "recordset*"
  # token whose tape cell is still empty. The very first record can then
  # be reduced by the same "recordset*record*" rule as every later
  # record, so no special rule for the first record is required.
  begin { 
    add "recordset*"; push; 
  }
  # ---- lexical phase: turn input characters into nl* and field* tokens ----
  #
  # One pass through this section produces at most one token and then
  # runs into the parse section below. The attribute of a field* token 
  # (its tape cell) is the html-escaped field text wrapped in <td>..</td>.
  read; 

  # ignore whitespace outside of quotes
  " " { 
    while [ ]; clear; 
    # If those blanks were the last thing in the input then go to the 
    # parse section, so that its (eof) rules still get a chance to run. 
    # A plain restart would go straight back to "read" with no input left
    # and the finished table would never be printed.
    (eof) { .reparse }
    .restart 
  }

  "\n" { put; clear; add "nl*"; push; .reparse }

  # A comma introduces the next field of the line (which may be empty).
  "," { 
    clear; 
    # Look at the top of the stack: after each parse pass it is
    # "recordset*" only when no field of the current line has been seen.
    # In that case the comma is the 1st character of the line, which means
    # that the 1st field is empty. Start the fieldset with an empty cell 
    # so that the columns of this row do not shift to the left.
    pop;
    "recordset*" {
      push; add "<td></td>"; put; clear; add "fieldset*"; 
    }
    # put back the fieldset* (the old one, or the one just created)
    push; 

    # Blanks between the comma and the field text are outside of quotes.
    while [ ]; clear; 
    # Pick up an opening quote if there is one, so that a quoted field 
    # is recognised after a comma and not only at the start of a line.
    # NOTE(review): relies on a double quote being accepted inside a 
    # list class - confirm against the pep class syntax.
    while ["]; 
    # 2 quotes in a row: an empty quoted field 
    '""' { clear; }
  }

  # Quoted field: read to the closing quote (commas and newlines inside
  # the quotes belong to the field), drop both quotes and unescape \" 
  '"' { until '"'; clip; clop; unescape '"'; }

  # Unquoted field text, or whatever follows a closing quote, up to the
  # next delimiter. This reads nothing if a delimiter or the end of input
  # comes next (an empty field).
  whilenot [\n,];

  # escape the html special characters and build the table cell
  replace "&" "&amp;"; replace "<" "&lt;"; replace ">" "&gt;";
  put; clear; add "<td>"; get; add "</td>"; put; clear;
  add "field*"; push; 
  
parse>
  # ---- parse phase: reduce the tokens on the stack ----
  #
  # Grammar (attributes are html fragments held in the tape cells):
  #   recordset field   -> recordset fieldset
  #   recordset nl      -> recordset            (blank line)
  #   fieldset field    -> fieldset
  #   fieldset nl       -> record
  #   fieldset <eof>    -> record               (no final newline)
  #   recordset record  -> recordset
  # The accumulator counts the records that have been reduced so far.

  # useful for debugging
  #add "line "; lines; add " char "; chars; add ": "; print; clear; 
  #add "\n"; unstack; print; clip; stack; 

  # 1 token
  pop; 
  # a record may be terminated by the end of input. The markup is the 
  # same as for a record which is terminated by a newline.
  (eof)."fieldset*" {
    clear; add "<tr>\n "; get; add "</tr>\n"; put; clear;
    add "record*"; push; .reparse
  }
  # 2 tokens
  pop; 
  # the 1st field of a line starts a new fieldset. The attribute stays
  # in the same tape cell, only the token name changes.
  "recordset*field*" {
    replace "field" "fieldset"; push; push; .reparse 
  }
  # ignore blank lines
  "recordset*nl*" {
    clip; clip; clip; push; .reparse 
  }
  # append the cell of the new field to the cells of the fieldset
  "fieldset*field*" {
    clear; get; ++; get; --; put; clear;
    add "fieldset*"; push; .reparse
  }
  # a newline completes the row
  "fieldset*nl*" {
    clear; add "<tr>\n "; get; add "</tr>\n"; put; clear;
    add "record*"; push; .reparse
  }
  "recordset*record*" {
    # count the record and append its row to the rows gathered so far
    clear; a+; get; 
    ++; get; --; put; clear;
    # if the count is 1, then this is the 1st 
    # record and should be marked up as a table header.
    count; "1" {
      clear; get; 
      # <thead> may only contain rows, so wrap the row instead of
      # replacing it, and turn its cells into header cells. All ">" 
      # characters of the field text were escaped to &gt; when the cells
      # were built, so "td>" can only match the cell tags.
      replace "<tr>" "<thead><tr>"; replace "</tr>" "</tr></thead>";
      replace "td>" "th>";
      put;
    }
    clear; add "recordset*"; push; .reparse
  }

  (eof) {
    !"recordset*" {
      # put the popped tokens back first, so that the report below 
      # shows the whole parse stack and not the stack minus its top
      push; push;
      add "[invalid csv]\nThe parse stack was: "; print;
      clear; unstack; add "\n"; print; quit;
    }
    # everything reduced to one recordset: print the finished table
    clear; 
    add "<table>\n"; get; add "</table>\n"; 
    add "[valid csv!] Found "; count; add " records.\n"; 
    print; quit;
  }
  # no rule matched: restore the stack and go on to read more input
  push; push;