#*

 This script attempts to solve the rosettacode (www.rosettacode.org)
 challenge of converting a basic CSV (comma separated values) file
 into an html table. The CSV of the challenge is limited: no commas in
 field values, and a newline ends the row. This script allows a more
 realistic CSV format: quoted fields (which may contain commas and
 newlines), backslash-escaped quotes inside quoted fields, and empty
 fields.

 The first row of the data holds the field names and should eventually
 be marked up as a table header. That is NOT implemented yet: every
 row, including the first, is emitted as a row of <td> cells.

 The fields may contain the special html characters "&", "<" and ">".
 They are replaced by character entities before any markup is added.

NOTES

 Examples here have a file name prefix of "ro." if they are the
 solution to a rosetta code problem.

 There is no start token for a record.

 Tape usage: the attribute of each token is the html text built so far
 for that token (a cell, a run of cells, a row, or all rows).

TOKENS

 nl*        - new line, end of line
 field*     - one csv field
 fieldset*  - a number of fields
 record*    - one record (line) or a set of csv fields
 recordset* - a number of records

HISTORY

 2 sept 2021
   continued development. The script seems verbose but handles
   quite a lot, eg quoted text as a csv field, escaped quotes,
   empty fields.
 31 aug 2021
   script begun

*#

  # using the empty recordset trick: the stack always starts with a
  # recordset* token (with an empty attribute) so that the first record
  # can be reduced with the same rule as every other record.
  begin { add "recordset*"; push; }

  read;

  # ignore whitespace outside of quotes
  " " { while [ ]; clear; .restart }

  # an unquoted newline terminates the current record
  "\n" { put; clear; add "nl*"; push; .reparse }

  # a quoted field: everything up to the next unescaped double quote
  '"' {
    # strip the surrounding quotes and the backslashes of escaped quotes
    until '"'; clip; clop; unescape '"';
    # "&" must be escaped first, otherwise the "&" of the entities
    # produced by the next two replacements would be escaped again.
    replace "&" "&amp;"; replace "<" "&lt;"; replace ">" "&gt;";
    put; clear;
    add "<td>"; get; add "</td>"; put; clear;
    add "field*"; push;
    # skip blanks between the closing quote and the field separator
    while [ ]; clear;
    (eof) { .reparse }
    read;
    # A newline after a quoted field ends the record just like any other
    # newline, so it has to become an nl* token. Discarding it here would
    # merge this record with the next one.
    "\n" { put; clear; add "nl*"; push; .reparse }
    !(eof).!",".!"\n" {
      clear;
      add "CSV syntax error: quoted fields must be ended by \n";
      add " either a ',' a newline, or the end-of-file";
      # report the error and stop; without print/quit the message
      # would be thrown away by the clear below.
      add "\n"; print; quit;
    }
    # the character just read was the "," separator
    clear; .reparse
  }

  # a separator: drop it and fall through, so that whatever follows
  # (possibly nothing at all) is read as the next field.
  "," { clear; }

  # maybe there is a more elegant way to do this, which
  # would also remove the need for the fieldset*field*nl* rule
  # and the recordset*field*nl* rule.

  # an unquoted field: everything up to the next comma or newline
  whilenot [\n,];
  replace "&" "&amp;"; replace "<" "&lt;"; replace ">" "&gt;";
  put; clear;
  add "<td>"; get; add "</td>"; put; clear;
  add "field*"; push;

parse>

  # useful for debugging: print the input position and the parse stack.
  # unstack prepends the stack tokens to the "\n" in the workspace, so
  # the clip removes that trailing newline again before restacking.
  add "line "; lines; add " char "; chars; add ": "; print; clear;
  add "\n"; unstack; print; clip; stack;

  # 1 token
  pop;

  # a record may be terminated by the end of input
  (eof)."fieldset*" {
    clear; add "<tr>"; get; add "</tr>\n"; put; clear;
    add "record*"; push; .reparse
  }

  # 2 tokens
  pop;

  # the first field of a record starts a new fieldset
  "recordset*field*" {
    replace "field" "fieldset"; push; push; .reparse
  }

  # ignore blank lines
  "recordset*nl*" {
    clip; clip; clip; push; .reparse
  }

  # append the cell to the cells collected so far
  "fieldset*field*" {
    clear; get; ++; get; --; put; clear;
    add "fieldset*"; push; .reparse
  }

  # a newline turns the collected cells into a table row
  "fieldset*nl*" {
    clear; add "<tr>\n "; get; add "</tr>\n"; put; clear;
    add "record*"; push; .reparse
  }

  # append the row to the rows collected so far
  "recordset*record*" {
    clear; get;
    # if the recordset is empty, then this is the 1st
    # record and should be marked up as a table header.
    # TODO: not implemented, the first row is treated like the others.
    ++; get; --; put; clear;
    add "recordset*"; push; .reparse
  }

  # 3 tokens
  pop;

  # last field of a record with more than one field
  "fieldset*field*nl*" {
    clear; add "<tr>\n "; get; ++; get; --; add "</tr>\n"; put; clear;
    add "record*"; push; .reparse
  }

  # a record consisting of a single field: the row is built in the
  # tape cell of the field, which becomes the cell of the new record*
  "recordset*field*nl*" {
    clear; add "<tr>\n "; ++; get; add "</tr>\n"; put; clear; --;
    add "recordset*record*"; push; push; .reparse
  }

  (eof) {
    !"recordset*" {
      # Pull the rest of the stack in front of the tokens already popped
      # into the workspace, so that the complete stack is reported.
      unstack; put; clear;
      add "[invalid csv]\nThe parse stack was: "; get; add "\n";
      print; quit;
    }
    clear;
    add "<table>\n"; get; add "</table>\n";
    add "[valid csv!]\n"; print; quit;
  }

  # restore the (up to) three tokens popped above; a push with an
  # empty workspace does nothing.
  push; push; push;