# Description:
#   A script to reformat a plain text file document which contains
#   a diary of some sort into some kind of html. By a 'diary' I mean
#   a series of entries for particular dates. Specifically, the text file
#   should contain dates in a format like '* 3 march 2003, Saturday' or
#   something similar, and should be followed by some sort of descriptive
#   text relating to that date. The script recognises some special structures
#   within the plain text document. For example:
#
#   The diary entry dates should be on a line by themselves and should 
#   begin with a * character like this:
#     * 3 January 1992, Friday
#   
#   The '=' character, when it is the first non-whitespace character on a line,
#   indicates that all the following text on the line should be formatted as a
#   'heading'. Also, url style strings should be recognised and given
#   a hyperlink token in front of them, such as '[*]'. I prefer this to underlining
#   the entire url, because I find that the underlining tends to interfere with
#   the readability of the text. Some people would say, "use style-sheets" but to
#   them I would reply that the 'heraldic' visual pattern of the underlined hyperlink
#   is imprinted in many internet users' brains, and to change that 'iconography' can
#   lead to unnecessary confusion.
#
# Examples:
#   ./diary2html.sh mjb-work.txt notran > mjb-work.html
#     This command line, executed in some kind of a bash shell, will transform a 
#     plain text file which has 'diary' style entries, into an HTML file (that is
#     it will create a new HTML file and leave the original text file unchanged) and 
#     will not display the automatic translation links to Google.
#     Also an HTML table of contents (with one entry for each date) will be 
#     inserted in the HTML document.
#
#   ./diary2html.sh mjb-work.txt notran notoc > mjb-work.html
#     The text file will be transformed into HTML but no table of contents
#     will be inserted nor any translation links.
#
#   ./diary2html.sh mjb-work.txt blah notoc > mjb-work.html
#     If translation links are desired but no table of contents use a 
#     command line similar to above. The string 'blah' could be anything
#     as long as it's not 'notran'. This slightly dodgy 'feature' is owing to the
#     fact that I am not using any 'getopt' style option parsing.
#
# Parameters:
#   textFileName  [required]
#     The name of the text file which is to be transformed from text into html
#   notran        [optional]
#     If the second parameter is the string 'notran' then the javascript links
#     to the google automatic language translation engine will NOT be inserted
#     into the HTML page. This is useful, for example, when the HTML page is 
#     going to be located within a 'password-protected' directory, because
#     the Google translation engine will not be able to access the page, and
#     therefore the translation links will not work.
#   notoc         [optional]
#     If the third parameter is the string "notoc", then no HTML table of
#     contents will be generated.
# Notes:
#   This script contains an improved url detection regular expression, better than that
#   in say txtdoc2html.sh. But the url pattern matcher still has a problem when
#   somebody puts a full stop after a url. It thinks that that dot is part of the
#   url.
# See Also:
#   txtdoc2html.sh, linkdoc2html.sh, plaintext2html.sh
# Author:
#   m.j.bishop

 #-- Without a file name there is nothing to convert: print a one line usage
 #-- summary, followed by every comment line of this script (the comment
 #-- header doubles as the long help text), and stop with a failure status.
 if [ -z "$1" ]
 then
   echo "usage: $0  textFileName [notran] [notoc]"
   #-- The script is read through a quoted redirection so that a script path
   #-- which contains spaces, or starts with '-', cannot be mis-parsed.
   sed -n "/^[ ]*#/p" < "$0"
   exit 1
 fi

 #-- Stop early when the text file cannot be read. Carrying on would only
 #-- produce an HTML page with an empty body.
 if [ ! -r "$1" ]
 then
   echo "$0: cannot read text file '$1'" >&2
   exit 1
 fi

 
 #-- The section below creates the two work files used by the rest of the
 #-- script:
 #--   "$1.temp"       the input text with tabs expanded and with every diary
 #--                   date line (first non-blank character '*') prefixed by
 #--                   its running number: 1, 2, 3 ...
 #--   diary-toc.temp  the HTML table of contents, one link per date line
 #--
 #-- Only lines which match the date pattern are numbered. In theory
 #-- 'nl -bpPATTERN' should also do this, but it insisted on
 #-- 'double-spacing' the output.

 #-- Remove the work files whenever the script exits, including early exits,
 #-- and not only when the last line of the script is reached.
 trap 'rm -f -- "$1.temp" diary-toc.temp' EXIT

 #-- The file name is quoted so that names which contain spaces work. Plain
 #-- 'awk' is called because the program needs nothing specific to mawk.
 #-- Date lines are printed with their number glued on in front; every other
 #-- line is passed through unchanged.
 expand < "$1" |
   awk '/^[ ]*\*[ ]*[^ ]+/ { ii++; print ii $0; next }
        { print }' > "$1.temp"

 #-- Table of contents: keep only the numbered date lines, strip the name of
 #-- the weekday, and turn each line into a link to the anchor "itemN".
 #-- The '\|' alternation and the 'i' flag are GNU sed extensions.
 {
   echo "<center><a name = \"toc\"></a>"
   sed -e '/^[ ]*\([0-9]\{1,\}\)[ ]*\*\(.*\)/!d' \
       -e 's/\(monday\|tuesday\|wednesday\|thursday\|friday\|saturday\|sunday\)//gi' \
       -e 's/^[ ]*\([0-9]\{1,\}\)[ ]*\*\(.*\)/<a href="#item\1" class = "t">\2<\/a> | /g' \
       < "$1.temp"
   echo "</center>"
 } > diary-toc.temp

 #-- Emit the fixed top of the HTML page: the doctype, the complete <head>
 #-- (meta tags, provenance comments, style sheet link and the javascript
 #-- helper used by the translation links) and the opening <body> tag.
 #--
 #-- <head> is opened directly after <html> so that the META and LINK
 #-- elements really are inside it; they used to be written before the
 #-- <head> tag.
 #--
 #-- The script name is taken from "$0" with a parameter expansion, which
 #-- keeps working when the path of the script contains spaces.
 script_name=${0##*/}

 #-- The here-document is expanded by the shell: only $script_name, $1 and
 #-- $(date) are substituted, the rest is copied literally.
 cat <<EOF
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
<html>
<head>
 <META HTTP-EQUIV="Content-Type" CONTENT="text/html; CHARSET=iso-8859-1">
 <META HTTP-EQUIV="Keywords"
          CONTENT="">
 <META HTTP-EQUIV="Description"
          CONTENT="">
        <LINK REV="made" HREF="mailto:webmaster@ella-associates.org">

<!-- HTML generated by the "$script_name" script         -->
<!-- From the File: "$1" -->
<!-- On the Date: $(date)  -->
<!-- see http://www.ella-associates.org/utils/$script_name -->
<link   rel = "stylesheet"  type = "text/css"
       href = "/stylesheets/swish-style.css">
<script language = "javascript">

 <!--
   function redirectToGoogleTranslation(sSourceLanguage, sTargetLanguage)
   {
     var sTranslationUrl = 'http://translate.google.com/translate?u=';

     sTranslationUrl += escape(document.location.href);
     sTranslationUrl += '&langpair=' + sSourceLanguage + '|' + sTargetLanguage;
     sTranslationUrl += '&hl=' + sSourceLanguage;
     // document.testForm.test.value=sTranslationUrl;
     window.location = sTranslationUrl;
   } //-- redirectToGoogleTranslation()
 -->
</script>
</head>
<body>
EOF
 #-- Offer the Google automatic translation links at the top of the page,
 #-- unless the second parameter is 'notran'. The links are switched off for
 #-- pages in a password protected directory, because Google has no
 #-- permission to view such a directory and the links would not work.
 [ "$2" = "notran" ] || cat <<'LINKS'
<center>
See this page in (approximate):
<a  href="javascript:redirectToGoogleTranslation('en', 'es');">Español</a>|
<a  href="javascript:redirectToGoogleTranslation('en', 'fr');">Français</a>|
<a  href="javascript:redirectToGoogleTranslation('en', 'it');">Italiano</a>|
<a  href="javascript:redirectToGoogleTranslation('en', 'de');">Deutsch</a>|
<a  href="javascript:redirectToGoogleTranslation('en', 'pt');">Português</a>
</center>
LINKS
 
 #-- This was the old regular expression used to find dates, but now I am using * format
 #--
 #- sed "s/^[ 0-9,]*\(jan\|feb\|mar\|apr\|may\|jun\|jul\
 #    |aug\|sep\|oct\|nov\|dec\)[a-z]*[ 0-9,]*.*/<strong>&<\/strong>/gi" | \
 
#-- Put the page heading before the table of contents.
#--
#-- Every line of the work file "$1.temp" whose first non-blank character is
#-- '=' becomes a centred <h2> heading; all other lines are dropped here.
#-- '<' and '>' in the heading text are escaped before the tags are added.
#-- The work file name is quoted so that file names with spaces work, and a
#-- single sed reads it directly. The old 'gi' flags are gone: the pattern is
#-- anchored and contains no letters, so they had no effect.
sed -e '/^[ ]*=[ ]*.*/!d' \
    -e 's/</\&lt;/g' -e 's/>/\&gt;/g' \
    -e 's/^[ ]*=[ ]*\(.*\)/<center><h2>\1<\/h2><\/center>/' \
    < "$1.temp"

#-- Open the centred, 90% wide, single cell table that holds the page text.
printf '%s\n' '<table align="center" width = "90%"><tr><td>'

#-- Insert the table of contents, unless the third parameter is 'notoc'.
[ "$3" = "notoc" ] || cat diary-toc.temp

#-- Transform the text to HTML, insert anchors.
#-- Also delete the heading line which has already been inserted in the HTML.
#-- But, that sed line will also delete lines beginning in == or === etc, which
#-- may not be desirable.
#--
#-- The stages of the pipeline, in order:
#--   1. read the numbered work file (name quoted, so spaces are allowed)
#--   2. drop the '=' heading lines
#--   3. escape '<' and '>'
#--   4. turn the markers '-->>' and '--<<' at the start of a line (by now
#--      '--&gt;&gt;' and '--&lt;&lt;') into <pre class = "sed"> and </pre>
#--   5. turn each numbered date line into the anchor "itemN" plus a link
#--      back to the table of contents
#--   6. put a '[*]' hyperlink in front of http:// and https:// urls. The
#--      url has to end in a character other than '.', so that a full stop
#--      which closes a sentence stays outside the link
#--   7. do the same for bare 'www.' names. The character in front of 'www.'
#--      is written back (it used to be swallowed by the match), and 'www.'
#--      at the very start of a line is recognised too
#--   8. outside the <pre> sections, replace pairs of spaces with &nbsp;
#--   9. outside the <pre> sections, start every line with <br>
#-- The '\|' alternation and the 'i' flag are GNU sed extensions.
expand < "$1.temp" | \
   sed "/^[ ]*=[ ]*\(.*\)/d" | \
   sed -e "s/</\&lt;/g" -e "s/>/\&gt;/g" | \
   sed -e "s/^[ ]*\-\-\&gt;\&gt;/<pre class = \"sed\">/g" -e "s/^[ ]*\-\-\&lt;\&lt;/<\/pre>/g" | \
   sed "s/^[ ]*\([0-9]\{1,\}\)[ ]*\*\(.*\)/<br><u><strong><a name=\"item\1\">\2<\/a><\/strong><\/u> <a href=\"#toc\">[TOC]<\/a>/g" | \
   sed "s/\(https\{0,1\}:\/\/[-a-z\%0-9\~\\\/\"\'\.\@]\{2,\}[-a-z\%0-9\~\\\/\"\'\@]\)/<a href='\1' class = \"t\">[*]<\/a><tt> \1<\/tt>/gi" | \
   sed "s/\(^\|[^a-zA-Z\/]\)\(www\.[-a-z\%0-9\~\\\/\"\'\.\@]\{1,\}[-a-z\%0-9\~\\\/\"\'\@]\)/\1<a href='http:\/\/\2'>[*]<\/a><tt> \2<\/tt>/gi" | \
   sed "/<pre class = \"sed\">/,/<\/pre>/!s/[ ]\{2\}/\&nbsp;\&nbsp;/g" | \
   sed "/<pre class = \"sed\">/,/<\/pre>/!s/^/<br>/g"

#-- Close the table cell which was opened before the table of contents.
echo "<br>"
echo "</td></tr></table>"

 #-- Repeat the Google translation links at the foot of the page. As at the
 #-- top, a second parameter of 'notran' leaves them out.
 case "$2" in
   notran)
     ;;
   *)
     cat <<'LINKS'
<center>
See this page in (approximate):
<a  href="javascript:redirectToGoogleTranslation('en', 'es');">Español</a>|
<a  href="javascript:redirectToGoogleTranslation('en', 'fr');">Français</a>|
<a  href="javascript:redirectToGoogleTranslation('en', 'it');">Italiano</a>|
<a  href="javascript:redirectToGoogleTranslation('en', 'de');">Deutsch</a>|
<a  href="javascript:redirectToGoogleTranslation('en', 'pt');">Português</a>
</center>
LINKS
     ;;
 esac
 
 #-- Close the HTML document.
 echo "</body>"
 echo "</html>"

 #-- Remove the two work files. The name built from "$1" is quoted so that
 #-- the work file is also removed when the text file name contains spaces,
 #-- and '--' keeps a name that starts with '-' from being read as an option.
 rm -f -- diary-toc.temp "$1.temp"