#!/usr/bin/python
#coding=utf8

# File: sf/htdocs/books/swift/app/csvtojson.species.py
#
# This file is designed to convert a vast number (about 20000) of files
# containing the list of species for each botanical plant "genus" from
# csv (comma separated values) format into json format.
# The resulting json text file is more than twice the size 
# of the csv text file because the property names are stored for every 
# object in the json format eg:
#    [ 
#        { "family": "Myrtaceae", 
#          "genus": "Eucalyptus",
#          "species": "globulus"}
#         ...
#    ]
#
# The original format of the csv files comes from the theplantlist.org
# site. It includes technical information about the classification of the 
# plant (eg date classified, who classified it ...). The csv file names 
# are in the format "Family.Genus.csv" eg "Myrtaceae.Eucalyptus.csv"
#
# There are approximately 23000 csv files that need to be processed and 
# the total size for those files is about 200 megabytes
#

#
# This effort is part of the earthtree ios swift app which is 
# designed to display and map trees in the entire world! one by one!
# 
# I will use the "Codable" swift protocol to convert the json data
# to swift objects, and the UITableViews, MapKits etc to display it and 
# hopefully, to edit it, add to it.  
#
# Usage (one csv file per invocation, json objects are written to stdout):
#   python csvtojson.species.py Family.Genus.csv >> plant.species.all.json
#
# Remember to remove the last comma in the generated file !!!! That 
# extra comma will make the swift Codable decoder not work.
#
# History:
#  3 July 2019
#    began to adapt this script from the very similar one "csvtojson.genera.py" 
#
# Field headers from the csv files:
#   ID,Major group,Family,Genus hybrid marker,Genus,Species hybrid marker,
#   Species,Infraspecific rank,Infraspecific epithet,Authorship,
#   Taxonomic status in TPL,Nomenclatural status frv
#   Source,Source id,IPNI id,Publication,Collation,Page,Date,Accepted ID
#   kew
# 
# "Major Group" appears to mean flowering or non-flowering plant.
# "ID": is a Kew specific key I believe.
#
# Actually we don't want array brackets for each file. The output for all
# files will go into one very big json file (all one array)

import csv
import json
import sys

# Layout of one json object. The placeholders receive values that are already
# json-encoded (surrounding quotes included), so the template has no quotes
# around them. No array brackets are emitted here, and every object ends with
# a comma, so the output of many csv files can simply be concatenated.
ROW_TEMPLATE = """ {{
   "group":{group},
   "family":{family},
   "genus":{genus},
   "species":{species},
   "infra":{infra},
   "status":{status},
   "author":{author},
   "date":{date}
 }},"""


def format_row(row):
    """Return the json object text for one csv row.

    row is a mapping from csv column header to cell text, as produced by
    csv.DictReader. A cell that is None (csv.DictReader's filler for a row
    with fewer cells than the header) is treated as an empty string.

    The species hybrid marker, when present, is prepended to the species
    name; the infraspecific rank, when present, is prepended to the
    infraspecific epithet. Both are separated by a single space.

    Raises KeyError when a required column is absent from row, so a csv
    file with an unexpected header fails loudly instead of producing
    empty values.
    """
    def cell(name):
        value = row[name]
        return "" if value is None else value

    species = cell("Species")
    if cell("Species hybrid marker"):
        species = cell("Species hybrid marker") + " " + species

    infra = cell("Infraspecific epithet")
    if cell("Infraspecific rank"):
        infra = cell("Infraspecific rank") + " " + infra

    def encode(text):
        # json.dumps escapes quotes, backslashes and control characters,
        # which would otherwise yield invalid json. ensure_ascii=False
        # keeps non-ascii text as-is rather than as \uXXXX escapes.
        return json.dumps(text, ensure_ascii=False)

    return ROW_TEMPLATE.format(
        group=encode(cell("Major group")),
        family=encode(cell("Family")),
        genus=encode(cell("Genus")),
        species=encode(species),
        infra=encode(infra),
        status=encode(cell("Taxonomic status in TPL")),
        author=encode(cell("Authorship")),
        date=encode(cell("Date")),
    )


def main(argv):
    """Print one json object per data row of the csv file named in argv[1].

    Returns the process exit status: 0 on success, 2 when the csv file
    argument is missing.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: python csvtojson.species.py Family.Genus.csv\n")
        return 2

    with open(argv[1], mode='r') as f:
        for row in csv.DictReader(f):
            # Single-argument print(...) behaves the same on python 2 and 3.
            print(format_row(row))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))