#!/usr/bin/python
#coding=utf8
# File: sf/htdocs/books/swift/app/csvtojson.species.py
#
# This file is designed to convert a vast number (about 20000) of files
# containing the list of species for each botanical plant "genus" from
# csv (comma separated values) format into json format.
# The resulting json text file is more than twice the size
# of the csv text file because the property names are stored for every
# object in the json format eg:
#  [
#    { "family": "Myrtaceae",
#      "genus": "Eucalyptus",
#      "species": "globulus"}
#    ...
#  ]
#
# The original format of the csv files comes from the theplantlist.org
# site. It includes technical information about the classification of the
# plant (eg date classified, who classified it ...). The csv file names
# are in the format "Family.Genus.csv" eg "Myrtaceae.Eucalyptus.csv"
#
# There are approximately 23000 csv files that need to be processed and
# the total size for those files is about 200 megabytes
#
#
# This effort is part of the earthtree ios swift app which is
# designed to display and map trees in the entire world! one by one!
#
# I will use the "Codable" swift protocol to convert the json data
# to swift objects, and the UITableViews, MapKits etc to display it and
# hopefully, to edit it, add to it.
#
# Usage:
#   python csvtojson.species.py Family.Genus.csv > plant.species.all.json
#
# Remember to remove the last comma in the generated file !!!! That
# extra comma will make the swift Codable decoder not work.
#
# History:
#   3 July 2019
#     began to adapt this script from the very similar one "csvtojson.genera.py"
#
# Field headers from the csv files:
#   ID,Major group,Family,Genus hybrid marker,Genus,Species hybrid marker,
#   Species,Infraspecific rank,Infraspecific epithet,Authorship,
#   Taxonomic status in TPL,Nomenclatural status frv
#   Source,Source id,IPNI id,Publication,Collation,Page,Date,Accepted ID
#   kew
#
# "Major Group" appears to mean flowering or non-flowering plant.
# "ID": is a Kew specific key I believe.
#
# Actually we don't want array brackets for each file. This will all go into
# one very big json file (all one array)

import csv
import json
import sys

# (json property name, csv column name) pairs, in the order they are emitted.
FIELDS = (
    ("group", "Major group"),
    ("family", "Family"),
    ("genus", "Genus"),
    ("species", "Species"),
    ("infra", "Infraspecific epithet"),
    ("status", "Taxonomic status in TPL"),
    ("author", "Authorship"),
    ("date", "Date"),
)


def _cell(row, column):
    """Return row[column], mapping None (a short csv row) to ''.

    A column that is absent from the csv header still raises KeyError,
    because that means the wrong kind of file was supplied.
    """
    value = row[column]
    return "" if value is None else value


def row_to_json(row):
    """Return one csv row as a json object followed by a trailing comma.

    row is a mapping from csv column name to cell text, as produced by
    csv.DictReader. A non-empty "Species hybrid marker" is prefixed to
    the species name and a non-empty "Infraspecific rank" is prefixed to
    the infraspecific epithet, each separated by one space.

    Every value is written with json.dumps so that quotes, backslashes
    and control characters in the csv text are escaped; pasting the raw
    text between double quotes produces invalid json for such values.

    The trailing comma is deliberate: the output of many files is
    concatenated into one big array, so the brackets (and the removal of
    the very last comma) are handled outside this script.
    """
    values = dict((column, _cell(row, column)) for _, column in FIELDS)

    hybrid_marker = _cell(row, "Species hybrid marker")
    if hybrid_marker:
        values["Species"] = hybrid_marker + " " + values["Species"]

    rank = _cell(row, "Infraspecific rank")
    if rank:
        values["Infraspecific epithet"] = (
            rank + " " + values["Infraspecific epithet"]
        )

    # ensure_ascii=False leaves ordinary (including non-ascii) text exactly
    # as it appears in the csv; only characters json requires to be escaped
    # are changed.
    members = ", ".join(
        '"{0}":{1}'.format(name, json.dumps(values[column], ensure_ascii=False))
        for name, column in FIELDS
    )
    return " { " + members + " },"


def convert_rows(lines):
    """Yield the json text for every data row of a csv file.

    lines is any iterable of csv text lines (an open file works); the
    first line must be the header naming the columns.
    """
    for row in csv.DictReader(lines):
        yield row_to_json(row)


def main(argv=None):
    """Convert the csv file named by argv[1] and print the json to stdout."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("usage: {0} Family.Genus.csv".format(argv[0]))

    with open(argv[1], mode="r") as f:
        for record in convert_rows(f):
            # print(x) with a single argument behaves the same on
            # python 2 and python 3.
            print(record)


if __name__ == "__main__":
    main()