Needed a simple CSV file that had headers and no nesting of data to consume in Tableau. The JSON file I was consuming had nested dictionaries and lists (which sometimes had in themselves nested lists/dictionaries).
The output is a pipe delimited format (because free text fields have comments in that mess things up) with nested fields having both the original dictionary key plus any lower keys as the headers.
# -*- coding: utf-8 -*- """ Created on Fri Sep 14 12:22:02 2018 @author: redim_learning """ import sys import time class parse_json(): # this class handles parsing json data, once it's been read in as a string using the json library def __init__(self): return None #this writes out dictionaries within the main json dictonary def write_dict(self, sub_dict, dict_name, f, str_dict): try: str_dict = str_dict + ('"%s"' % (str(dict_name))) #commar?! except: print ('key not passed correctly', str(dict_name)) try: for second_key, second_value in sub_dict.items(): if type(second_value) == dict: print ('dictionary within dictionary') print( second_key) write_dict(item[key],second_key,f,str_dict) str_value = str(second_value) #clean up characters if '\n' in second_key or '\r' in str_value: str_value = (str_value).replace('\n','').replace('\r','').replace(chr(34),chr(39)) str_dict = str_dict + (', "%s"' % str_value) return str_dict[:len(str_dict)] except: print('dict write out did not work\n' , str_dict ) print('item[key] is ', sub_dict) #print('second key:%s, second value:%s' %(second_key, second_value)) #this function manages to parse a list that is stored within the original json dictionary def write_list(self, item, key, f, list_str): # don't write a new line, that will be done later #write first item in the list in current row for list_value in item[key]: if type(list_value) ==str: list_str = list_str +(list_value.replace('\n','').replace('\r','').replace(chr(34),chr(39)) + ", ") elif type(list_value) ==dict: #sys.stdout.write("\nkey: %s, type(item): %s" % (key, type(item))) #print('\nlist_value is :' + list_value) sys.stdout.flush() sys.stdout.flush() sub_dict = list_value list_str = list_str + write_dict(sub_dict,dict_name, f, list_str) return list_str[:len(list_str)-2] #this is needed to know when to add a line feed to the total string def find_last_field(self, item): #loop through all fields and return last header value for header,value in item.items(): last_key = header #print (header) return last_key #parses headers def write_headers(self, item,last_header): header_list = '' object_list = '' for h,v in item.items(): if type(v) ==dict: for sub_header in v.items(): if type(sub_header) == tuple: object_list = object_list + '"' + h + '_' + sub_header[0]+ '",' else: object_list = object_list + '"' + sub_header + '",' elif type(v) ==list: for rec in v: object_list = object_list + '"' + h + "'," else: header_list = str(header_list) + '"' + h+ '",' # return the full header string, but exclude the last commar return header_list + object_list[:len(object_list)-1] def parse_json_file(self, data, f, page): full_str = '' last_header = '' for item in data: try: sys.stdout.write("\rPage %i, record %s of %i records" %(page+1, str(item['id']), len(data))) #Writes out progress for user sys.stdout.flush() except TypeError: sys.stdout.flush() sys.stdout.write("\rprogress is progressing ") sys.stdout.flush() sys.stdout.flush() #when you're only looking at one record if type((item))==str: item = data dict_str = '' list_str = '' item_str = '' if last_header == '' and page == 0: #determine the last header so you know when to write the line return last_header = find_last_field (item) #write out a the headers in the first row f.write(write_headers(item, last_header) + "\n") for key, value in item.items(): #print (item_str ) #print (key,value, type(value)) #try: if type(item[key]) == dict: #print('is dict') try: dict_str = dict_str + write_dict(value, key, f, dict_str) except: sys.stdout.write("\rdictionary didn't write properly ") sys.stdout.flush() elif type(item[key]) == list: #print('is list') try: list_str = list_str + write_list(item, key, f, list_str) except: sys.stdout.write("\rlist didn't write properly ") sys.stdout.flush() elif type(item[key])==tuple: item_str = item_str + '"' + value[1] +'",' elif type(item[key])==int: item_str = item_str +'"' + str(value) +'",' elif value == 'True' or value == 'False' or type(value) ==bool: #print('is bool') item_str = item_str + '"' + str(value).lower() +'",' #print (item_str) elif type(value) == str: #print('is str') item_str = item_str +'"' + value.lower().replace('\n','').replace('\r','').replace(chr(34),chr(39)) +'",' #print (item_str) elif type(value) == None or value == None: #print('is str') item_str = item_str +'"",' else: print ('not added %s as is type %s' % (value, type(value))) full_str = full_str + item_str + dict_str + list_str + "\n" #print (full_str) time.sleep(0.5) #Wait so we don't overload instance with too many api requests #break return (full_str)