python-automation-scripts/hackernews-scraper/json_to_csv.py at master · avidLearnerInProgress/python-automation-scripts · GitHub

Name: python-automation-scripts/hackernews-scraper/json_to_csv.py at master · avidLearnerInProgress/python-automation-scripts · GitHub
Rating: 4.4 (6864 reviews)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import csv
import json
import sys

##
# Convert to string keeping encoding in mind...
##
def to_string(s):
    """Return ``s`` converted with ``str()``, falling back to UTF-8 encoding.

    ``str(s)`` is attempted first. If that conversion raises a Unicode
    error, ``s.encode('utf-8')`` is returned instead.

    Only ``UnicodeError`` is handled: any other failure raised by ``str(s)``
    is a genuine bug in the input and is allowed to propagate rather than
    being masked by the fallback.
    """
    try:
        return str(s)
    except UnicodeError:
        # Change the encoding type if needed
        return s.encode('utf-8')


##
# This function converts an item like
# {
#   "item_1":"value_11",
#   "item_2":"value_12",
#   "item_3":"value_13",
#   "item_4":["sub_value_14", "sub_value_15"],
#   "item_5":{
#       "sub_item_1":"sub_item_value_11",
#       "sub_item_2":["sub_item_value_12", "sub_item_value_13"]
#   }
# }
# To
# {
#   "node_item_1":"value_11",
#   "node_item_2":"value_12",
#   "node_item_3":"value_13",
#   "node_item_4_0":"sub_value_14",
#   "node_item_4_1":"sub_value_15",
#   "node_item_5_sub_item_1":"sub_item_value_11",
#   "node_item_5_sub_item_2_0":"sub_item_value_12",
#   "node_item_5_sub_item_2_1":"sub_item_value_13"
# }
##
def reduce_item(key, value):
    """Flatten ``value`` into the module-level ``reduced_item`` dict.

    Nested containers are walked recursively and each leaf is stored under
    a key built by joining the path to it with underscores:

    * list  -> each element is reduced under ``key + '_' + <index>``
    * dict  -> each entry is reduced under ``key + '_' + <sub_key>``
    * other -> stored as ``reduced_item[to_string(key)] = to_string(value)``

    Nothing is returned; the result is accumulated in the global
    ``reduced_item``, which the caller must bind to a dict before calling.
    An empty list or dict contributes no entries.
    """
    global reduced_item

    # Reduction Condition 1
    if isinstance(value, list):
        for index, sub_item in enumerate(value):
            reduce_item(key + '_' + to_string(index), sub_item)

    # Reduction Condition 2
    elif isinstance(value, dict):
        for sub_key, sub_value in value.items():
            reduce_item(key + '_' + to_string(sub_key), sub_value)

    # Base Condition
    else:
        reduced_item[to_string(key)] = to_string(value)


# Script entry point: read a JSON file, flatten every item found under the
# requested node (or the whole document when the node cannot be looked up)
# and write the flattened rows to a CSV file with a sorted header.
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("\nUsage: python json_to_csv.py <node_name> <json_in_file_path> <csv_out_file_path>\n")
    else:
        # Reading arguments
        node = sys.argv[1]
        json_file_path = sys.argv[2]
        csv_file_path = sys.argv[3]

        # The context manager closes the input file even if parsing fails.
        with open(json_file_path, 'r') as fp:
            raw_data = json.load(fp)

        # Fall back to the whole document when the node is absent (KeyError)
        # or the top-level value cannot be indexed by a string (TypeError).
        try:
            data_to_be_processed = raw_data[node]
        except (KeyError, TypeError):
            data_to_be_processed = raw_data

        processed_data = []
        header = set()
        for item in data_to_be_processed:
            # reduce_item() writes into the module-level ``reduced_item``,
            # so a fresh dict has to be bound here before every call.
            reduced_item = {}
            reduce_item(node, item)

            header.update(reduced_item)
            processed_data.append(reduced_item)

        # Unique column names in a stable, sorted order.
        header = sorted(header)

        # newline='' lets the csv module control line endings itself, which
        # avoids blank rows between records on platforms that translate '\n'.
        with open(csv_file_path, 'w+', newline='') as f:
            writer = csv.DictWriter(f, header, quoting=csv.QUOTE_ALL)
            writer.writeheader()
            # Rows missing a column are padded by DictWriter's default restval.
            writer.writerows(processed_data)

        print("Just completed writing csv file with %d columns" % len(header))