import json
# This file provides all the GeoJSON coordinates.
filename = "india_states.geojson"
# GeoJSON is specified as UTF-8 (RFC 7946), so decode it explicitly rather
# than relying on the platform's default text encoding.
with open(filename, "r", encoding="utf-8") as f:
    datastore = json.load(f)
import pandas as pd
# CSV holding the statistics; its "states" column becomes the row index.
input_file = "infant_mortality.csv"
df = pd.read_csv(input_file, index_col="states")
# Drop the final row positionally: it holds the all_india statistics.
df = df.iloc[:-1]
# Report every state in the CSV index that has no identically spelled
# "NAME_1" entry among the GeoJSON features.
for state in df.index:
    matched = any(
        feature["properties"]["NAME_1"] == state
        for feature in datastore["features"]
    )
    if not matched:
        print("no match found for", state)
I have to manually fix the mismatched state names so that I can plot. A very hacky solution is below.
# List each state name exactly as the GeoJSON spells it, one per line, so the
# CSV names can be compared against them by eye.
for feature in datastore["features"]:
    properties = feature["properties"]
    print(properties["NAME_1"])
# Start from the CSV spelling of each state, then overwrite the entries whose
# spelling differs from the GeoJSON "NAME_1" values.
df["new_states"] = df.index
csv_to_geojson_name = {
    "Jammu & Kashmir": "Jammu and Kashmir",
    "Odisha": "Orissa",
    "Uttarakhand": "Uttaranchal",
    "A& N Islands": "Andaman and Nicobar",
    "D & N Haveli": "Dadra and Nagar Haveli",
    "Daman & Diu": "Daman and Diu",
}
for csv_name, geojson_name in csv_to_geojson_name.items():
    df.loc[csv_name, "new_states"] = geojson_name
# Dropping Telangana from the map; will update later.
df.drop(index="Telangana", inplace=True)
# Re-run the name check against the corrected "new_states" column; any name
# printed here still has no counterpart in the GeoJSON.
geojson_names = {
    feature["properties"]["NAME_1"] for feature in datastore["features"]
}
for candidate in df.new_states:
    if candidate not in geojson_names:
        print("no match found for", candidate)
# Index the table by the GeoJSON spelling so rows can be looked up by "NAME_1".
df.set_index("new_states", inplace=True)
statistic_columns = ["total", "urban", "rural"]
# Copy the three statistics of each matching row into the feature's
# properties as plain floats; features without a matching row are left as is.
for feature in datastore["features"]:
    properties = feature["properties"]
    state = properties["NAME_1"]
    if state not in df.index:
        continue
    for column in statistic_columns:
        properties[column] = float(df.loc[state, column])
Below I create a smaller file that keeps only each state's name, geometry, and the total, urban and rural values.
# Reduce every feature to its geometry, its name (stored under "Name") and
# the three statistics; all other source properties are left out.
# NOTE(review): a feature without "total"/"urban"/"rural" raises KeyError
# here - presumably every feature has them by this point; confirm.
value_keys = ["total", "urban", "rural"]
features = [
    {
        "type": "Feature",
        "geometry": feature["geometry"],
        "properties": {
            "Name": feature["properties"]["NAME_1"],
            **{key: feature["properties"][key] for key in value_keys},
        },
    }
    for feature in datastore["features"]
]
new_datastore = {"type": "FeatureCollection", "features": features}
# Derive the output name from the CSV name. str.strip(".csv") is not a suffix
# remover: it trims any run of the characters ".", "c", "s" and "v" from BOTH
# ends (e.g. "crimes.csv" -> "rime"), so cut the extension off explicitly.
csv_suffix = ".csv"
if input_file.endswith(csv_suffix):
    base_name = input_file[: -len(csv_suffix)]
else:
    base_name = input_file
output_file = "kepler_" + base_name + ".geojson"
# Write the slimmed-down feature collection out as a GeoJSON file.
with open(output_file, "w") as f:
    json.dump(new_datastore, f)
a= df["total"].values.tolist()
a.sort()
import matplotlib.pyplot as plt
%matplotlib inline
plt.scatter(range(len(a)),a)
plt.axis('scaled')
Going with Quantile because of the horizontal line at 25. Read this if confused.