I am using a dataframe that includes the following columns:
Country, GNI, CarSalesPerCap. I am using k-means to create clusters. I pass the algorithm a dataframe containing only the two numeric columns: 'GNI' and 'CarSalesPerCap'.
Then I use plotly to create a scatter plot, where the x-axis is CarSalesPerCap and the y-axis is GNI. My question is: how can I add the corresponding country to each point plotted on the graph?
# Load the query result and drop rows with missing values so that every
# remaining row can be clustered.
df = pd.read_sql_query(query, conn)
df = df.dropna()

# Cluster on the two numeric columns only; 'Country' is a label, not a feature.
# df1 is built after dropna() so its rows line up one-to-one with df, which
# is required for the label assignment below.
df1 = df[['GNI', 'CarSalesPerCap']]
kmeans = KMeans(n_clusters=6, random_state=0).fit(df1)
labels = kmeans.labels_

# Glue the cluster labels back onto the original data.
df['clusters'] = labels

# Let's analyze the clusters.
print(df)
# One marker color per cluster label (0-5), in label order.
cluster_colors = ['black', 'teal', 'grey', 'pink', 'purple', 'orange']

# Build one scatter trace per cluster. Passing the 'Country' column as `text`
# attaches the country name to each point, so it is shown when hovering over
# the marker. (Use mode='markers+text' instead to draw the names directly on
# the plot.)
traces = []
for cluster_id, color in enumerate(cluster_colors):
    cluster = df.loc[df['clusters'] == cluster_id]
    traces.append(go.Scatter(x=cluster['CarSalesPerCap'],
                             y=cluster['GNI'],
                             text=cluster['Country'],
                             mode='markers',
                             marker=dict(color=color)))

# Axis titles match the plotted columns; the legend is hidden and hovering
# picks the single closest point.
layout = go.Layout(xaxis=dict(ticks='',
                              showticklabels=True,
                              zeroline=True,
                              title='CarSalesPerCap'),
                   yaxis=dict(ticks='',
                              showticklabels=True,
                              zeroline=True,
                              title='GNI'),
                   showlegend=False, hovermode='closest')

fig = go.Figure(data=traces, layout=layout)
py.offline.plot(fig)
