I have a list of tuples with the countries and the number of times they occur. I have 175 countries all with long names.
When I chart them, I get:
As you can see, everything is bunched up: there is no space between the bars or labels, and you can barely read anything.
Code I use (the original data file is huge, but this contains my matplotlib specific code):
def tupleCounts2Percents(inputList):
    """Convert (label, count) pairs into (label, fraction-of-total) pairs.

    Args:
        inputList: iterable of indexable pairs whose item 0 is a label and
            whose item 1 is a numeric count.

    Returns:
        A list of ``(label, fraction)`` tuples in input order, where
        ``fraction`` is ``count / sum(all counts)`` as a float. If the counts
        sum to zero, every fraction is ``0.0`` instead of raising
        ZeroDivisionError.
    """
    # Materialise once: the data is traversed twice (sum, then rescale), which
    # would silently produce an empty result for a one-shot iterator.
    pairs = tuple(inputList)
    # The 1.0 factor forces float division on Python 2 as well.
    total = 1.0 * sum(pair[1] for pair in pairs)
    if not total:
        return [(pair[0], 0.0) for pair in pairs]
    return [(pair[0], pair[1] / total) for pair in pairs]
def autolabel(rects, labels, fontsize=6, rotation=0):
    """Write one italic text label just above each bar.

    Args:
        rects: iterable of bar objects exposing ``get_x()``, ``get_width()``
            and ``get_height()`` (e.g. the container returned by ``plt.bar``).
        labels: iterable of label strings, paired with ``rects`` by position;
            pairing stops at the shorter of the two.
        fontsize: font size passed to ``plt.text`` (default 6).
        rotation: text rotation in degrees passed to ``plt.text``. The
            default 0 keeps labels horizontal; 45 or 90 helps when many
            narrow bars make horizontal labels collide.

    Returns:
        None. Labels are added through ``plt.text``.
    """
    # NOTE(review): `plt` is presumably matplotlib.pyplot imported at file
    # level -- the import is not visible in this part of the file.
    for rect, label in zip(rects, labels):
        height = rect.get_height()
        # Centre the label horizontally on the bar and place its baseline 5%
        # above the bar top.
        plt.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
                 label,
                 ha='center', va='bottom', fontsize=fontsize, style='italic',
                 rotation=rotation)
def countryChartList(inputlist, path):
    """Draw a bar chart of how often each country occurs and save it to a file.

    Args:
        inputlist: iterable of dicts, each with a ``'location-value-pair'``
            entry that is a mapping keyed by country name. Only the keys are
            counted; the mapped values are ignored.
        path: destination handed to ``plt.savefig``.

    Returns:
        None. The chart is written to ``path`` and the figure is closed.
    """
    # Tally in place; update() with an iterable of keys adds 1 per key and
    # avoids building a temporary Counter for every record.
    seen_countries = Counter()
    for record in inputlist:
        seen_countries.update(record['location-value-pair'].keys())
    # List of (country, count) pairs, most frequent first.
    seen_countries = seen_countries.most_common()

    seen_countries_percentage = [
        '{:.2%}'.format(share)
        for _, share in tupleCounts2Percents(seen_countries)
    ]
    # Real lists rather than map(): on Python 3 map() is a lazy iterator,
    # which the plotting calls cannot use as a sequence.
    xvals = [country for country, _ in seen_countries]
    yvals = [count for _, count in seen_countries]
    positions = range(len(seen_countries))

    fig = plt.figure()
    try:
        countrychart = plt.bar(positions, yvals, width=0.9)
        plt.xticks(positions, xvals, rotation=90)

        # Extend the top of the y axis; the margin is in data units.
        plot_margin = 0.25
        x0, x1, y0, y1 = plt.axis()
        plt.axis((x0,
                  x1,
                  y0,
                  y1 + plot_margin))

        plt.title('Countries in Dataset')
        plt.xlabel('Countries in Data')
        plt.ylabel('Occurrences')
        plt.tick_params(axis='both', which='major', labelsize=6)
        plt.tick_params(axis='both', which='minor', labelsize=6)
        plt.tight_layout()
        autolabel(countrychart, seen_countries_percentage)
        plt.savefig(path)
    finally:
        # plt.figure() opens a new figure on every call; clearing it is not
        # enough to release it, so close it even if plotting or saving fails.
        plt.close(fig)
The input I feed in is a list of dicts that looks like this:
# Sample input: a list of dicts, each holding a 'location-value-pair' mapping
# from a country name to a number. A country may appear in several entries.
# NOTE: the name `list` shadows the builtin; it is kept only so the sample
# matches the surrounding text.
list = [
    {
        "location-value-pair": {
            "Austria": 234
        }
    },
    {
        "location-value-pair": {
            "Azerbaijan": 20006.0
        }
    },
    {
        "location-value-pair": {
            "Germany": 4231
        }
    },
    {
        "location-value-pair": {
            "United States": 12121
        }
    },
    {
        "location-value-pair": {
            "Germany": 65445
        }
    },
    {
        "location-value-pair": {
            "UK": 846744
        }
    }
]
How do I:
- Make things so one can read them - would the answer be a histogram with bins instead of a bar plot? Maybe stepping every 10%?
- How do I make it so the tick labels and the labels above the bars (the percentages) don't overlap?
- Any other insight welcome (e.g. bars with gradient colours, red to yellow)?
EDIT
I reduced the number of countries to just the top 50, made the bars more transparent, and rotated the tick labels by 45 degrees. The tick label of the first bar still crosses the y axis, so it is unreadable. How can I change this?
I changed the bar call to countrychart = plt.bar(range(len(seen_countries)), yvals, width=0.9,alpha=0.6) and also added rotation=45 as an argument to the plt.text() call in the autolabel function.


