#!/usr/bin/env python
"""Draw a horizontal bar chart of how often standard-library modules are imported.

The script reads ``label,frequency`` rows from ``results.csv`` (lines starting
with ``#`` are treated as comments), keeps the most frequent entries and writes
the chart to ``bar-chart.png``.
"""
# TODO:
# - Make this pretty and put it on the Internet
# - Highlight the modules written in C in red.
# - Try and draw an area plot
# - Perform similar queries using Cheese shop / google search results

import sys
from operator import itemgetter

RESULTS_PATH = 'results.csv'
CHART_PATH = 'bar-chart.png'
# Number of entries plotted; the chart title below also says "top 300".
TOP_N = 300
BAR_WIDTH = 0.5

CHART_TITLE = (
    r"Frequency of occurrences of the regular expression "
    r"'(from|import)\s\b' found by google code for python modules "
    r"in the standard library - top 300."
)


def read_frequencies(path):
    """Parse ``label,frequency`` rows from *path*.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped.  Returns a list of ``(frequency, label)`` tuples in file order.

    Raises ``ValueError`` (after reporting the offending line on stderr) when
    a row does not consist of exactly two comma-separated fields or when the
    frequency is not an integer.
    """
    entries = []
    # ``with`` guarantees the handle is closed even when a row fails to parse.
    with open(path) as handle:
        for line_number, raw_line in enumerate(handle, 1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            try:
                label, freq = line.split(",")
                entries.append((int(freq), label))
            except ValueError:
                # Show which row is malformed, then let the error propagate.
                sys.stderr.write(
                    "%s:%d: cannot parse line: %r\n" % (path, line_number, line)
                )
                raise
    return entries


def top_entries(entries, limit):
    """Return the *limit* largest ``(frequency, label)`` tuples, ascending.

    Ascending order puts the most frequent module at the highest y position,
    i.e. at the top of the horizontal bar chart.
    """
    largest = sorted(entries, reverse=True)[:limit]
    largest.reverse()
    return largest


def tick_positions(max_freq):
    """Return evenly spaced x-axis tick positions covering ``0..max_freq``.

    The step is half of the largest power of ten not exceeding *max_freq*,
    but never less than 1: for values below 10 the halved step would be 0 and
    the rounding below would divide by zero.
    """
    if max_freq > 0:
        # Digit count gives the power of ten exactly, avoiding float log10.
        magnitude = 10 ** (len(str(max_freq)) - 1)
    else:
        magnitude = 1
    step = max(1, magnitude // 2)
    # Round max_freq up to a multiple of step; the extra step makes that
    # multiple itself part of the (end-exclusive) range.
    upper = (max_freq + step - 1) // step * step + step
    return list(range(0, upper, step))


def draw_chart(entries, output_path):
    """Render *entries* (ascending ``(frequency, label)`` tuples) to a PNG."""
    # Imported here so the parsing helpers stay usable without matplotlib.
    import pylab

    freqs = list(map(itemgetter(0), entries))
    labels = list(map(itemgetter(1), entries))
    name_locations = list(range(1, len(freqs) + 1))

    pylab.barh(name_locations, freqs, height=BAR_WIDTH, align='center')
    pylab.yticks(name_locations, labels)

    ticks = tick_positions(max(freqs))
    pylab.xticks(ticks, [str(tick) for tick in ticks])
    pylab.ylim(0, name_locations[-1] + BAR_WIDTH * 2)

    pylab.title(CHART_TITLE, horizontalalignment='center')
    pylab.xlabel("Frequency")
    pylab.ylabel("Module")

    # Frequency axis on top, module names on the left.
    axes = pylab.gca()
    axes.get_xaxis().tick_top()
    axes.get_xaxis().set_label_position('top')
    axes.get_yaxis().tick_left()

    # Tall figure: one row per module needs a lot of vertical space.
    pylab.gcf().set_size_inches(12, 60)
    axes.set_position([0.25, 0.04, 0.5, 0.91])
    pylab.savefig(output_path)


def main():
    """Read the results file and write the bar chart."""
    entries = top_entries(read_frequencies(RESULTS_PATH), TOP_N)
    if not entries:
        sys.exit("%s contains no data rows; nothing to plot" % RESULTS_PATH)
    draw_chart(entries, CHART_PATH)


if __name__ == '__main__':
    main()