"""Print a code-search figure for every name on the Python module index.

The script scrapes the text of each ``<tt>`` element on ``MOD_INDEX``, asks a
code search for import statements mentioning that name, and prints one
``name, freq`` line per name.  All work happens at module level, so simply
running (or importing) this file performs the network requests.
"""

import sys

# HACK (kept from the original, which called it a "crazy hack"): extend the
# import path with a hard-coded python-support directory.  Presumably this was
# required on the author's system -- verify before removing.
sys.path.append("/usr/share/python-support/python-elementtree")

from operator import attrgetter

try:  # Python 2
    from itertools import imap
except ImportError:  # Python 3: the built-in map is already lazy
    imap = map

try:  # Python 2
    from urllib import quote_plus, urlopen
except ImportError:  # Python 3
    from urllib.parse import quote_plus
    from urllib.request import urlopen

from lxml.etree import HTMLParser, parse

MOD_INDEX = "http://docs.python.org/modindex.html"
# NOTE(review): the code-search endpoint below may no longer be available --
# confirm it still responds before relying on the output.
GOOGLE_SEARCH_PREFIX = "http://www.google.com/codesearch?hl=en&lr=&"
# Raw string: in a plain literal "\b" is a backspace character (0x08), so the
# query never contained the intended regex word boundary.
GOOGLE_SEARCH_TEMPLATE = r"q=lang:python (import|from)\s%s\b"


def ImportHTML(uri, xpath):
    """Fetch *uri*, parse it as HTML and return the text of matching nodes.

    uri   -- address handed to ``urlopen``.
    xpath -- XPath expression evaluated against the parsed document.

    Returns a list holding the ``text`` attribute of every node selected by
    *xpath*, in the order ``tree.xpath`` yields them.
    """
    response = urlopen(uri)
    try:
        tree = parse(response, HTMLParser())
    finally:
        # The document is fully parsed at this point; release the connection.
        response.close()
    # Materialise the result so callers can truth-test and index it on both
    # Python 2 and Python 3.
    return list(map(attrgetter("text"), tree.xpath(xpath)))


def find_refs(name):
    """Return the code-search figure scraped for imports of *name*.

    The query is ``GOOGLE_SEARCH_TEMPLATE`` filled in with *name* and
    URL-quoted (``=`` is left unescaped so the ``q=`` parameter survives).
    The value returned is the text of the first node matching the status-text
    XPath below -- presumably the hit count shown on the results page -- or
    an empty string when nothing matches.
    """
    uri = GOOGLE_SEARCH_PREFIX + quote_plus(GOOGLE_SEARCH_TEMPLATE % name, "=")
    matches = ImportHTML(uri, '//*[@id="statustext"]//font/b[3]')
    if matches:
        return matches[0]
    return ""


def tuple_map(*funcs):
    """Return a one-argument function that applies every one of *funcs*.

    The returned function maps ``x`` to ``(funcs[0](x), funcs[1](x), ...)``.
    """
    def mapper(x):
        return tuple(func(x) for func in funcs)
    return mapper


def ident(x):
    """Return *x* unchanged."""
    return x


modules = ImportHTML(MOD_INDEX, "//tt")
# Lazy mapping: each search request is issued only when its row is printed.
results = imap(tuple_map(ident, find_refs), modules)

for name, freq in results:
    print("%s, %s" % (name, freq))