# Python documentation page from which the module names are scraped.
MOD_INDEX = "http://docs.python.org/modindex.html"
# Fixed leading part of the Google Code Search URL; the encoded query
# built from GOOGLE_SEARCH_TEMPLATE is appended to it.
GOOGLE_SEARCH_PREFIX = "http://www.google.com/codesearch?hl=en&lr=&"
# Query template; %s is replaced by a module name.  This must be a raw
# string: in a plain literal "\b" is a backspace character, not the regex
# word-boundary escape the query is meant to contain.
GOOGLE_SEARCH_TEMPLATE = r"q=lang:python (import|from)\s%s\b"
import sys
# HACK: hard-coded, system-specific module search path; adjust or remove it for your installation.
sys.path.append("/usr/share/python-support/python-elementtree")
from itertools import imap
from operator import attrgetter
from urllib import urlopen, quote_plus
from lxml.etree import HTMLParser, parse
def ImportHTML(uri, xpath):
    """Fetch an HTML page and return the text of the nodes matching an XPath.

    Args:
        uri: Address of the page; passed straight to ``urlopen``.
        xpath: XPath expression evaluated against the parsed document.

    Returns:
        A list holding the ``text`` attribute of every node the expression
        selects, in the order ``tree.xpath`` returns them.
    """
    response = urlopen(uri)
    try:
        tree = parse(response, HTMLParser())
    finally:
        # Release the connection whether or not parsing succeeded.
        response.close()
    # Build a real list: callers index and truth-test the result.
    return [node.text for node in tree.xpath(xpath)]
def find_refs(name):
    """Query Google Code Search for imports of ``name``.

    The query is GOOGLE_SEARCH_TEMPLATE filled in with ``name``, URL-encoded
    (keeping ``=`` unescaped) and appended to GOOGLE_SEARCH_PREFIX.

    Returns:
        The text of the first node matched by the status-text XPath on the
        result page (presumably the hit count -- verify against the live
        page), or "" when the XPath matches nothing.
    """
    query = quote_plus(GOOGLE_SEARCH_TEMPLATE % name, '=')
    matches = ImportHTML(GOOGLE_SEARCH_PREFIX + query,
                         '//*[@id="statustext"]//font/b[3]')
    return matches[0] if matches else ""
def tuple_map(*funcs):
    """Combine several one-argument callables into one tuple-returning callable.

    The returned function applies each of ``funcs`` to its single argument,
    in the order given, and returns the results as a tuple.
    """
    def mapper(x):
        outputs = []
        for apply_one in funcs:
            outputs.append(apply_one(x))
        return tuple(outputs)
    return mapper
def ident(x):
    """Identity function: return the argument unchanged."""
    return x
# Script body: scrape the module names from the documentation index, then
# pair each name with whatever find_refs reports for it.
modules = ImportHTML(MOD_INDEX, "//tt")
# imap is lazy, so each search request is issued only as the loop below
# reaches that module.
results = imap(tuple_map(ident, find_refs), modules)
for name, freq in results:
    # One "name, frequency" line per module.
    print("%s, %s" % (name, freq))