# Example: tally the first letter of every word in a text file fetched over HTTP
from urllib.request import urlopen
from collections import Counter
import re
SOURCE_URL = 'http://textfiles.com/100/dodontae.hum'


def fetch_text(url=SOURCE_URL):
    """Download *url* and return its body decoded as UTF-8 text.

    The response is used as a context manager so the connection is closed
    even when reading or decoding raises.
    """
    with urlopen(url) as conn:
        # read() returns bytes; decode once at the I/O boundary
        return conn.read().decode('utf-8')


def normalize_text(text):
    """Return *text* reduced to lowercase ASCII words separated by one space.

    Every character that is not an ASCII letter becomes a space, runs of
    whitespace collapse to a single space, and leading/trailing whitespace
    is removed.
    """
    # replace non-letters with a space
    letters_only = re.sub(r'[^A-Za-z]', ' ', text)
    # condense successive whitespace into a single space
    condensed = re.sub(r'\s+', ' ', letters_only)
    # normalize by lowercasing and trimming the ends
    return condensed.lower().strip()


def split_words(text):
    """Return the list of normalized words found in *text*.

    Splitting without an explicit separator yields an empty list (rather
    than ``['']``) when *text* contains no letters at all.
    """
    return normalize_text(text).split()


def count_first_letters(words):
    """Return a ``Counter`` mapping each word's first character to its tally.

    Empty strings are skipped so that indexing ``word[0]`` cannot fail.
    """
    # NOTE(review): the tally is keyed on word[0] (the first letter), not on
    # the whole word -- confirm that a first-letter histogram is the intent.
    return Counter(word[0] for word in words if word)


if __name__ == '__main__':
    # Explicit names replace the interactive-only ``_`` variable, so this
    # runs the same way as a script and when pasted into the interpreter.
    data = fetch_text()
    words = split_words(data)
    word_count = count_first_letters(words)
    print(word_count.most_common())