# NB: This code is neither efficient nor elegant, but it gets the job done. Let me know if you have any problems or suggestions.
"""Generate random haikus from the sentences of a large text file.

Sentences are bucketed by an estimated syllable count (5 or 7) using
Danny Obrien's heuristic syllable counter, and haikus are assembled by
picking a random 5-, 7- and 5-syllable sentence.
"""
from random import randint
import re

# ----- BEGIN Danny Obrien's Syllable Counter ----- #

# Patterns that make the raw vowel-group count one too high.
SubSyl = [
    r'cial',
    r'tia',
    r'cius',
    r'cious',
    r'giu',               # belgium!
    r'ion',
    r'iou',
    r'sia$',
    r'.ely$',             # absolutely! (but not ely!)
]

# Patterns that make the raw vowel-group count one too low.
# Raw strings matter here: without the r-prefix, '\1' is the control
# character chr(1) rather than a backreference to group 1.
AddSyl = [
    r'ia',
    r'riet',
    r'dien',
    r'iu',
    r'io',
    r'ii',
    r'[aeiouym]bl$',      # -Vble, plus -mble
    r'[aeiou]{3}',        # agreeable
    r'^mc',
    r'ism$',              # -isms
    r'([^aeiouy])\1l$',   # middle twiddle battle bottle, etc.
    r'[^l]lien',          # alien, salient [1]
    r'^coa[dglx].',       # [2]
    r'[^gq]ua[^auieo]',   # i think this fixes more than it breaks
    r'dnt$',              # couldn't
]

# Compiled once so the per-word loop does no pattern lookups.
_SUB_PATTERNS = tuple(re.compile(p) for p in SubSyl)
_ADD_PATTERNS = tuple(re.compile(p) for p in AddSyl)
_VOWEL_RUN = re.compile(r'[aeiouy]+')
_TRAILING_E = re.compile(r'e$')


def syllables(word):
    """Return a heuristic estimate of the number of syllables in *word*.

    The estimate is the number of vowel runs (after dropping a trailing
    'e'), minus one for every ``SubSyl`` pattern that matches and plus one
    for every ``AddSyl`` pattern that matches. The result is never less
    than 1, so an empty string also yields 1.
    """
    # Apostrophes are dropped so that contractions can hit the 'dnt$' rule.
    munged = _TRAILING_E.sub('', word.lower().replace("'", ""))

    # A single remaining letter ("a", "I", or "he" -> "h") is one syllable.
    # Counting its vowel run on top of that would report two.
    if len(munged) <= 1:
        return 1

    count = len(_VOWEL_RUN.findall(munged))
    count -= sum(1 for pattern in _SUB_PATTERNS if pattern.search(munged))
    count += sum(1 for pattern in _ADD_PATTERNS if pattern.search(munged))
    return max(count, 1)

# ----- END Danny Obrien's Syllable Counter ----- #


# The huge text file the haikus are drawn from.
TEXT_PATH = '/Users/jsomers/Desktop/ulyss12.txt'


def count_sentence_syllables(sentence):
    """Return the summed syllable estimate of every word in *sentence*.

    Words are split on any whitespace run, so leading, trailing or doubled
    spaces do not produce empty "words" (which ``syllables`` would count
    as one syllable each).
    """
    return sum(syllables(word) for word in sentence.split())


def split_sentences(lines):
    """Join *lines* with spaces and split the result on '. '.

    Surrounding whitespace is stripped from each sentence and empty
    sentences are dropped.
    """
    text = " ".join(lines)
    return [s.strip() for s in text.split(". ") if s.strip()]


def read_sentences(path):
    """Read the text file at *path* and return its list of sentences."""
    # Plain open() instead of mode 'rU': the 'U' flag was removed in
    # Python 3.11. splitlines() handles \n, \r\n and \r line endings and
    # never chops a real character off an unterminated last line.
    with open(path) as handle:
        return split_sentences(handle.read().splitlines())


def sort_sentences(sentences):
    """Return ``(fives, sevens)``: sentences of exactly 5 and 7 syllables."""
    fives, sevens = [], []
    for sentence in sentences:
        total = count_sentence_syllables(sentence)  # computed once per sentence
        if total == 5:
            fives.append(sentence)
        elif total == 7:
            sevens.append(sentence)
    return fives, sevens


def compose_haiku(fives, sevens):
    """Return a random ``(five, seven, five)`` tuple of sentences.

    Raises:
        ValueError: if either list is empty.
    """
    if not fives or not sevens:
        raise ValueError("need at least one 5-syllable and one 7-syllable sentence")
    return (
        fives[randint(0, len(fives) - 1)],
        sevens[randint(0, len(sevens) - 1)],
        fives[randint(0, len(fives) - 1)],
    )


def main(path=TEXT_PATH, count=100):
    """Print *count* random haikus built from the text file at *path*."""
    fives, sevens = sort_sentences(read_sentences(path))
    for _ in range(count):
        # Single-argument print() behaves the same on Python 2 and 3.
        print("%s\n%s\n%s\n\n" % compose_haiku(fives, sevens))


if __name__ == "__main__":
    main()