# NB. This code is neither efficient nor elegant, but it gets the job done. Let me know if you have any problems or suggestions.
from random import randint
import re
# ----- BEGIN Danny Obrien's Syllable Counter ----- #
# Regexes that each take one syllable off the count when they match the
# munged word inside syllables().
SubSyl = [
    r'cial',
    r'tia',
    r'cius',
    r'cious',
    r'giu',    # e.g. "belgium"
    r'ion',
    r'iou',
    r'sia$',
    r'.ely$',  # e.g. "absolutely"; needs a character before "ely", so not "ely" itself
]
# Regexes that each add one syllable to the count when they match the munged
# word inside syllables().  Raw strings are used throughout so that backslash
# sequences reach the regex engine untouched.
AddSyl = [
    r'ia',
    r'riet',
    r'dien',
    r'iu',
    r'io',
    r'ii',
    r'[aeiouym]bl$',     # -Vble, plus -mble
    r'[aeiou]{3}',       # agreeable
    r'^mc',
    r'ism$',             # -isms
    # Doubled consonant followed by a final "l": middle, twiddle, battle,
    # bottle, etc. (syllables() has already stripped the trailing "e").
    # This MUST be a raw string: in a plain literal '\1' is the control
    # character chr(1), not a backreference, so the rule never matched.
    r'([^aeiouy])\1l$',
    r'[^l]lien',         # alien, salient [1]
    r'^coa[dglx].',      # [2]
    r'[^gq]ua[^auieo]',  # i think this fixes more than it breaks
    r'dnt$',             # couldn't
]
def syllables(word):
    """Estimate the number of syllables in an English word.

    Heuristic: lower-case the word, drop apostrophes and one trailing "e",
    count the runs of vowels (a, e, i, o, u, y), then subtract one for every
    SubSyl pattern that matches and add one for every AddSyl pattern that
    matches.

    Args:
        word: the word to measure (a string).

    Returns:
        0 for an empty string, otherwise an int >= 1.
    """
    if not word:
        # No word, no syllables.  Without this, the empty tokens produced by
        # splitting text on single spaces would each count as one syllable.
        return 0

    # Fold contractions so that patterns such as 'dnt$' (couldn't) can match,
    # then drop a single trailing "e".
    munged = re.sub(r'e$', '', word.lower().replace("'", ''))

    # Each run of vowels is a candidate syllable.
    count = len(re.findall(r'[aeiouy]+', munged))

    # Special cases.
    count -= sum(1 for pattern in SubSyl if re.search(pattern, munged))
    count += sum(1 for pattern in AddSyl if re.search(pattern, munged))

    # The original also bumped the count when the munged word was a single
    # letter (written `=+ 1`, a typo for `+= 1`).  For a lone consonant the
    # floor below already yields 1, and for a lone vowel ("a", "I") the bump
    # double-counted the vowel group and returned 2, so it is omitted.

    # Every real word has at least one syllable, even with no vowels
    # ("the" after munging, "crwth") or if the adjustments overshoot.
    return max(count, 1)
# ----- END Danny Obrien's Syllable Counter ----- #
# Location of the huge text file to mine for haiku lines.
TEXT_PATH = '/Users/jsomers/Desktop/ulyss12.txt'


def _syllable_total(sentence):
    """Return the summed syllable count of every word in *sentence*."""
    # split() with no argument discards the empty strings that split(" ")
    # yields for leading or repeated spaces; each of those would otherwise
    # be scored as a word of its own.
    return sum(syllables(word) for word in sentence.split())


def _pick(items):
    """Return a random element of the non-empty list *items*."""
    return items[randint(0, len(items) - 1)]


# Open the huge text file and make a list of clean sentences.
# - `with` closes the file once it has been read.
# - Default text mode replaces 'rU', which Python 3.11+ rejects.
# - rstrip() removes only the line ending; the old `line[:-1]` also ate the
#   final character of a last line that had no trailing newline.
# Each line is prefixed with a space so that words on adjacent lines stay
# separated once everything is joined.
with open(TEXT_PATH) as f:
    lines = [" " + line.rstrip("\r\n") for line in f]
big = "".join(lines)
sentences = big.split(". ")

# Make the two sub-lists
fives, sevens = [], []
for s in sentences:
    total = _syllable_total(s)  # computed once per sentence
    if total == 5:
        fives.append(s)
    elif total == 7:
        sevens.append(s)

# randint(0, -1) raises ValueError on an empty list, so stop with a clear
# message instead of a traceback.
if not fives or not sevens:
    raise SystemExit("Not enough 5- or 7-syllable sentences to build a haiku.")

# Print the haikus (5-7-5), each followed by blank lines.  A single-argument
# print() call behaves the same on Python 2 and Python 3.
for _ in range(100):
    print("\n".join([_pick(fives), _pick(sevens), _pick(fives)]) + "\n\n")