Tracking Tiger — The Code

If this code scares you, keep in mind that there are plenty of ways to do scraping in Python, including BeautifulSoup and scrape.py. Yet even these have a non-trivial learning curve and a lot of rough edges. It makes you think the semantic web is really worth pursuing...

						import time
						import re
						import urllib
						import smtplib
						

						def update(the_score):
							"""Poll the Masters leaderboard forever and e-mail Tiger Woods' score changes.

							Every five minutes the leaderboard page is fetched and scraped into a
							list of (name, score) tuples; the first scraped entry is treated as
							the leader. Whenever Tiger's scraped score differs from the last
							reported one, the '[body]' placeholder in the message template file
							is filled in and the result is sent through Gmail's SMTP server.

							Args:
								the_score: the score most recently reported for Tiger. An e-mail
									is sent the first time the scraped score differs from it.

							Returns:
								Never returns normally; the polling loop runs until an exception
								(network, SMTP, file or parsing error) propagates to the caller.
							"""
							# urlopen lives in urllib.request on Python 3 and in urllib on Python 2.
							try:
								from urllib.request import urlopen
							except ImportError:
								from urllib import urlopen

							# Compile the patterns once instead of once per scraped row.
							# Non-greedy regex to pick out relevant blocks
							block_re = re.compile(r'\/scorecard\/\d+.html">.*?\d+<\/td>')
							link_re = re.compile(r'\/scorecard\/\d+.html">')
							score_re = re.compile(r'-?\d+')
							markup_re = re.compile(r'<.*')

							# Loop rather than recurse: one stack frame per poll would eventually
							# exceed the interpreter's recursion limit and kill the poller.
							while True:
								# wait 5 minutes to poll the site
								time.sleep(300)

								# grab the raw leaderboard
								page = urlopen('http://www.majorschampionships.com/masters/2008/scoring/')
								try:
									html = page.readlines()
								finally:
									page.close()

								# str() of the list of lines yields a single string with newlines
								# escaped, so '.' in block_re can match across source lines.
								raw = block_re.findall(str(html))

								# Fill out our leaderboard, cleaning things up along the way:
								# [('Trevor Immelman', -11), ('Brandt Snedeker', -9), ...]
								board = []
								for item in raw:
									# Drop the leading link markup; the player name is then
									# everything up to the next tag, and the score is the first
									# (optionally negative) integer in the remainder.
									close = link_re.sub('', item)
									name = markup_re.sub('', close)
									board.append((name, int(score_re.findall(close)[0])))

								# Nothing scraped (page changed or empty): try again next round
								# instead of failing on board[0].
								if not board:
									continue

								# Set some useful variables
								leader, leader_score = board[0]

								# None means "Tiger not on the board", which is distinct from a
								# genuine score of 0 (even par).
								tiger_score = None
								for name, score in board:
									if name == 'Tiger Woods':
										tiger_score = score

								# Only send an e-mail if there is anything new to report.
								if tiger_score is None or tiger_score == the_score:
									continue

								the_score = tiger_score
								with open('/path/to/msg.txt', 'r') as template:
									msg = template.read()

								# Compose the message
								if the_score > leader_score:
									body = 'Tiger is now at %s trailing %s by %s strokes.' % (
										the_score, leader, abs(leader_score - tiger_score))
								else:
									body = 'Tiger is leading the Masters.'
								m = msg.replace('[body]', body)

								# use gmail as our SMTP server
								server = smtplib.SMTP('smtp.gmail.com', 587)
								try:
									server.ehlo()
									server.starttls()
									server.ehlo()
									server.login('[redacted]@gmail.com', '[redacted]')
									server.sendmail('[redacted]@gmail.com', '[redacted]@tmo.blackberry.net', m)
									server.quit()
								finally:
									# Release the socket even if a step above raised; close()
									# is harmless after a successful quit().
									server.close()