#!/usr/bin/env python
from BeautifulSoup import BeautifulSoup, Tag, NavigableString
from PyRSS2Gen import RSSItem, Guid
import ScrapeNFeed
import string
import urllib
import datetime
class EatTheSeasonsFeed(ScrapeNFeed.ScrapedFeed):
    """Feed that publishes the page's VEGETABLES section as one RSS item."""

    def HTML2RSS(self, headers, body):
        """Turn the fetched page into a single RSS item listing vegetables.

        headers -- supplied by the caller; not used here.
        body    -- HTML of the page.  It must contain a text node that is
                   exactly "VEGETABLES"; otherwise the lookups below fail
                   with AttributeError on None.

        The resulting item is handed to self.addRSSItems(); nothing is
        returned.
        """
        link = "http://eattheseasons.co.uk"
        page = BeautifulSoup(body)

        # The element that directly encloses the "VEGETABLES" heading text
        # is treated as the container of the '|'-separated vegetable names.
        container = page.find('font', text="VEGETABLES").parent

        # Split on '|', trim each piece and end it with a <br /> tag.  The
        # tag is only a separator: re-parsing the joined markup gives every
        # name its own text node, after which the <br /> tags are removed.
        pieces = [piece.strip() + '<br />'
                  for piece in str(container).split('|')]
        soup = BeautifulSoup(''.join(pieces))
        for line_break in soup.findAll('br'):
            line_break.extract()

        # Drop text nodes that are nothing but a newline, then the heading.
        for text_node in soup.findAll(text=True):
            if text_node == '\n':
                text_node.extract()
        soup.find(text='VEGETABLES').extract()

        # Build <ul><li>name</li>...</ul> from the remaining text nodes.
        list_soup = BeautifulSoup()
        list_tag = Tag(list_soup, "ul")
        list_soup.insert(0, list_tag)
        for position, name in enumerate(soup.findAll(text=True)):
            entry = Tag(list_soup, "li")
            entry.insert(0, NavigableString(name))
            list_tag.insert(position, entry)

        # Begin feed.  The description is serialised to markup text rather
        # than passed as a BeautifulSoup object, since the RSS writer
        # expects a string for this field.
        items = [RSSItem(title="Veggies for " + str(datetime.date.today()),
                         description=str(list_soup),
                         link=link)]
        self.addRSSItems(items)
# Script entry: hand the feed settings to ScrapedFeed.load().  The positional
# values are, in order: feed title, site URL, feed description and two file
# names (presumably the RSS output and the pickled feed state -- confirm
# against the ScrapeNFeed documentation).
_feed_args = (
    "Food In Season This Week",
    'http://www.eattheseasons.co.uk',
    "Keep track of the fruits and vegetables available at market this week!",
    'seasonal.xml',
    'seasonal.pickle',
)
_feed_options = {'managingEditor': 'lev@mollusc.org (Lev Rickards)'}
EatTheSeasonsFeed.load(*_feed_args, **_feed_options)