#!/usr/bin/env python
"""Publish the vegetables listed on eattheseasons.co.uk as an RSS feed.

The page is scraped through ScrapeNFeed; the text found in the page's
"VEGETABLES" section is rebuilt as an HTML unordered list and used as the
description of a single RSS item dated with today's date.
"""

import datetime
import string
import urllib

from BeautifulSoup import BeautifulSoup, Tag, NavigableString
from PyRSS2Gen import RSSItem, Guid
import ScrapeNFeed


class EatTheSeasonsFeed(ScrapeNFeed.ScrapedFeed):
    """Scraped feed that reports the contents of the VEGETABLES section."""

    def HTML2RSS(self, headers, body):
        """Convert the scraped page into one RSS item and add it to the feed.

        headers -- passed in by the caller; not used by this method.
        body    -- HTML source of the page to scrape.

        The method returns nothing; the item is handed to addRSSItems().
        If no "VEGETABLES" text is found, the lookups below yield None and
        the following attribute access fails with AttributeError.
        """
        link = "http://eattheseasons.co.uk"

        page = BeautifulSoup(body)
        # The section of interest is the parent of the "VEGETABLES" text.
        container = page.find('font', text="VEGETABLES").parent

        # Entries in the section are separated by '|'.  Trim each piece and
        # append a tag so that, once re-parsed, every entry ends up in a
        # text node of its own.
        # NOTE(review): the tag literals in this method were unreadable in
        # the reviewed copy (they had been rendered as list bullets); they
        # are reconstructed here as <li> / </li> -- confirm against the
        # original file.
        pieces = [piece.strip() + '<li>'
                  for piece in str(container).split('|')]

        # Re-parse the cleaned markup, then drop line breaks, bare newline
        # text nodes and the section heading so only the entries remain.
        section = BeautifulSoup(''.join(pieces))
        for line_break in section.findAll('br'):
            line_break.extract()
        for text_node in section.findAll(text=True):
            if text_node == '\n':
                text_node.extract()
        section.find(text='VEGETABLES').extract()

        # Build <ul> ... </ul> with one list item per remaining text node.
        listing = BeautifulSoup()
        unordered = Tag(listing, "ul")
        listing.insert(0, unordered)
        for position, entry in enumerate(section.findAll(text=True)):
            unordered.insert(position,
                             NavigableString("<li>" + entry + "</li>"))

        # Hand PyRSS2Gen the rendered markup as a string instead of the
        # soup object: string descriptions are written as character data,
        # while any other object is expected to provide its own publish().
        items = [RSSItem(title="Veggies for " + str(datetime.date.today()),
                         description=str(listing),
                         link=link)]
        self.addRSSItems(items)


# Runs on execution (and on import, as in the original script).
EatTheSeasonsFeed.load("Food In Season This Week",
                       'http://www.eattheseasons.co.uk',
                       "Keep track of the fruits and vegetables available "
                       "at market this week!",
                       'seasonal.xml',
                       'seasonal.pickle',
                       managingEditor='lev@mollusc.org (Lev Rickards)')