from scrapy.spider import BaseSpider
from scrapy.http import Request
from scrapy.selector import XmlXPathSelector
from openrecipes.spiders.norecipes_spider import NorecipesMixin


class NorecipesSpider(BaseSpider, NorecipesMixin):
    """
    Feed spider for norecipes.com.

    This parses the RSS feed for norecipes.com, grabs the original
    links to each entry, and scrapes just those pages. This should be used
    to keep up to date after we have backfilled the existing recipes by
    crawling the whole site.
    """
    # Distinct spider name so the site-crawl spider and this feed spider
    # can be run independently.
    name = "norecipes.feed"
    # The feed is served through FeedBurner, and entry links bounce
    # through feedproxy.google.com before landing on norecipes.com.
    allowed_domains = [
        "norecipes.com",
        "feeds.feedburner.com",
        "feedproxy.google.com",
    ]
    start_urls = [
        "http://feeds.feedburner.com/NoRecipes",
    ]

    def parse(self, response):
        """
        Parse the RSS feed and follow each entry's original link.

        We define a custom parser here because we need to get the link from
        the feed item and then follow it to get the recipe data.

        Getting the data from the feed's encoded content (the
        ``content:encoded`` element) seems overly complex, as we would have
        to decode all the encoded characters and then build a DOM from that.

        :param response: the Response for the feed URL
        :return: a list of Requests, one per feed entry, each handled by
            ``self.parse_item``
        """
        xxs = XmlXPathSelector(response)
        # FeedBurner stores the real article URL in feedburner:origLink;
        # local-name() lets us match it without declaring the namespace.
        links = xxs.select("//item/*[local-name()='origLink']/text()").extract()

        # self.parse_item comes from NorecipesMixin
        return [Request(x, callback=self.parse_item) for x in links]