Skip to content

Commit

Permalink
l.load_item - draft
Browse files Browse the repository at this point in the history
  • Loading branch information
Simon Hardy committed Jan 30, 2018
1 parent 785ed1a commit ff2d914
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 21 deletions.
21 changes: 0 additions & 21 deletions spiders/cordis_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,24 +31,3 @@ def parse(self, response):
#for eu in response.css('div.objective'):
item['Technology_Description'] = response.css('p::text').extract_first()
yield item
"""
# Draft ItemLoader-based variant of parse(): builds a loader around a fresh
# CordisItem bound to `response`, registers one selector per field, and
# returns the loaded item. Kept inside a string literal, so it never runs.
def parse(self, response):
l = ItemLoader(item=CordisItem(), response=response)
# Header fields located relative to the element with id="dynamiccontent".
l.add_xpath('Project_ACR', '//*[@id="dynamiccontent"]/div[1]/h1/text()')
l.add_xpath('Project_Title', '//*[@id="dynamiccontent"]/h2/text()')
# Fields read from the first column (div[3]/div/div[1]) of the details box.
l.add_xpath('Total_Cost', '//*[@id="dynamiccontent"]/div[3]/div/div[1]/div[1]/text()')
l.add_xpath('EU_Contribution', '//*[@id="dynamiccontent"]/div[3]/div/div[1]/div[2]/text()')
l.add_xpath('Coordinated_in', '//*[@id="dynamiccontent"]/div[3]/div/div[1]/div[3]/text()')
# Fields read from the second column (div[3]/div/div[2]) of the details box.
l.add_xpath('Topic_s', '//*[@id="dynamiccontent"]/div[3]/div/div[2]/div[1]/a/text()')
l.add_xpath('Call_for_Proposal', '//*[@id="dynamiccontent"]/div[3]/div/div[2]/div[2]/text()')
l.add_xpath('Funding_scheme', '//*[@id="dynamiccontent"]/div[3]/div/div[2]/div[3]/text()')
# NOTE(review): the disabled line below is not valid Python as written
# (unbalanced quotes, trailing map(...) expression); it needs rewriting
# before it can be re-enabled.
#l.add_xpath('Project_ID', '//*[@id="dynamiccontent"]/div[1]/text()'.re('[.0-9]+')') map(unicode.strip, response.xpath('.//*[@id="dynamiccontent"]/div[1]/text()').re('[.0-9]+'))
# 'To' and 'From' take the 3rd and 2nd text nodes of div[2]; normalize-space()
# trims and collapses their whitespace.
l.add_xpath('To', 'normalize-space(.//*[@id="dynamiccontent"]/div[2]/text()[3])')
l.add_xpath('From', 'normalize-space(.//*[@id="dynamiccontent"]/div[2]/text()[2])')
# NOTE(review): the three disabled lines below pass Python selector code as
# a string with unescaped quotes, so they are not valid as written.
#l.add_xpath('Partners', 'response.css('.name').xpath('text()')')
#l.add_xpath('Country', 'response.css('.country').xpath('text()')')
#l.add_xpath('Activity', 'response.css('.contact').xpath('text()')')
# Author's note: 'p::text' matches every <p> on the page, not just the first.
l.add_css('Technology_Description', 'p::text') # error extract all p
return l.load_item()
"""
Binary file modified spiders/cordis_spider.pyc
Binary file not shown.
21 changes: 21 additions & 0 deletions spiders/draft_loader.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""
def parse(self, response):
    # Draft ItemLoader-based variant of parse(): builds a loader around a
    # fresh CordisItem bound to `response`, registers one selector per field,
    # and returns the loaded item.
    #
    # Only `#` comments are used here (no docstring) because this draft is
    # stored between triple-quote delimiters.
    #
    # NOTE(review): presumably no output processors are declared on
    # CordisItem, in which case every loaded field is a list of matches
    # rather than a bare string -- confirm before swapping this in.
    loader = ItemLoader(item=CordisItem(), response=response)

    # Header fields located relative to the element with id="dynamiccontent".
    loader.add_xpath('Project_ACR', '//*[@id="dynamiccontent"]/div[1]/h1/text()')
    loader.add_xpath('Project_Title', '//*[@id="dynamiccontent"]/h2/text()')

    # First column (div[3]/div/div[1]) of the details box.
    loader.add_xpath('Total_Cost', '//*[@id="dynamiccontent"]/div[3]/div/div[1]/div[1]/text()')
    loader.add_xpath('EU_Contribution', '//*[@id="dynamiccontent"]/div[3]/div/div[1]/div[2]/text()')
    loader.add_xpath('Coordinated_in', '//*[@id="dynamiccontent"]/div[3]/div/div[1]/div[3]/text()')

    # Second column (div[3]/div/div[2]) of the details box.
    loader.add_xpath('Topic_s', '//*[@id="dynamiccontent"]/div[3]/div/div[2]/div[1]/a/text()')
    loader.add_xpath('Call_for_Proposal', '//*[@id="dynamiccontent"]/div[3]/div/div[2]/div[2]/text()')
    loader.add_xpath('Funding_scheme', '//*[@id="dynamiccontent"]/div[3]/div/div[2]/div[3]/text()')

    # 'To' and 'From' take the 3rd and 2nd text nodes of div[2];
    # normalize-space() trims and collapses their whitespace.
    loader.add_xpath('To', 'normalize-space(.//*[@id="dynamiccontent"]/div[2]/text()[3])')
    loader.add_xpath('From', 'normalize-space(.//*[@id="dynamiccontent"]/div[2]/text()[2])')

    # TODO: fields still disabled. The earlier drafts of these lines were not
    # valid Python (selector code quoted as a string); the forms below are.
    # Enable once the fields are confirmed to exist on CordisItem.
    # loader.add_xpath('Project_ID', '//*[@id="dynamiccontent"]/div[1]/text()', re='[.0-9]+')
    # loader.add_css('Partners', '.name::text')
    # loader.add_css('Country', '.country::text')
    # loader.add_css('Activity', '.contact::text')

    # add_css('Technology_Description', 'p::text') collected the text of every
    # <p> on the page. Load only the first match instead; add_value() is given
    # None when there is no <p>, which leaves the field unset.
    loader.add_value('Technology_Description', response.css('p::text').extract_first())

    return loader.load_item()
"""

0 comments on commit ff2d914

Please sign in to comment.