forked from oliciv/python-gumtree-scraper
-
Notifications
You must be signed in to change notification settings - Fork 1
/
test_scraper.py
97 lines (64 loc) · 2.03 KB
/
test_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
"""TestScraper

Tests the scraper

Usage:
    test_scraper.py debug listing
    test_scraper.py debug ad

Options:
    -h --help     Show this screen.
    --version     Show version.
"""
# NOTE: the module docstring above is passed to docopt at runtime, so its
# layout is significant: usage patterns must be indented under "Usage:" and
# each option must be separated from its description by at least two spaces.
import logging
import os
import sys

from docopt import docopt

from entities.GTListingQuery import GumtreeListingQuery

__author__ = "Indika Piyasena"

# Module-level logger; TestScraper.configure_logging() attaches the handler.
logger = logging.getLogger(__name__)
class TestScraper:
    """Command-line harness for debugging the Gumtree scraper.

    ``process`` parses the command line with docopt (using the module
    docstring as the usage text) and dispatches to ``debug_ad`` or
    ``debug_listing``.
    """

    # Site root handed to every GumtreeListingQuery built by this class.
    BASE_URL = 'http://www.gumtree.com.au'
    # Second and third positional arguments for GumtreeListingQuery; their
    # exact meaning is defined by that class (not visible in this file).
    _QUERY_ARGS = ('west-end-brisbane', 'c18294l3005921')

    def __init__(self):
        """Initialise default state and configure logging."""
        # Set here (not only in process()) so the debug_* helpers can be
        # called directly without raising AttributeError.
        self.arguments = None
        self.base_url = self.BASE_URL
        self.query_objects = []
        self.configure_logging()

    def _default_query(self):
        """Build the listing query used by both process() and debug_listing()."""
        return GumtreeListingQuery(self.base_url, *self._QUERY_ARGS)

    def process(self):
        """Parse command-line arguments and run the requested debug action."""
        # __doc__ here resolves to the *module* docstring (methods look names
        # up in module globals, not in the class namespace).
        self.arguments = docopt(__doc__, version='TestScraper 0.2')
        logger.info('TestScraper started...')

        self.base_url = self.BASE_URL
        self.query_objects = [self._default_query()]

        if self.arguments['debug']:
            if self.arguments['ad']:
                self.debug_ad()
            if self.arguments['listing']:
                self.debug_listing()

    def debug_ad(self):
        """Placeholder for debugging a single ad; currently does nothing."""
        # Get the first ad in the cache
        # Need a constructor: create ad with file
        pass

    def debug_listing(self):
        """Print the cache file name reported by the default listing query."""
        query = self._default_query()
        # Call form of print works on both Python 2 and Python 3.
        print(query.cache_file_name())

    def configure_logging(self):
        """Send DEBUG-and-above records from the module logger to the console.

        Safe to call repeatedly: the stream handler is attached only when the
        logger has no handler yet, so creating several TestScraper instances
        does not duplicate every log line.
        """
        logger.setLevel(logging.DEBUG)
        if logger.handlers:
            return

        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        console.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(console)

    def log(self):
        """Placeholder hook; currently does nothing."""
        pass
def test_something():
    """Smoke test: a TestScraper can be constructed and its stub hook called.

    The previous body called ``create_record()``, which TestScraper does not
    define, so the test always failed with AttributeError before reaching its
    (vacuous) ``assert(True)``.
    """
    test_scraper = TestScraper()
    # log() is a no-op stub, so it must return None without raising.
    assert test_scraper.log() is None
if __name__ == "__main__":
    # Call form of print is valid on both Python 2 and Python 3; the original
    # print statement is a SyntaxError on Python 3.
    print("Running TestScraper in stand-alone-mode")

    # Switch the working directory to the one containing this script before
    # running the scraper.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)

    test_scraper = TestScraper()
    test_scraper.process()