-
Notifications
You must be signed in to change notification settings - Fork 4
/
test_mini_spider.py
147 lines (114 loc) · 3.95 KB
/
test_mini_spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# -*- coding: UTF-8 -*-
'''
Last modified time: 2015-02-25 17:53:01
Edit time: 2015-02-25 17:53:25
File name: test_mini_spider.py
Edited by caimaoy
'''
__author__ = 'caimaoy'
import unittest
import mini_spider_rebuild as mini
import os
from httpretty import HTTPretty, httprettified
FILE_DIR = os.path.dirname(os.path.abspath(__file__))
class FuncTest(unittest.TestCase):
    """Tests for the module-level helper functions of the spider."""

    def setUp(self):
        """Initialization hook; these tests need no fixtures."""

    def tearDown(self):
        """Cleanup hook; these tests leave nothing behind."""

    def test_trans_url(self):
        """Check ``mini.trans_url`` against a table of (input, expected) pairs.

        The first pair expects every character of the input to come back as
        an underscore; the second expects the input to come back unchanged.
        """
        cases = (
            (r'/\:?<>"\*', r'_________'),
            ('1.+', '1.+'),
        )
        for raw, expected in cases:
            self.assertEqual(mini.trans_url(raw), expected, 'failed')
class DownloadTest(unittest.TestCase):
    """Tests for ``mini.download_file_to_local``."""

    # Destination path written by the tests; removed again in tearDown.
    to = os.path.join(FILE_DIR, 'test_download_file_to')
    # file:// URL of the fixture file that sits next to this test module.
    fm = 'file:///' + os.path.join(FILE_DIR, 'test_download_file')

    def setUp(self):
        """Initialization hook; nothing to prepare."""

    def tearDown(self):
        """Remove the downloaded file if a test created it."""
        target = self.to
        if not os.path.exists(target):
            return
        os.remove(target)

    def test_download_file(self):
        """Downloading the fixture URL creates the destination file."""
        mini.download_file_to_local(self.fm, self.to)
        created = os.path.exists(self.to)
        self.assertTrue(created)

    def test_error_url_(self):
        """Passing 'error_url' as the source must not raise."""
        mini.download_file_to_local('error_url', self.to)
class DownloadWorkerTest(unittest.TestCase):
    """Tests for ``mini.DownloadWorker``."""

    # file:// URL of the fixture file that sits next to this test module.
    fm = ''.join(['file:///', os.path.join(FILE_DIR, 'test_download_file')])
    # Kept for backward compatibility; no test in this class reads it.
    to = os.path.join(FILE_DIR, 'test_download_file_to')

    def setUp(self):
        """Initialization hook; nothing to prepare."""

    def tearDown(self):
        """Cleanup hook; per-test cleanup is registered via addCleanup."""

    @staticmethod
    def _make_worker(url):
        """Build a DownloadWorker for *url* with the arguments every test uses.

        NOTE(review): the meaning of the trailing positional arguments
        ('.', 'reg', 0, 3, 1, 1) is defined by mini.DownloadWorker and is
        not visible from this file -- confirm against its signature.
        """
        return mini.DownloadWorker(url, '.', 'reg', 0, 3, 1, 1)

    @staticmethod
    def _remove_if_exists(path):
        """Delete *path* when it exists; do nothing otherwise."""
        if os.path.exists(path):
            os.remove(path)

    @httprettified
    def test_get_url_text(self):
        """get_url_text returns the body served for the worker's URL."""
        HTTPretty.register_uri(HTTPretty.GET, 'http://www.test.com',
                               body='xxx')
        dw = self._make_worker('http://www.test.com')
        self.assertEqual(dw.get_url_text(), 'xxx')

    def test_create_download_url(self):
        """create_download_url maps each input link to the expected URL."""
        dw = self._make_worker('http://www.test.com')
        expected_by_link = {
            r'http://xxx': 'http://xxx',
            r'//xxx': 'http://xxx',
            r'xxx': 'http://www.test.com/xxx',
        }
        for link, expected in expected_by_link.items():
            self.assertEqual(dw.create_download_url(link), expected)

    @httprettified
    def test_get_url_text_404(self):
        """get_url_text must not raise when the server answers 404."""
        HTTPretty.register_uri(HTTPretty.GET, 'http://www.test.com',
                               body='',
                               status=404)
        dw = self._make_worker('http://www.test.com')
        dw.get_url_text()

    def test_get_url_text_wrong_url(self):
        """get_url_text must not raise for the URL 'http://wrong.url.me'."""
        # NOTE(review): this test is not wrapped in @httprettified, so the
        # request is not intercepted by HTTPretty here.
        dw = self._make_worker('http://wrong.url.me')
        dw.get_url_text()

    def test_download_file(self):
        """download_file creates the local file, also when called twice."""
        dw = self._make_worker('http://no.url.me')
        local_path = dw.url_to_localfile(self.fm)

        # Start from a clean slate, and guarantee removal afterwards even if
        # an assertion or the download itself fails (the previous trailing
        # cleanup call was skipped on failure and leaked the file).
        self._remove_if_exists(local_path)
        self.addCleanup(self._remove_if_exists, local_path)

        dw.download_file(self.fm)
        self.assertTrue(os.path.exists(dw.url_to_localfile(self.fm)))

        # Second call exercises the path where the target already exists.
        dw.download_file(self.fm)
        self.assertTrue(os.path.exists(dw.url_to_localfile(self.fm)))
class SpiderManagerTest(unittest.TestCase):
    """Tests for constructing ``mini.SpiderManager`` from config files."""

    def setUp(self):
        """Resolve the config file paths located next to this test module."""
        self.wrong_url_config = os.path.join(FILE_DIR, 'wrong_url.conf')
        self.config_file = os.path.join(FILE_DIR, 'spider.conf')

    def tearDown(self):
        """Cleanup hook; nothing to clean up."""

    def test_spidermanage_ini(self):
        """Construction succeeds for the good config and raises otherwise."""
        # The regular config must load without raising.
        mini.SpiderManager(self.config_file)

        # The path 'wrong_file' is expected to raise AttributeError.
        with self.assertRaises(AttributeError):
            mini.SpiderManager('wrong_file')

        # The wrong-URL config is expected to raise IOError.
        with self.assertRaises(IOError):
            mini.SpiderManager(self.wrong_url_config)
# Run every test case in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()