-
Notifications
You must be signed in to change notification settings - Fork 118
/
config.yml
176 lines (155 loc) · 6.17 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#使用代理去获取代理IP
proxy:
host: 127.0.0.1
port: 10809
#配置信息
config:
#监听IP
ip: 0.0.0.0
#webApi监听端口
port: 8080
#http隧道代理端口
httpTunnelPort: 8111
#socket隧道代理端口
socketTunnelPort: 8112
#隧道代理更换时间秒
tunnelTime: 60
#可用IP数量小于‘proxyNum’时就去抓取
proxyNum: 30
#代理IP验证间隔秒
verifyTime: 1800
#抓取/检测状态线程数
threadNum: 200
#ip源
spider:
- name: '齐云代理'
#请求方式
method: 'GET'
#post传参用的请求体
#body: ''
#urls请求间隔/秒,防止频率过快被限制
interval: 0
#使用的请求头
Headers:
User-Agent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
#获取的地址
urls: 'https://proxy.ip3366.net/free/?action=china&page=1,https://proxy.ip3366.net/free/?action=china&page=2,https://proxy.ip3366.net/free/?action=china&page=3'
#获取IP的正则表达式,
ip: '\"IP\">(\d+?\.\d+?.\d+?\.\d+?)</td>'
#获取端口的正则表达式
port: '\"PORT\">(\d+?)</td>'
#是否使用代理去请求
proxy: false
- name: "89代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
urls: "https://www.89ip.cn/index_1.html,https://www.89ip.cn/index_2.html,https://www.89ip.cn/index_3.html"
ip: '<td>[\s]*?(\d+?\.\d+?.\d+?\.\d+?)[\s]*?</td>'
port: '<td>[\s]*?\d+?\.\d+?.\d+?\.\d+?[\s]*?</td>[\s]*?<td>[\s]*?(\d+?)[\s]*?</td>'
proxy: false
#每天需要跟换地址
- name: "开心代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
urls: "http://www.kxdaili.com/daili/ip/7713.html"
ip: '\[.+?\](\d+?\.\d+?.\d+?\.\d+?):\d+?@.+?#'
port: '\[.+?\]\d+?\.\d+?.\d+?\.\d+?:(\d+?)@.+?#'
proxy: false
- name: "快代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
urls: "http://www.ip3366.net/?stype=1&page=1,http://www.ip3366.net/?stype=1&page=2,http://www.ip3366.net/?stype=3&page=1,http://www.ip3366.net/?stype=3&page=2"
ip: '<td>[\s]*?(\d+?\.\d+?.\d+?\.\d+?)[\s]*?</td>'
port: '<td>[\s]*?\d+?\.\d+?.\d+?\.\d+?[\s]*?</td>[\s]*?<td>[\s]*?(\d+?)[\s]*?</td>'
proxy: false
- name: "高可用代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
urls: "https://ip.jiangxianli.com/?page=1"
ip: '<td>[\s]*?(\d+?\.\d+?.\d+?\.\d+?)[\s]*?</td>'
port: '<td>[\s]*?\d+?\.\d+?.\d+?\.\d+?[\s]*?</td>[\s]*?<td>[\s]*?(\d+?)[\s]*?</td>'
proxy: false
- name: "小舒代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
urls: "https://xsdaili.cn/dayProxy/ip/1796.html"
ip: '(\d+?\.\d+?.\d+?\.\d+?):\d+?@.+?#\[.+?\]'
port: '\d+?\.\d+?.\d+?\.\d+?:(\d+?)@.+?#\[.+?\]'
proxy: false
- name: "命运零代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
urls: "http://proxylist.fatezero.org/proxy.list"
ip: '\"host\": \"(.+?)\"'
port: '\"port\": (\d+)'
proxy: false
- name: '自由代理'
method: 'GET'
Headers:
#需要使用cookie,不然会有验证
User-Agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
urls: "https://www.freeproxylists.net/zh/?c=CN&u=50&page=1"
ip: '%3e(.{0,60}?)%3c%2f'
port: 'center\">(\d+?)</td><td'
proxy: true
- name: "db代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
urls: "http://proxydb.net/?protocol=http&country=CN,http://proxydb.net/?protocol=https&country=CN,http://proxydb.net/?protocol=socks5&country=CN"
ip: 'href=\"/(\d+?\.\d+?.\d+?\.\d+?)/\d+?#http.{0,1}\">'
port: 'href=\"/\d+?\.\d+?.\d+?\.\d+?/(\d+?)#http.{0,1}\">'
proxy: false
- name: "hidemy代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
urls: "https://hidemy.name/cn/proxy-list/?maxtime=1000&type=h#list,https://hidemy.name/cn/proxy-list/?maxtime=1000&type=h&start=64#list,https://hidemy.name/cn/proxy-list/?maxtime=1000&type=h&start=128#list,https://hidemy.name/cn/proxy-list/?maxtime=5000&type=5#list,https://hidemy.name/cn/proxy-list/?maxtime=5000&type=s#list"
ip: '<td>(\d+?\.\d+?.\d+?\.\d+?)</td><td>\d+</td>'
port: '<td>\d+?\.\d+?.\d+?\.\d+?</td><td>(\d+)</td>'
proxy: true
- name: "scrape代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
urls: "https://api.proxyscrape.com/v2/?request=getproxies&protocol=http&timeout=10000&country=CN&ssl=all&anonymity=all"
ip: '(\d+?\.\d+?.\d+?\.\d+?):\d+'
port: '\d+?\.\d+?.\d+?\.\d+?:(\d+)'
anonymity: '透明'
proxy: true
- name: "my代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
urls: "https://www.my-proxy.com/free-socks-5-proxy.html,https://www.my-proxy.com/free-elite-proxy.html,https://www.my-proxy.com/free-anonymous-proxy.html"
ip: '>(\d+?\.\d+?.\d+?\.\d+?):\d+#'
port: '>\d+?\.\d+?.\d+?\.\d+?:(\d+)#'
proxy: true
- name: "proxy代理"
method: 'GET'
Headers:
User-Agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
urls: "https://free-proxy-list.net/,https://www.us-proxy.org/,https://www.socks-proxy.net/"
ip: '(\d+?\.\d+?.\d+?\.\d+?):\d+'
port: '\d+?\.\d+?.\d+?\.\d+?:(\d+)'
proxy: true
#通过插件,扩展ip源
#spiderPlugin:
# #插件名
# - name: test
# #运行命令,返回的结果要符合格式
# run: 'text.exe'
#通过文件导入ip
#spiderFile:
# #插件名
# - name: test1
# #运行命令,返回的结果要符合格式
# path: 'ip.txt'
#
#