-
Notifications
You must be signed in to change notification settings - Fork 0
/
ncm.py
27 lines (25 loc) · 811 Bytes
/
ncm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#coding:utf-8
import requests
import codecs
from bs4 import BeautifulSoup
# URL prefix and suffix placed around the numeric page offset.
# NOTE(review): both are empty here -- presumably they must be filled in
# with the real endpoint pieces before the script can fetch anything.
FRONT = ""
END = ""

# Ordered whitespace clean-up rules applied to the text of the extracted
# table.  Order matters: each rule runs on the output of the previous one.
_CLEANUP_RULES = (
    (u'\r\n\t\t\t\r\n\r\n\t\t\t\n\n\n\xa0\xa0\xa0\xa0\xa0\n\n\n', "\n"),
    (u"\xa0\r\n\r\n\t\t\t", "\t"),
    (u"\n\xa0\xa0\n", "\t"),
    (u"\n\n\n\n", "\n"),
    (u"\n\n\n", ""),
    (u"\xa0\xa0\xa0\xa0\xa0", "\n"),
    (u"\n\n", "\n"),
)


def clean_text(text):
    """Return *text* with every clean-up rule applied, in order.

    Each rule is a literal ``str.replace`` of one whitespace /
    non-breaking-space run by a newline, a tab or nothing.
    """
    for old, new in _CLEANUP_RULES:
        text = text.replace(old, new)
    return text


def main():
    """Fetch every page and write the cleaned table text to ``result.txt``.

    For each offset 0, 30, ..., 1260 the URL ``FRONT + offset + END`` is
    requested, the first ``<table class="collapseParas">`` is extracted,
    its text is cleaned with :func:`clean_text` and written to the output
    file, and ``ok<offset>`` is printed as a progress marker.

    Raises:
        IndexError: if a page contains no ``collapseParas`` table.
    """
    # Context managers make sure the HTTP session and the output file are
    # closed (and the file flushed) even when a page fails mid-run.
    with requests.session() as s:
        with codecs.open('result.txt', 'w', 'utf-8') as f:
            # NOTE(review): presumably 30 rows per page and 1263 rows in
            # total -- confirm against the target site.
            for i in range(0, 1263, 30):
                url = FRONT + str(i) + END
                html = s.get(url).text
                # NOTE(review): no explicit parser is passed, so bs4 picks
                # the best one installed.  Left unchanged because the
                # clean-up rules are tuned to the current output.
                soup = BeautifulSoup(html)
                tables = soup.find_all("table", class_="collapseParas")
                if not tables:
                    # Same exception type as the bare tables[0] lookup,
                    # but it says which page was the problem.
                    raise IndexError(
                        "no table.collapseParas found at offset %d (url %r)"
                        % (i, url)
                    )
                f.write(clean_text(tables[0].text))
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print('ok' + str(i))


if __name__ == "__main__":
    main()