-
Notifications
You must be signed in to change notification settings - Fork 0
/
naver_webtoon_scrapping.py
55 lines (41 loc) · 1.79 KB
/
naver_webtoon_scrapping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import csv
import re
from urllib.parse import parse_qs, urlparse

import requests
from bs4 import BeautifulSoup
# Fetch the weekly schedule page and collect, for every webtoon link found in
# the per-day columns, its title, its titleId and its weekday code. The three
# lists are index-aligned: entry i of each list describes the same link.
naver_wt_URL = 'https://comic.naver.com/webtoon/weekday.nhn'
# Without a timeout requests can wait indefinitely on a stalled connection,
# and without the status check an HTTP error page would be parsed as if it
# were the schedule, silently producing empty lists.
naver_response = requests.get(naver_wt_URL, timeout=10)
naver_response.raise_for_status()
soup = BeautifulSoup(naver_response.text, 'html.parser')
naver_wts = soup.select('#content > div.list_area.daily_all > div.col')

naver_wt_names = []
naver_wt_days = []
naver_wt_ids = []
for naver_wt in naver_wts:
    for info in naver_wt.select('div.col_inner > ul > li > a'):
        # Read the query parameters by name rather than by position, so the
        # result does not depend on parameter order or on extra parameters
        # being appended to the link.
        query = parse_qs(urlparse(info.get('href', '')).query)
        if 'titleId' not in query or 'weekday' not in query:
            # Not a link of the expected form; skipping it entirely keeps
            # the three lists aligned.
            continue
        naver_wt_ids.append(query['titleId'][0])
        naver_wt_days.append(query['weekday'][0])
        naver_wt_names.append(info.text)
# For every collected webtoon, download its episode-list page and record the
# introduction text and the page URL. naver_wt_intro and naver_wt_url are
# filled in the same order as naver_wt_ids / naver_wt_days.
naver_wt_intro = []
naver_wt_url = []
# One Session lets requests reuse the connection to the host across the many
# page fetches instead of opening a new one per webtoon.
with requests.Session() as naver_session:
    for naver_wt_id, naver_wt_day in zip(naver_wt_ids, naver_wt_days):
        naver_detail_URL = (
            'https://comic.naver.com/webtoon/list.nhn?titleId=' + naver_wt_id
            + '&weekday=' + naver_wt_day
        )
        # A timeout keeps a single stalled request from hanging the whole run.
        naver_detail_response = naver_session.get(naver_detail_URL, timeout=10)
        soup = BeautifulSoup(naver_detail_response.text, 'html.parser')
        naver_intros = soup.select('#content > div.comicinfo > div.detail > p')
        # Take the text from the parsed nodes instead of regex-stripping the
        # list's repr: this decodes HTML entities and keeps any literal
        # square brackets that are part of the intro. A page without a
        # matching paragraph yields an empty string.
        naver_intro = ' '.join(
            paragraph.get_text(' ', strip=True) for paragraph in naver_intros
        )
        naver_wt_intro.append(naver_intro)
        naver_wt_url.append(naver_detail_URL)
# Index the scraped data by webtoon title. A title that occurs more than once
# in naver_wt_names keeps only the intro/url of its last occurrence.
naver_webtoon = {
    title: {'intro': naver_wt_intro[idx], 'url': naver_wt_url[idx]}
    for idx, title in enumerate(naver_wt_names)
}
# Write one CSV row (name, intro, url) per entry of naver_webtoon.
# NOTE(review): no header row is written -- confirm that is intended.
with open('naver_webtoon.csv', 'w', newline='', encoding='utf-8-sig') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=['name', 'intro', 'url'])
    csv_writer.writerows(
        {'name': title, 'intro': details['intro'], 'url': details['url']}
        for title, details in naver_webtoon.items()
    )