Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
jaehyung-99 committed Dec 12, 2022
2 parents 6b1fb23 + 24505f0 commit b427d0e
Show file tree
Hide file tree
Showing 28 changed files with 30,257 additions and 327 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

.DS_Store
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Trip-Analysis
여행 카페 게시글 분석을 활용한 여행 선호도 확인 및 여행지 경쟁력 조사
여행 카페 게시글 분석을 활용한 여행지 선호도 확인 및 상위 여행지에 대한 트렌드 분석
5,000 changes: 5,000 additions & 0 deletions crawling/barcelona.txt

Large diffs are not rendered by default.

82 changes: 82 additions & 0 deletions crawling/barcelonaPostCrawling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import os
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import pyperclip
import pyautogui
import random

# --- Chrome/WebDriver setup --------------------------------------------------
user_agent = "user-agent header"  # placeholder: replace with a real UA string
options = Options()
options.add_argument('user-agent=' + user_agent)
options.add_argument('--mute-audio')
# Keep the browser window open after the script finishes.
options.add_experimental_option("detach", True)
options.add_experimental_option("excludeSwitches", ["enable-logging"])
# `options=` replaces the `chrome_options=` keyword, which is deprecated in
# Selenium 4 (this file already uses the Selenium-4-only `service=` keyword).
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
driver.implicitly_wait(5)

# --- Naver login -------------------------------------------------------------
# Credentials are placeholders; fill in before running.
uid = 'naverid'
upw = 'naverpassword'

url = ('https://nid.naver.com/nidlogin.login?mode=form&url=https%3A%2F%2Fwww.naver.com')

driver.get(url)

# Fill the id field via clipboard paste (Ctrl+V) instead of send_keys --
# presumably to get past Naver's automated-input detection (TODO confirm).
tag_id = driver.find_element(By.CSS_SELECTOR, '#id')
tag_id.click()
pyperclip.copy(uid)
pyautogui.keyDown('ctrl')
pyautogui.press('v')
pyautogui.keyUp('ctrl')
time.sleep(2)

# Same clipboard trick for the password field.
# NOTE(review): the original `print(pyperclip.paste())` debug lines echoed the
# credentials (including the password) to stdout; removed as a credential leak.
tag_pw = driver.find_element(By.CSS_SELECTOR, '#pw')
tag_pw.click()
pyperclip.copy(upw)
pyautogui.keyDown('ctrl')
pyautogui.press('v')
pyautogui.keyUp('ctrl')
time.sleep(2)

# Submit the login form and give the redirect time to settle.
login_btn = driver.find_element(By.ID, 'log.login')
login_btn.click()
time.sleep(2)

def move_page( page ):
    """Return the Naver cafe article-search URL for result page *page*.

    The query string is pre-encoded (EUC-KR percent escapes for the
    search keyword); only the trailing page number varies per call.
    """
    base = (
        'https://cafe.naver.com/firenze?iframe_url=/ArticleSearchList.nhn'
        '%3Fsearch.clubid=10209062%26search.menuid=275%26search.media=0'
        '%26search.searchdate=all%26search.exact=%26search.include='
        '%26userDisplay=50%26search.exclude=%26search.option=0'
        '%26search.sortBy=date%26search.searchBy=1%26search.includeAll='
        '%26search.query=%B9%D9%B8%A3%BC%BF%B7%CE%B3%AA'
        '%26search.viewtype=title%26search.page='
    )
    return f'{base}{page}'

# Collected post titles, accumulated across all result pages.
data = []

# Scrape result pages 1-100 of the keyword search built by move_page().
for i in range( 1, 101 ):
    url = move_page( i )
    driver.get( url )
    # The result list is rendered inside the 'cafe_main' iframe.
    driver.switch_to.frame('cafe_main')
    search_url = driver.page_source
    soup = BeautifulSoup(search_url, 'html.parser')
    # CSS path to each post-title anchor in the search-result table.
    subj_locate = '#main-area > div:nth-child(5) > table > tbody > tr:nth-child(n) > td.td_article > div.board-list > div > a.article'
    subjects = soup.select(subj_locate)

    for subject in subjects:
        print(subject)
        sub = subject.text.strip()

        data.append(sub)
    # Random pause between page loads -- presumably to avoid being
    # rate-limited/blocked by Naver (TODO confirm).
    time.sleep( random.uniform(2,4) )

# Persist the scraped titles, one per line, to barcelona.txt.
# Opening with mode 'w' already truncates an existing file, so the original
# os.path.exists()/os.remove() pre-delete was redundant (and racy) and has
# been dropped; writing whole lines replaces the original character-by-
# character inner loop, which produced identical output.
with open( 'barcelona.txt', 'w', encoding='utf-8' ) as f:
    for title in data:
        f.write( title + '\n' )

5,152 changes: 5,152 additions & 0 deletions crawling/london.txt

Large diffs are not rendered by default.

106 changes: 106 additions & 0 deletions crawling/londonPostCrawling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import os
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
import pyperclip
import pyautogui
import random

# --- Chrome/WebDriver setup --------------------------------------------------
user_agent = "user-agent header"  # placeholder: replace with a real UA string
options = Options()
options.add_argument('user-agent=' + user_agent)
options.add_argument('--mute-audio')
# Keep the browser window open after the script finishes.
options.add_experimental_option("detach", True)
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# `options=` replaces the `chrome_options=` keyword, which is deprecated in
# Selenium 4 (this file already uses the Selenium-4-only `service=` keyword).
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
driver.implicitly_wait(5)

# --- Naver login -------------------------------------------------------------
# Credentials are placeholders ("Naver id" / "Naver password"); fill in first.
uid = '네이버id'
upw = '네이버password'

url = ('https://nid.naver.com/nidlogin.login?mode=form&url=https%3A%2F%2Fwww.naver.com')

driver.get(url)

# Fill the id field via clipboard paste instead of send_keys -- presumably to
# get past Naver's automated-input detection (TODO confirm). This script uses
# the 'command' modifier (macOS paste); the sibling barcelona script uses
# 'ctrl' -- NOTE(review): platform-specific, confirm the target OS.
tag_id = driver.find_element(By.CSS_SELECTOR, '#id')
tag_id.click()
pyperclip.copy(uid)
pyautogui.keyDown('command')
pyautogui.press('v')
pyautogui.keyUp('command')
time.sleep(2)

# Same clipboard trick for the password field.
# NOTE(review): the original `print(pyperclip.paste())` debug lines echoed the
# credentials (including the password) to stdout; removed as a credential leak.
tag_pw = driver.find_element(By.CSS_SELECTOR, '#pw')
tag_pw.click()
pyperclip.copy(upw)
pyautogui.keyDown('command')
pyautogui.press('v')
pyautogui.keyUp('command')
time.sleep(2)

# Submit the login form and give the redirect time to settle.
login_btn = driver.find_element(By.ID, 'log.login')
login_btn.click()
time.sleep(2)

def move_page( page ):
    """Return the Naver cafe article-search URL for result page *page*.

    All-dates search; the query string is pre-encoded (EUC-KR percent
    escapes for the keyword) and only the page number varies.
    """
    base = (
        'https://cafe.naver.com/firenze?iframe_url=/ArticleSearchList.nhn'
        '%3Fsearch.clubid=10209062%26search.menuid=275%26search.media=0'
        '%26search.searchdate=all%26search.exact=%26search.include='
        '%26userDisplay=50%26search.exclude=%26search.option=0'
        '%26search.sortBy=date%26search.searchBy=1%26search.includeAll='
        '%26search.query=%B7%B1%B4%F8'
        '%26search.viewtype=title%26search.page='
    )
    return f'{base}{page}'

def next_page( page ):
    """Return the date-restricted search URL for result page *page*.

    Same cafe/keyword as move_page(), but limited to the window
    2022-01-01..2022-04-09 encoded in search.searchdate.
    """
    base = (
        'https://cafe.naver.com/firenze?iframe_url=/ArticleSearchList.nhn'
        '%3Fsearch.clubid=10209062%26search.menuid=275%26search.media=0'
        '%26search.searchdate=2022-01-012022-04-09%26search.defaultValue=1'
        '%26userDisplay=50%26search.option=0%26search.sortBy=date'
        '%26search.searchBy=1%26search.query=%B7%B1%B4%F8'
        '%26search.viewtype=title%26search.page='
    )
    return f'{base}{page}'

# Collected post titles, accumulated across both scrape passes below.
data = []

# Pass 1: all-dates keyword search (URL template in move_page), pages 1-100.
for i in range( 1, 101 ):
    url = move_page( i )
    driver.get( url )

    # The result list is rendered inside the 'cafe_main' iframe.
    driver.switch_to.frame('cafe_main')

    search_url = driver.page_source
    soup = BeautifulSoup(search_url, 'html.parser')

    # CSS path to each post-title anchor in the search-result table.
    subj_locate = '#main-area > div:nth-child(5) > table > tbody > tr:nth-child(n) > td.td_article > div.board-list > div > a.article'
    subjects = soup.select(subj_locate)

    for subject in subjects:
        sub = subject.text.strip()

        data.append(sub)
    # Random pause between page loads -- presumably to avoid being
    # rate-limited/blocked by Naver (TODO confirm).
    time.sleep( random.uniform(2,4) )

# Pass 2: date-restricted search (see next_page: 2022-01-01..2022-04-09),
# pages 1-4, appended to the same list.
for i in range( 1, 5 ):
    url = next_page( i )
    driver.get( url )

    driver.switch_to.frame('cafe_main')

    search_url = driver.page_source
    soup = BeautifulSoup(search_url, 'html.parser')

    subj_locate = '#main-area > div:nth-child(5) > table > tbody > tr:nth-child(n) > td.td_article > div.board-list > div > a.article'
    subjects = soup.select(subj_locate)

    for subject in subjects:
        sub = subject.text.strip()

        data.append(sub)
    time.sleep( random.uniform(2,4) )

# Persist the scraped titles, one per line, to london.txt.
# Opening with mode 'w' already truncates an existing file, so the original
# os.path.exists()/os.remove() pre-delete was redundant (and racy) and has
# been dropped; writing whole lines replaces the original character-by-
# character inner loop, which produced identical output.
with open( 'london.txt', 'w', encoding='utf-8' ) as f:
    for title in data:
        f.write( title + '\n' )
Loading

0 comments on commit b427d0e

Please sign in to comment.