-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of https://github.com/BDP-TEAM/Trip-Analysis
- Loading branch information
Showing
28 changed files
with
30,257 additions
and
327 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
|
||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
# Trip-Analysis | ||
여행 카페 게시글 분석을 활용한 여행 선호도 확인 및 여행지 경쟁력 조사 | ||
여행 카페 게시글 분석을 활용한 여행지 선호도 확인 및 상위 여행지에 대한 트렌드 분석 |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import os | ||
from selenium import webdriver | ||
from bs4 import BeautifulSoup | ||
from selenium.webdriver.chrome.service import Service | ||
from webdriver_manager.chrome import ChromeDriverManager | ||
from selenium.webdriver.chrome.options import Options | ||
from selenium.webdriver.common.by import By | ||
import time | ||
import pyperclip | ||
import pyautogui | ||
import random | ||
|
||
# --- Browser setup and Naver login ---

# Spoof a desktop browser user-agent string (placeholder -- replace with a real
# UA header before running).
user_agent = "user-agent header"
options = Options()
options.add_argument('user-agent=' + user_agent)
options.add_argument('--mute-audio')
# Keep the browser window open after the script finishes; silence driver logging.
options.add_experimental_option("detach", True)
options.add_experimental_option("excludeSwitches", ["enable-logging"])
# Selenium 4 deprecated/removed the `chrome_options` keyword; pass `options=`.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),
                          options=options)
driver.implicitly_wait(5)

# NOTE(review): hard-coded credential placeholders -- fill in before running,
# or better, read them from environment variables.
uid = 'naverid'
upw = 'naverpassword'

url = ('https://nid.naver.com/nidlogin.login?mode=form&url=https%3A%2F%2Fwww.naver.com')

driver.get(url)

# Fill the id field via clipboard + Ctrl+V keystrokes instead of send_keys --
# presumably to get past Naver's automated-login detection (TODO confirm).
tag_id = driver.find_element(By.CSS_SELECTOR, '#id')
tag_id.click()
pyperclip.copy(uid)
print(pyperclip.paste())
pyautogui.keyDown('ctrl')
pyautogui.press('v')
pyautogui.keyUp('ctrl')
time.sleep(2)

# Same clipboard-paste trick for the password field.
tag_pw = driver.find_element(By.CSS_SELECTOR, '#pw')
tag_pw.click()
pyperclip.copy(upw)
print(pyperclip.paste())
pyautogui.keyDown('ctrl')
pyautogui.press('v')
pyautogui.keyUp('ctrl')
time.sleep(2)

# Submit the login form and give the redirect time to settle.
login_btn = driver.find_element(By.ID, 'log.login')
login_btn.click()
time.sleep(2)
|
||
def move_page(page):
    """Return the Naver cafe article-search URL for result page *page*.

    The query string (EUC-KR percent-encoded) searches the 'firenze' cafe
    for the Barcelona keyword, newest first, 50 titles per page.
    """
    template = 'https://cafe.naver.com/firenze?iframe_url=/ArticleSearchList.nhn%3Fsearch.clubid=10209062%26search.menuid=275%26search.media=0%26search.searchdate=all%26search.exact=%26search.include=%26userDisplay=50%26search.exclude=%26search.option=0%26search.sortBy=date%26search.searchBy=1%26search.includeAll=%26search.query=%B9%D9%B8%A3%BC%BF%B7%CE%B3%AA%26search.viewtype=title%26search.page={}'
    return template.format(page)
|
||
# Collected article titles, in page order.
data = []

# Walk search-result pages 1..100, harvesting every article title on each page.
for page_no in range(1, 101):
    driver.get(move_page(page_no))
    # The article list is rendered inside the 'cafe_main' iframe.
    driver.switch_to.frame('cafe_main')
    page_html = driver.page_source
    soup = BeautifulSoup(page_html, 'html.parser')
    title_selector = ('#main-area > div:nth-child(5) > table > tbody > '
                      'tr:nth-child(n) > td.td_article > div.board-list > div > a.article')
    for anchor in soup.select(title_selector):
        print(anchor)
        data.append(anchor.text.strip())
    # Randomized delay between page loads to look less like a bot.
    time.sleep(random.uniform(2, 4))
|
||
# Persist the collected titles, one per line. Mode 'w' already truncates (or
# creates) the file, so the previous exists()/remove() dance was redundant.
with open('barcelona.txt', 'w', encoding='utf-8') as f:
    for title in data:
        # Write each title in one call (the original looped char-by-char).
        f.write(title)
        f.write('\n')
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import os | ||
from selenium import webdriver | ||
from bs4 import BeautifulSoup | ||
from selenium.webdriver.chrome.service import Service | ||
from webdriver_manager.chrome import ChromeDriverManager | ||
from selenium.webdriver.chrome.options import Options | ||
from selenium.webdriver.common.by import By | ||
import time | ||
import pyperclip | ||
import pyautogui | ||
import random | ||
|
||
# --- Browser setup and Naver login (macOS variant: Cmd+V paste) ---

# Spoof a desktop browser user-agent string (placeholder -- replace with a real
# UA header before running).
user_agent = "user-agent header"
options = Options()
options.add_argument('user-agent=' + user_agent)
options.add_argument('--mute-audio')
# Keep the browser window open after the script finishes; silence driver logging.
options.add_experimental_option("detach", True)
options.add_experimental_option("excludeSwitches", ["enable-logging"])

# Selenium 4 deprecated/removed the `chrome_options` keyword; pass `options=`.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()),
                          options=options)
driver.implicitly_wait(5)

# NOTE(review): hard-coded credential placeholders -- fill in before running,
# or better, read them from environment variables.
uid = '네이버id'
upw = '네이버password'

url = ('https://nid.naver.com/nidlogin.login?mode=form&url=https%3A%2F%2Fwww.naver.com')

driver.get(url)

# Fill the id field via clipboard + Cmd+V keystrokes instead of send_keys --
# presumably to get past Naver's automated-login detection (TODO confirm).
tag_id = driver.find_element(By.CSS_SELECTOR, '#id')
tag_id.click()
pyperclip.copy(uid)
print(pyperclip.paste())
pyautogui.keyDown('command')
pyautogui.press('v')
pyautogui.keyUp('command')
time.sleep(2)

# Same clipboard-paste trick for the password field.
tag_pw = driver.find_element(By.CSS_SELECTOR, '#pw')
tag_pw.click()
pyperclip.copy(upw)
print(pyperclip.paste())
pyautogui.keyDown('command')
pyautogui.press('v')
pyautogui.keyUp('command')
time.sleep(2)

# Submit the login form and give the redirect time to settle.
login_btn = driver.find_element(By.ID, 'log.login')
login_btn.click()
time.sleep(2)
|
||
def move_page(page):
    """Return the Naver cafe article-search URL for result page *page*.

    The query string (EUC-KR percent-encoded) searches the 'firenze' cafe
    for the London keyword across all dates, newest first, 50 titles per page.
    """
    template = 'https://cafe.naver.com/firenze?iframe_url=/ArticleSearchList.nhn%3Fsearch.clubid=10209062%26search.menuid=275%26search.media=0%26search.searchdate=all%26search.exact=%26search.include=%26userDisplay=50%26search.exclude=%26search.option=0%26search.sortBy=date%26search.searchBy=1%26search.includeAll=%26search.query=%B7%B1%B4%F8%26search.viewtype=title%26search.page={}'
    return template.format(page)
|
||
def next_page(page):
    """Return the date-bounded cafe search URL (London keyword) for page *page*.

    NOTE(review): the ``search.searchdate=2022-01-012022-04-09`` value looks
    like two dates fused without a separator -- confirm against a working
    cafe URL before relying on the date filter.
    """
    template = 'https://cafe.naver.com/firenze?iframe_url=/ArticleSearchList.nhn%3Fsearch.clubid=10209062%26search.menuid=275%26search.media=0%26search.searchdate=2022-01-012022-04-09%26search.defaultValue=1%26userDisplay=50%26search.option=0%26search.sortBy=date%26search.searchBy=1%26search.query=%B7%B1%B4%F8%26search.viewtype=title%26search.page={}'
    return template.format(page)
|
||
# Collected article titles, in page order.
data = []


def _collect_titles(url):
    """Load *url* in the shared driver, enter the 'cafe_main' iframe, and
    append every article title on the page to the module-level ``data`` list.

    Factored out because the all-dates pass and the date-bounded pass below
    previously duplicated this scraping logic line-for-line.
    """
    driver.get(url)
    # The article list is rendered inside the 'cafe_main' iframe.
    driver.switch_to.frame('cafe_main')
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    selector = ('#main-area > div:nth-child(5) > table > tbody > '
                'tr:nth-child(n) > td.td_article > div.board-list > div > a.article')
    for anchor in soup.select(selector):
        data.append(anchor.text.strip())
    # Randomized delay between page loads to look less like a bot.
    time.sleep(random.uniform(2, 4))


# Pass 1: pages 1..100 of the all-dates search.
for page_no in range(1, 101):
    _collect_titles(move_page(page_no))

# Pass 2: pages 1..4 of the date-bounded search.
for page_no in range(1, 5):
    _collect_titles(next_page(page_no))
|
||
# Persist the collected titles, one per line. Mode 'w' already truncates (or
# creates) the file, so the previous exists()/remove() dance was redundant.
with open('london.txt', 'w', encoding='utf-8') as f:
    for title in data:
        # Write each title in one call (the original looped char-by-char).
        f.write(title)
        f.write('\n')
Oops, something went wrong.