diff --git a/my_scraper_project/.env b/my_scraper_project/.env
new file mode 100644
index 00000000..c9e912a0
--- /dev/null
+++ b/my_scraper_project/.env
@@ -0,0 +1 @@
+CHROMEDRIVER_PATH=/path/to/chromedriver
diff --git a/my_scraper_project/__pycache__/run.cpython-310.pyc b/my_scraper_project/__pycache__/run.cpython-310.pyc
new file mode 100644
index 00000000..1e398305
Binary files /dev/null and b/my_scraper_project/__pycache__/run.cpython-310.pyc differ
diff --git a/my_scraper_project/app/__init__.py b/my_scraper_project/app/__init__.py
new file mode 100644
index 00000000..255b4a6c
--- /dev/null
+++ b/my_scraper_project/app/__init__.py
@@ -0,0 +1,15 @@
+from flask import Flask
+
+
+def create_app():
+    """Create the Flask application and register its routes.
+
+    Returns:
+        Flask: The application instance, with the routes module imported.
+    """
+    app = Flask(__name__)
+    # routes.py decorates its view with ``current_app.route`` at import time,
+    # so the import has to happen while an application context is active.
+    with app.app_context():
+        from . import routes  # imported for its side effect (registers /scrape)
+    return app
diff --git a/my_scraper_project/app/__pycache__/__init__.cpython-310.pyc b/my_scraper_project/app/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 00000000..a99abe16
Binary files /dev/null and b/my_scraper_project/app/__pycache__/__init__.cpython-310.pyc differ
diff --git a/my_scraper_project/app/__pycache__/routes.cpython-310.pyc b/my_scraper_project/app/__pycache__/routes.cpython-310.pyc
new file mode 100644
index 00000000..3db3ba14
Binary files /dev/null and b/my_scraper_project/app/__pycache__/routes.cpython-310.pyc differ
diff --git a/my_scraper_project/app/__pycache__/scraper.cpython-310.pyc b/my_scraper_project/app/__pycache__/scraper.cpython-310.pyc
new file mode 100644
index 00000000..d240b949
Binary files /dev/null and b/my_scraper_project/app/__pycache__/scraper.cpython-310.pyc differ
diff --git a/my_scraper_project/app/routes.py b/my_scraper_project/app/routes.py
new file mode 100644
index 00000000..83bec7ab
--- /dev/null
+++ b/my_scraper_project/app/routes.py
@@ -0,0 +1,22 @@
+from flask import jsonify, current_app
+from .scraper import scrape_news
+
+
+@current_app.route('/scrape', methods=['GET'])
+def scrape_and_save():
+    """Run the news scraper and return the result as JSON.
+
+    Returns:
+        tuple: ``(response, 200)`` carrying the scraped items on success, or
+        ``(response, 500)`` with an error message if scraping raised.
+    """
+    try:
+        print("Iniciando raspagem e salvamento de notícias...")
+        news = scrape_news()
+        print("Notícias raspadas:", news)
+        return jsonify({'message': 'News scraped and saved successfully', 'news': news}), 200
+    except Exception as e:
+        # Top-level boundary: log with the traceback so failures raised by
+        # the scraper can be diagnosed, then answer with a generic 500.
+        current_app.logger.exception("Erro ao raspar as notícias: %s", e)
+        return jsonify({'message': 'Erro ao raspar as notícias'}), 500
diff --git a/my_scraper_project/app/scraper.py b/my_scraper_project/app/scraper.py
new file mode 100644
index 00000000..a7e89035
--- /dev/null
+++ b/my_scraper_project/app/scraper.py
@@ -0,0 +1,43 @@
+from selenium.webdriver.chrome.service import Service
+from webdriver_manager.chrome import ChromeDriverManager
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+
+
+def scrape_news():
+    """Scrape up to five news items from the SpaceMoney latest-news page.
+
+    Returns:
+        list[dict]: One ``{'title': ..., 'link': ...}`` dict per item found.
+    """
+    print("Iniciando raspagem de notícias...")
+
+    print("Configurando Selenium...")
+
+    service = Service(ChromeDriverManager().install())
+    chrome_options = webdriver.ChromeOptions()
+    chrome_options.add_argument('--headless')
+    chrome_options.add_argument('--no-sandbox')
+    chrome_options.add_argument('--disable-dev-shm-usage')
+    driver = webdriver.Chrome(service=service, options=chrome_options)
+
+    news = []
+    # Quit the browser even if navigation or an element lookup raises;
+    # otherwise each failed scrape leaves a headless Chrome process behind.
+    try:
+        print("Acessando URL...")
+        url = 'https://www.spacemoney.com.br/ultimas-noticias'
+        driver.get(url)
+
+        elements = driver.find_elements(By.XPATH, '//div[@class="linkNoticia crop"]')
+        for element in elements[:5]:
+            title = element.text
+            link = element.find_element(By.TAG_NAME, 'a').get_attribute('href')
+            news.append({'title': title, 'link': link})
+    finally:
+        driver.quit()
+
+    print("Raspagem de notícias concluída.")
+    print("Notícias encontradas:", news)
+
+    return news
diff --git a/my_scraper_project/requirements.txt b/my_scraper_project/requirements.txt
new file mode 100644
index 00000000..6a7a3fc3
--- /dev/null
+++ b/my_scraper_project/requirements.txt
@@ -0,0 +1,5 @@
+Flask
+requests
+selenium
+python-dotenv
+webdriver_manager
diff --git a/my_scraper_project/run.py b/my_scraper_project/run.py
new file mode 100644
index 00000000..5958439d
--- /dev/null
+++ b/my_scraper_project/run.py
@@ -0,0 +1,32 @@
+#import os
+from threading import Thread
+import time
+import requests
+from app import create_app
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+app = create_app()
+
+
+def run_flask():
+    """Serve the Flask app on all interfaces, port 5005 (blocks the caller)."""
+    app.run(host='0.0.0.0', port=5005)
+
+
+if __name__ == '__main__':
+    # Run the server in a background thread so this script can call it.
+    t = Thread(target=run_flask)
+    t.start()
+
+    # Fixed delay before the first request; presumably to let the server
+    # finish starting -- TODO replace with a readiness check.
+    time.sleep(5)
+
+    try:
+        response = requests.get('http://127.0.0.1:5005/scrape')
+        response.raise_for_status()
+        print(response.json())
+    except requests.exceptions.RequestException as e:
+        logging.error("Erro ao fazer a requisição GET: %s", e)
\ No newline at end of file