Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Victorkaillo v1.0.2 #25

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
> ![Logo Kinvo](https://github.com/kinvoapp/kinvo-mobile-test/blob/master/logo.svg)

# Para carregar o ambiente recomendo utilizar o conda:
1. `$ conda create --name <env_name> --file requeriments.txt`
## Se preferir pode utilizar o pip:
1. `$ pip install -r pip_requirements.txt`

# Para rodar a aplicação
1. `$ python3 topnews_appflask.py`
# Teste para candidatos à vaga de Engenheiro de Dados (Python)


Expand Down
36 changes: 36 additions & 0 deletions pip_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
async-generator==1.10
attrs==21.4.0
beautifulsoup4==4.10.0
certifi==2021.10.8
cffi==1.15.0
charset-normalizer==2.0.12
click==8.0.4
cryptography==36.0.1
Flask==2.0.3
h11==0.13.0
html5lib==1.1
idna==3.3
itsdangerous==2.1.1
Jinja2==3.0.3
lxml==4.8.0
MarkupSafe==2.1.0
numpy==1.22.3
outcome==1.1.0
pandas==1.4.1
pycparser==2.21
pyOpenSSL==22.0.0
PySocks==1.7.1
python-dateutil==2.8.2
pytz==2021.3
requests==2.27.1
selenium==4.1.3
six==1.16.0
sniffio==1.2.0
sortedcontainers==2.4.0
soupsieve==2.3.1
trio==0.20.0
trio-websocket==0.9.2
urllib3==1.26.8
webencodings==0.5.1
Werkzeug==2.0.3
wsproto==1.1.0
54 changes: 54 additions & 0 deletions requeriments.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: osx-64
async-generator=1.10=pypi_0
attrs=21.4.0=pypi_0
beautifulsoup4=4.10.0=pypi_0
ca-certificates=2022.2.1=hecd8cb5_0
certifi=2021.10.8=py39hecd8cb5_2
cffi=1.15.0=pypi_0
charset-normalizer=2.0.12=pypi_0
click=8.0.4=pypi_0
cryptography=36.0.1=pypi_0
flask=2.0.3=pypi_0
h11=0.13.0=pypi_0
html5lib=1.1=pypi_0
idna=3.3=pypi_0
itsdangerous=2.1.1=pypi_0
jinja2=3.0.3=pypi_0
libcxx=12.0.0=h2f01273_0
libffi=3.3=hb1e8313_2
lxml=4.8.0=pypi_0
markupsafe=2.1.0=pypi_0
ncurses=6.3=hca72f7f_2
numpy=1.22.3=pypi_0
openssl=1.1.1m=hca72f7f_0
outcome=1.1.0=pypi_0
pandas=1.4.1=pypi_0
pip=21.2.4=py39hecd8cb5_0
pycparser=2.21=pypi_0
pyopenssl=22.0.0=pypi_0
pysocks=1.7.1=pypi_0
python=3.9.7=h88f2d9e_1
python-dateutil=2.8.2=pypi_0
pytz=2021.3=pypi_0
readline=8.1.2=hca72f7f_1
requests=2.27.1=pypi_0
selenium=4.1.3=pypi_0
setuptools=58.0.4=py39hecd8cb5_0
six=1.16.0=pypi_0
sniffio=1.2.0=pypi_0
sortedcontainers=2.4.0=pypi_0
soupsieve=2.3.1=pypi_0
sqlite=3.37.2=h707629a_0
tk=8.6.11=h7bc2e8c_0
trio=0.20.0=pypi_0
trio-websocket=0.9.2=pypi_0
tzdata=2021e=hda174b7_0
urllib3=1.26.8=pypi_0
webencodings=0.5.1=pypi_0
werkzeug=2.0.3=pypi_0
wheel=0.37.1=pyhd3eb1b0_0
wsproto=1.1.0=pypi_0
xz=5.2.5=h1de35cc_0
zlib=1.2.11=h4dc903c_4
30 changes: 30 additions & 0 deletions templates/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<!DOCTYPE html>
<html>
  {# Landing page: shows a placeholder table and a link that triggers the news scrape. #}
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>News Table</title>
  </head>
  <body>
    <div class="container">
      <div class="page-header">
        {# Fall back to a fixed heading when the view does not pass "titulo". #}
        <h1>{{ titulo | default('News Table') }}</h1>
      </div>

      <table border="1" class="dataframe">
        <thead>
          <tr style="text-align: right;">
            <th></th>
            <th>Waiting click to save top news from 'spacemoney.com.br'</th>
          </tr>
        </thead>
      </table>
      {# A nav element is not a valid child of a table, so it is placed after it. #}
      <nav>
        <ol>
          {# url_for keeps the link correct on any host/port, not only 127.0.0.1:5000. #}
          <li><a href="{{ url_for('savenews') }}">Click here to save top news about Ibovespa</a></li>
        </ol>
      </nav>
    </div>
  </body>
</html>
32 changes: 32 additions & 0 deletions templates/news.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<!DOCTYPE html>
<html>
  {# Renders the entries passed in as "items", one table row per entry. #}
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>News Table</title>
  </head>
  <body>
    <div class="container">
      <div class="page-header">
        {# Fall back to a fixed heading when the view does not pass "titulo". #}
        <h1>{{ titulo | default('News Table') }}</h1>
        {# url_for keeps the link correct on any host/port, not only 127.0.0.1:5000. #}
        <a href="{{ url_for('savenews') }}">Click here to update news</a>
      </div>
      <table border="1" class="dataframe">
        <thead>
          <tr style="text-align: right;">
            <th></th>
            <th>News</th>
          </tr>
        </thead>
        <tbody>
          {% for item in items %}
          <tr>
            <th></th>
            {# An entry may be a plain string or a one-element row; show its text either way. #}
            <td>{{ item[0] if item is sequence and item is not string else item }}</td>
          </tr>
          {% endfor %}
        </tbody>
      </table>
    </div>
  </body>
</html>
31 changes: 31 additions & 0 deletions templates/savenews.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<!DOCTYPE html>
<html>
  {# Confirmation page shown after the news list has been scraped and stored. #}
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>News Table</title>
  </head>
  <body>
    <div class="container">
      <div class="page-header">
        {# Fall back to a fixed heading when the view does not pass "titulo". #}
        <h1>{{ titulo | default('News Table') }}</h1>
      </div>

      <table border="1" class="dataframe">
        <thead>
          <tr style="text-align: right;">
            <th></th>
            <th>The news has been saved</th>
          </tr>
        </thead>
      </table>
      {# A nav element is not a valid child of a table, so it is placed after it. #}
      <nav>
        <ol>
          {# url_for keeps the links correct on any host/port, not only 127.0.0.1:5000. #}
          <li><a href="{{ url_for('news') }}">Access top15 news here</a></li>
          <li><a href="{{ url_for('topnews') }}">Access top5 news here</a></li>
        </ol>
      </nav>
    </div>
  </body>
</html>
25 changes: 25 additions & 0 deletions topnews_appflask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from flask import Flask, render_template
from webcrawlear import WebCrawlear

# Application object on which the route decorators in this module register views.
app = Flask(__name__)

# In-memory store read by the news views. It starts with a single placeholder
# message telling the user how to load the list.
dicionario = {
    'lista_news': [
        'Lista ainda não foi carregada. [Para carregar utilize endpoint "/savenews"]',
    ],
}

@app.route('/')
def index():
    """Serve the landing page rendered from ``index.html``."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/topnews')
def topnews():
    """Render ``news.html`` with only the first five stored entries."""
    first_five = dicionario['lista_news'][:5]
    return render_template('news.html', items=first_five)

@app.route('/news')
def news():
    """Render ``news.html`` with every stored entry."""
    all_entries = dicionario['lista_news']
    return render_template('news.html', items=all_entries)

@app.route('/savenews')
def savenews():
    """Scrape the news list, store it in ``dicionario`` and render ``savenews.html``."""
    # A fresh crawler is created per request; its result replaces the stored list.
    dicionario['lista_news'] = WebCrawlear().get_list_news()
    return render_template('savenews.html')

# Start Flask's built-in server only when this file is executed as a script.
# Without the guard, merely importing the module (tests, a WSGI server,
# ``flask run``) would block on a running server.
if __name__ == "__main__":
    app.run()
52 changes: 52 additions & 0 deletions webcrawlear.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By


class WebCrawlear():
    """Scrape news headlines from a web page with a headless Firefox session.

    Typical use is ``WebCrawlear().get_list_news()``, which opens the
    browser, collects the headlines and closes the browser again.
    """

    def __init__(self, url=None) -> None:
        """Store the target URL; no browser is started here.

        Args:
            url: Page to scrape. Falls back to the spacemoney.com.br
                latest-news page when omitted or empty.
        """
        spacemoney_url = 'https://www.spacemoney.com.br/ultimas-noticias'
        self.url = url or spacemoney_url
        self.driver = None
        self.data_frame_news = None

    def make_driver(self, url=None):
        """Start a headless Firefox session and load the page.

        Args:
            url: Page to open; defaults to ``self.url``.

        Raises:
            Exception: Whatever the driver raises while loading the page.
                The browser is closed first so no process is left behind.
        """
        option = Options()
        # Passing the flag directly (rather than setting ``option.headless``)
        # also works on Selenium releases that dropped that property.
        option.add_argument('-headless')
        driver = webdriver.Firefox(options=option)
        try:
            driver.get(url or self.url)
        except Exception:
            # Do not leak the browser process when navigation fails.
            driver.quit()
            raise
        self.driver = driver

    def get_elements_by_class_name(self, class_name):
        """Return the elements of the loaded page that carry ``class_name``."""
        return self.driver.find_elements(By.CLASS_NAME, value=class_name)

    def extract_title(self, xpath):
        """Parse the ``title`` attribute of an element with BeautifulSoup.

        Args:
            xpath: An element exposing ``get_attribute``, such as one
                returned by ``get_elements_by_class_name``.

        Returns:
            The ``BeautifulSoup`` object built from the attribute value.
        """
        # ``get_attribute`` yields None when the attribute is absent, which
        # BeautifulSoup cannot parse; treat that as an empty title.
        html_content_title = xpath.get_attribute('title') or ''
        return BeautifulSoup(html_content_title, 'html.parser')

    def make_data_frame_news(self, class_name='titulos'):
        """Collect the titles into a one-column DataFrame and close the browser.

        Args:
            class_name: CSS class of the elements holding the headlines.

        Returns:
            The DataFrame (column ``News``), also stored on
            ``self.data_frame_news``.
        """
        try:
            elements = self.get_elements_by_class_name(class_name)
            lista_news = [self.extract_title(element) for element in elements]
            self.data_frame_news = pd.DataFrame(lista_news, columns=['News'])
        finally:
            # Release the browser even when scraping fails, and forget the
            # handle so a closed session is never reused.
            if self.driver is not None:
                self.driver.quit()
                self.driver = None
        return self.data_frame_news

    def get_list_news(self):
        """Open the browser, scrape the headlines and return them as a list.

        Returns:
            ``DataFrame.values.tolist()`` of the scraped frame: one inner
            list per row of the single ``News`` column.
        """
        self.make_driver()
        dataframe_news = self.make_data_frame_news()
        return dataframe_news.values.tolist()

if __name__ == "__main__":
    # Manual smoke run: scrape the news page and print the raw result.
    crawler = WebCrawlear(url='https://www.spacemoney.com.br/ultimas-noticias')
    print(crawler.get_list_news())