Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feedback #1

Open
wants to merge 64 commits into
base: feedback
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
700a36b
Setting up GitHub Classroom Feedback
github-classroom[bot] Jun 2, 2021
c4d22f8
moving our files to this repository
Dangerio Jun 2, 2021
cb39621
Add files via upload
Yuliya-Zakharova Jun 3, 2021
cd860ac
upd
Dangerio Jun 3, 2021
bc3c979
upd
Dangerio Jun 3, 2021
414c68a
Add files via upload
Yuliya-Zakharova Jun 3, 2021
6a11e6f
Add files via upload
Yuliya-Zakharova Jun 3, 2021
a433d51
Update Final.ipynb
Dangerio Jun 3, 2021
74810c5
adding parsing to Final
Dangerio Jun 4, 2021
ab5a518
Add files via upload
Yuliya-Zakharova Jun 4, 2021
576ccf8
Add files via upload
Yuliya-Zakharova Jun 4, 2021
f6f5802
upd
Dangerio Jun 4, 2021
f9dc241
Чуток в final
Your-Python-Frustrates-Me Jun 4, 2021
a2bcd21
Add files via upload
Yuliya-Zakharova Jun 4, 2021
ca73472
upd
Dangerio Jun 5, 2021
a693581
upd
Dangerio Jun 5, 2021
032c5d0
upd
Dangerio Jun 5, 2021
9da2aa5
Create .gitkeep
Yuliya-Zakharova Jun 5, 2021
813f042
Add files via upload
Yuliya-Zakharova Jun 5, 2021
a5b5d26
Add files via upload
Yuliya-Zakharova Jun 5, 2021
e618f0d
download of some necessary data
Yuliya-Zakharova Jun 5, 2021
99f1c80
upd
Yuliya-Zakharova Jun 5, 2021
7e9ac79
Update Final.ipynb
Dangerio Jun 5, 2021
4626fff
upd
Dangerio Jun 5, 2021
21e272c
upd
Dangerio Jun 5, 2021
1e9375c
nothing new
Your-Python-Frustrates-Me Jun 6, 2021
a440497
Merge branch 'main' of https://github.com/hse-econ-data-science/hw4_p…
Your-Python-Frustrates-Me Jun 6, 2021
acbf294
Revert "Merge branch 'main' of https://github.com/hse-econ-data-scien…
Your-Python-Frustrates-Me Jun 6, 2021
aa06527
Revert "nothing new"
Your-Python-Frustrates-Me Jun 6, 2021
388299f
Add files via upload
Yuliya-Zakharova Jun 6, 2021
40160c6
Revert "nothing new"
Dangerio Jun 6, 2021
76dc5e5
Revert "nothing new"
Dangerio Jun 6, 2021
4911447
Reverting all
Dangerio Jun 6, 2021
7a79c11
Add files via upload
Yuliya-Zakharova Jun 6, 2021
0dcaa2a
upd
Dangerio Jun 6, 2021
42fb2c6
Merge branch 'main' of https://github.com/hse-econ-data-science/hw4_p…
Dangerio Jun 6, 2021
a282aa2
Revert "upd"
Dangerio Jun 6, 2021
a15271c
upd
Dangerio Jun 6, 2021
2e67823
upd
Dangerio Jun 6, 2021
68a1491
upd
Dangerio Jun 6, 2021
cf21a61
upd
Dangerio Jun 6, 2021
2339e3c
Finally?
Your-Python-Frustrates-Me Jun 6, 2021
69de012
upd
Dangerio Jun 6, 2021
5ebbd1a
Revert "Finally?"
Your-Python-Frustrates-Me Jun 6, 2021
966055e
Sum
Your-Python-Frustrates-Me Jun 6, 2021
23a44d0
+Maps
Your-Python-Frustrates-Me Jun 6, 2021
5eee9fd
Nothing
Your-Python-Frustrates-Me Jun 6, 2021
3f8c4be
upd
Dangerio Jun 6, 2021
0d0f7fc
upd
Dangerio Jun 6, 2021
3865678
upd
Dangerio Jun 6, 2021
1433a8f
Create README.md
Yuliya-Zakharova Jun 6, 2021
b166053
Update README.md
Yuliya-Zakharova Jun 6, 2021
76d1f5e
Update README.md
Yuliya-Zakharova Jun 6, 2021
b49b556
Update README.md
Yuliya-Zakharova Jun 6, 2021
a63ae99
Update README.md
Yuliya-Zakharova Jun 6, 2021
54c460e
Update README.md
Dangerio Jun 6, 2021
745e632
Update README.md
Yuliya-Zakharova Jun 6, 2021
1ddecfc
Update README.md
Yuliya-Zakharova Jun 6, 2021
150e5d7
Update README.md
Yuliya-Zakharova Jun 6, 2021
18fcd83
Nothing
Your-Python-Frustrates-Me Jun 6, 2021
bc58d0b
Upd
Your-Python-Frustrates-Me Jun 6, 2021
a08994e
Update README.md
Yuliya-Zakharova Jun 6, 2021
f05cbb8
Update README.md
Dangerio Jun 6, 2021
4648ef4
Update README.md
Dangerio Jun 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
555 changes: 555 additions & 0 deletions .ipynb_checkpoints/Cian_loader-checkpoint.ipynb

Large diffs are not rendered by default.

109 changes: 109 additions & 0 deletions .ipynb_checkpoints/Cian_parser-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from fake_useragent import UserAgent\n",
"from tqdm.notebook import trange, tqdm\n",
"import time\n",
"import random\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def url_search(iterable):\n",
" urls = []\n",
" errors = []\n",
" for i in tqdm(sorted(iterable, key=lambda w: random.random())):\n",
" try:\n",
" url = f'https://www.cian.ru/cat.php?deal_type=rent&engine_version=2&offer_type=flat&p={i}&region=1&room3=1&type=4'\n",
" response = requests.get(url, headers={'User-Agent': UserAgent().chrome})\n",
" soup = BeautifulSoup(response.content)\n",
" print(response)\n",
" for item in soup.find('div', {'data-name': 'Offers'}).find_all('a'):\n",
" url = item.get('href')\n",
" if url != None and 'https://www.cian.ru/rent/flat/' in url:\n",
" urls.append(url)\n",
" if i in errors:\n",
" errors.remove(i)\n",
" time.sleep(random.randint(1, 3))\n",
" except:\n",
" print('error')\n",
" errors.append(i)\n",
" if len(errors) > 0:\n",
" return urls.extend(url_search(errors)) \n",
" else:\n",
" return urls"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"urls = url_search(range(1, 55))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(urls)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"urls"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('urls.json', 'w', encoding='utf8') as f:\n",
" json.dump(urls, f)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading