main.py
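"""Entry point for the Shopee shop/product crawler.

Pipeline (as implemented below): test the outbound IP pool, fetch shop
details for the configured shop names, fetch product (pdp) details for
those shops, and, when running with ENV == "prod", append both result
sets to BigQuery.
"""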
import logging

from config.config import settings
from view.utils import timer
from view.check_ip_pool import CheckIPAddress
from view.api_v4_get_shop_detail import ShopDetailCrawler
from view.api_v4_get_product_detail import ProductDetailCrawler

logger = logging.getLogger(__name__)


class Crawler:
    def __init__(self, user_dict):
        self.input_shop_names = user_dict["input_shop_names"]
        self.user_email = user_dict["user_info"]["Email"]
        self.user_name = user_dict["user_info"]["Name"]

    @timer
    def __call__(self):
        # Step 0 > check that the IP pool behaves as expected (this step is optional)
        logger.info("⌲ Step 0: Test the IP you're using 5 times.")
        self.check_ip_pool()

        # Step 1 > input shop_names > get shop_detail
        logger.info("⌲ Step 1: Total shop detail fetched:")
        crawler_shop_detail = ShopDetailCrawler()
        result_shop_detail = crawler_shop_detail(self.input_shop_names)

        # Step 2 > input shop_detail > get product_detail
        logger.info("⌲ Step 2: Total pdp detail fetched:")
        crawler_product_detail = ProductDetailCrawler()
        result_product_detail = crawler_product_detail(result_shop_detail)
        result_product_detail["user_name"] = self.user_name
        result_product_detail["user_email"] = self.user_email

        # Step 3 > save shop & pdp data to BigQuery (prod only)
        if settings.ENV == "prod":
            logger.info("⌲ Step 3: Data saved to BigQuery.")
            self.save_to_bigquery(result_shop_detail, result_product_detail)

    def check_ip_pool(self):
        # Test the outbound IP address a fixed number of times.
        check_ip = CheckIPAddress()
        check_ip(test_times=5)

    def save_to_bigquery(self, shop_details, product_details):
        # Append both result DataFrames to the shopee dataset in BigQuery.
        client = settings.setup_bigquery()
        shop_details.to_gbq("shopee.shop_detail", client.project, if_exists="append")
        product_details.to_gbq("shopee.pdp_detail", client.project, if_exists="append")


if __name__ == "__main__":
    # Insert your email and the shop names you want to crawl
    user_list = [
        {
            "user_info": {
                "Email": "[email protected]",
                "Name": "Max",
            },
            "input_shop_names": [
                "fulinxuan",
                "pat6116xx",
                "join800127",
                "ginilin0982353562",
                "ru8285fg56",
                "wangshutung",
                "taiwan88888",
                "baoshenfg",
                "cyf66666",
                "buddha8888",
                "dragon9168",
                "sinhochen77",
                "jouhsuansu",
            ],
        }
    ]

    do = Crawler(user_list[0])
    do()
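
# Usage: `python main.py`. Note that results are only written to BigQuery
# when settings.ENV == "prod"; in any other environment the crawl still runs,
# but nothing is persisted.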