-
Notifications
You must be signed in to change notification settings - Fork 0
/
series_GUI.py
116 lines (88 loc) · 4.23 KB
/
series_GUI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Series duration calculator using web scraping and Tkinter
import re
import tkinter as tk
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
# Upper bound (seconds) for each Wikipedia request. This code runs inside a
# Tkinter button callback, so a request without a timeout could freeze the
# window indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10


def _fetch_series_stats(url):
    """Download a Wikipedia page and extract episode/runtime data from its infobox.

    The page's ``th.infobox-label`` cells are matched by position with its
    ``td.infobox-data`` cells, and the rows labelled "No. of episodes" and
    "Running time" are read.

    Args:
        url: Full URL of the page to fetch.

    Returns:
        A tuple ``(episodes_text, runtime_text, duration)`` where the first two
        items are the raw infobox cell texts and ``duration`` is the per-episode
        running time multiplied by the episode count. When the running-time
        cell contains more than one number, the first two are averaged.

    Raises:
        requests.RequestException: The HTTP request failed or timed out.
        ValueError: One of the two labels is not present on the page.
        IndexError: A label has no matching data cell, or a cell contains
            no digits.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    soup = BeautifulSoup(response.content, 'html.parser')
    labels = [cell.get_text() for cell in soup.find_all('th', class_='infobox-label')]
    values = soup.find_all('td', class_='infobox-data')

    # list.index raises ValueError for a missing label, so an absent row is
    # reported explicitly instead of silently indexing an unrelated cell.
    episodes = values[labels.index("No. of episodes")].get_text()
    rtime = values[labels.index("Running time")].get_text()

    numbers_episodes = re.findall(r'\d+', episodes)
    numbers_runtime = re.findall(r'\d+', rtime)
    if len(numbers_runtime) > 1:
        # A range such as "42-58 minutes": use the mean of the two bounds.
        minutes_per_episode = (int(numbers_runtime[0]) + int(numbers_runtime[1])) / 2
    else:
        minutes_per_episode = int(numbers_runtime[0])
    duration = minutes_per_episode * int(numbers_episodes[0])
    return episodes, rtime, duration


def scrape_website():
    """Look up the series typed into ``series_entry`` and display its total duration.

    The entered name is title-cased, spaces are replaced by underscores, and
    the result is percent-encoded to form a Wikipedia page title. The
    "<title>_(TV_series)" page is tried first, then the plain "<title>" page.
    On the first page that yields usable data the three result labels show the
    episode count, running time and computed duration; if neither page does
    (or the entry is blank), ``results_label1`` shows "Invalid" and the other
    two labels are cleared.
    """
    # NOTE(review): str.title() also upper-cases letters that follow an
    # apostrophe and lower-cases the rest of each word, so some names will
    # not match the article title exactly.
    series = series_entry.get().strip().title().replace(" ", "_")
    if series:
        # Percent-encode so characters such as "?" or "#" stay part of the
        # page title instead of starting a query string or fragment.
        series = quote(series)
        url_init = "https://en.wikipedia.org/wiki/"
        for url in (url_init + series + "_(TV_series)", url_init + series):
            try:
                episodes, rtime, duration = _fetch_series_stats(url)
            except (requests.RequestException, IndexError, ValueError):
                # Page unreachable or without the expected infobox rows:
                # fall through to the next candidate URL.
                continue
            results_label1.config(text="Number of episodes: " + episodes)
            results_label2.config(text="Running Time: " + rtime)
            results_label3.config(text="Duration: " + str(duration) + " minutes")
            return

    results_label1.config(text="Invalid")
    results_label2.config(text="")
    results_label3.config(text="")
# Build the main application window.
root = tk.Tk()
root.title("Series Duration Calculator")

# Prompt text, followed by the entry box where the series name is typed.
series_label = tk.Label(root, text="Enter the series:")
series_entry = tk.Entry(root)
series_label.pack()
series_entry.pack()

# "Find" button: clicking it invokes scrape_website.
scrape_button = tk.Button(
    root,
    text="Find",
    command=scrape_website,
    fg="white",
    bg="black",
)
scrape_button.pack()

# Three initially empty labels that scrape_website fills in with the
# episode count, the running time and the computed duration.
results_label1 = tk.Label(root, text="")
results_label2 = tk.Label(root, text="")
results_label3 = tk.Label(root, text="")
results_label1.pack()
results_label2.pack()
results_label3.pack()

# Hand control to Tkinter's event loop until the window is closed.
root.mainloop()