-
Notifications
You must be signed in to change notification settings - Fork 1
/
RaiPlayDL.py
95 lines (81 loc) · 3.52 KB
/
RaiPlayDL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env python3
import contextlib
import glob
import os
import re
import subprocess
import sys
import urllib
from os import path as path
from urllib.parse import urljoin

import pydub
import requests
from bs4 import BeautifulSoup
from pydub import AudioSegment
class Raipodcast():
    """Download every MP3 linked from a single podcast web page.

    The page at ``url`` is fetched and parsed; each ``<li>`` or ``<div>``
    element carrying both a ``data-mediapolis`` attribute (the media link)
    and a ``data-title`` attribute (the episode title) is downloaded with
    ``wget`` into the ``tmp`` directory under the current working directory.
    """

    # Directory, relative to the current working directory, that receives
    # the downloaded files. It is emptied at the start of every run.
    TMP_DIR = 'tmp'

    # Seconds to wait for the podcast page before giving up.
    PAGE_TIMEOUT = 60

    # Matches every character that is NOT a word character, '-' or '.'.
    # These characters are dropped from titles before they become file names.
    _UNSAFE_CHARS = re.compile(r'(?u)[^-\w.]')

    def __init__(self, url):
        """Remember the address of the podcast page to scrape."""
        self.url = url

    @classmethod
    def _sanitize(cls, text):
        """Return ``text`` with every file-name-unsafe character removed."""
        return cls._UNSAFE_CHARS.sub('', str(text))

    @staticmethod
    def _prepare_tmp_dir(directory):
        """Create ``directory`` if needed and delete the files it contains.

        Only regular files (and links to them) are removed; sub-directories
        are left in place, because ``os.remove`` cannot delete a directory
        and would abort the whole run.
        """
        os.makedirs(directory, exist_ok=True)
        for entry in glob.glob(os.path.join(directory, '*')):
            if os.path.isfile(entry) or os.path.islink(entry):
                os.remove(entry)

    def getFile(self, url, filename):
        """Download ``url`` to ``filename`` by running the external ``wget``.

        ``wget`` is used on purpose: per the original author's note, a
        streaming download through ``requests`` did not work for some files.

        The command is passed as an argument list, not a shell string, so
        characters such as ``&``, ``;`` or spaces in the URL or file name are
        handed to ``wget`` verbatim instead of being interpreted by a shell.
        A failed download is reported on stdout and does not raise, so the
        remaining episodes are still attempted.
        """
        # '--' ends option parsing, so a URL that starts with '-' cannot be
        # mistaken for a wget option.
        command = ['wget', '-O', filename, '--', url]
        try:
            completed = subprocess.run(command, check=False)
        except OSError as err:
            # Typically raised when the wget executable is not installed.
            print("Unable to run wget: " + str(err))
            return
        if completed.returncode != 0:
            print("wget exited with status " + str(completed.returncode)
                  + " for " + url)

    def process(self, folder):
        """Fetch the podcast page and download every episode it links to.

        Args:
            folder: Accepted for backward compatibility but not used; files
                are always written to the ``tmp`` directory.

        Returns:
            Always ``None``. When the page does not answer with HTTP 200 the
            method reports the status and returns before downloading anything.
        """
        # Start from an empty download directory.
        self._prepare_tmp_dir(self.TMP_DIR)

        print(self.url)
        # A timeout keeps an unresponsive server from blocking forever.
        result = requests.get(self.url, timeout=self.PAGE_TIMEOUT)
        if result.status_code != 200:
            print("Unable to fetch page (HTTP " + str(result.status_code) + ")")
            return None

        soup = BeautifulSoup(result.content, "html.parser")
        # Debug hint: print(soup) here to see what is actually being parsed.

        # Pages without a <title> element fall back to the URL for display.
        title_tag = soup.find('title')
        title = title_tag.text if title_tag is not None else self.url
        print ("Starting download for \"" + title + "\"")

        candidates = soup.find_all(['li', 'div'])
        print ("Download single MP3s...")
        for element in candidates:
            # Only elements exposing both the media link and a title count
            # as episodes.
            if not (element.has_attr('data-mediapolis')
                    and element.has_attr('data-title')):
                continue
            # The media link may be relative to the page address.
            mp3 = urljoin(self.url, element['data-mediapolis'])
            singletitle = self._sanitize(element['data-title'])
            print ("Download \"" + singletitle + "\" (" + mp3 + ")")
            self.getFile(mp3, os.path.join(self.TMP_DIR, singletitle + ".mp3"))

        print ("Done!\nFiles saved in ./tmp/ --> Move them or they will be removed next time you run this code.")
def main():
    """Command-line entry point: show the banner and download one podcast.

    The podcast page URL is read from ``sys.argv[1]``. When it is missing,
    ``Need a url`` is printed and the process exits with status 2.
    """
    # Raw string: the ASCII art contains backslashes that are not valid
    # escape sequences, which newer Python versions warn about. The printed
    # text is unchanged.
    print (r"""
____ _ ____ _
| _ \ __ _(_) _ \| | __ _ _ _
| |_) / _` | | |_) | |/ _` | | | |
| _ < (_| | | __/| | (_| | |_| |
|_| \_\__,_|_|_| |_|\__,_|\__, |
Downloader |___/
---------------------------------
Proudly developed by Andrea Fortuna
https://www.andreafortuna.org
""")
    if len(sys.argv) < 2:
        print('Need a url')
        # sys.exit rather than the bare exit(): the latter is injected by
        # the site module and is not guaranteed to be available.
        sys.exit(2)
    getPodcast = Raipodcast(sys.argv[1])
    getPodcast.process('.')
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()