forked from madwind/flexget_qbittorrent_mod
-
Notifications
You must be signed in to change notification settings - Fork 1
/
html_rss.py
115 lines (105 loc) · 4.08 KB
/
html_rss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from urllib.parse import urljoin
from flexget import plugin
from flexget.entry import Entry
from flexget.event import event
from flexget.utils.soup import get_soup
from loguru import logger
from requests import RequestException
try:
import brotli
except ImportError:
brotli = None
class PluginHtmlRss:
    """FlexGet input plugin that turns an HTML listing page into entries.

    The page at ``url`` is downloaded, every node matched by
    ``root_element_selector`` is treated as one candidate entry, and each
    configured field is read from a sub-element of that node (either its
    text or one of its attributes).
    """

    schema = {
        'type': 'object',
        'properties': {
            'url': {'type': 'string', 'format': 'url'},
            'headers': {
                'type': 'object',
                'properties': {
                    'cookie': {'type': 'string'},
                    'user-agent': {'type': 'string'},
                },
            },
            'params': {'type': 'string'},
            'root_element_selector': {'type': 'string'},
            'fields': {
                'type': 'object',
                'properties': {
                    'title': {
                        'type': 'object',
                        'properties': {
                            'element_selector': {'type': 'string'},
                            'attribute': {'type': 'string'},
                        },
                    },
                    'url': {
                        'type': 'object',
                        'properties': {
                            'element_selector': {'type': 'string'},
                            'attribute': {'type': 'string'},
                        },
                    },
                },
                'required': ['title', 'url'],
            },
        },
        'required': ['url'],
        'additionalProperties': False,
    }

    def prepare_config(self, config):
        """Fill in a default for every optional key and return ``config``.

        The dict is updated in place; the same object is returned.
        """
        config.setdefault('url', '')
        config.setdefault('headers', {})
        config.setdefault('params', '')
        config.setdefault('root_element_selector', '')
        config.setdefault('fields', {})
        return config

    def on_task_input(self, task, config):
        """Download the configured page and build one entry per matched node.

        :param task: the running task; its ``requests`` session performs the
            download and its ``name`` is used in error messages.
        :param config: plugin configuration matching ``schema``.
        :return: list of entries that ended up with both a non-empty
            ``title`` and a non-empty ``url``. Empty when ``url`` or
            ``root_element_selector`` is not configured.
        :raises plugin.PluginError: when the page cannot be downloaded.
        """
        config = self.prepare_config(config)
        url = config.get('url')
        root_element_selector = config.get('root_element_selector')
        fields = config.get('fields')
        params = config.get('params')

        # Without both a page and a selector there is nothing to scrape.
        if not (url and root_element_selector):
            return []

        content = self._fetch_content(task, url, config.get('headers'))

        entries = []
        for element in get_soup(content).select(root_element_selector):
            logger.debug('element in element_selector: {}', element)
            entry = Entry()
            for key, spec in fields.items():
                entry[key] = self._extract_field(element, spec)
                logger.debug('key: {}, value: {}', key, entry[key])
            # .get() keeps a config without title/url fields from raising
            # KeyError; such rows are simply skipped.
            if entry.get('title') and entry.get('url'):
                entry['url'] = self._build_url(url, entry['url'], params)
                # Overwrite explicitly so original_url carries the final,
                # absolute link rather than the raw scraped value.
                entry['original_url'] = entry['url']
                entries.append(entry)
        return entries

    def _fetch_content(self, task, url, headers):
        """Return the body of ``url`` as bytes, brotli-decoded if necessary."""
        # Work on a copy so the task configuration is not modified.
        request_headers = dict(headers or {})
        if brotli:
            request_headers['accept-encoding'] = 'gzip, deflate, br'
        try:
            response = task.requests.get(url, headers=request_headers)
        except RequestException as e:
            raise plugin.PluginError(
                'Unable to download the Html for task {} ({}): {}'.format(task.name, url, e)
            ) from e
        content = response.content
        if response.headers.get('content-encoding') == 'br':
            content = self._decode_brotli(content)
        return content

    @staticmethod
    def _decode_brotli(payload):
        """Brotli-decompress ``payload``; return it unchanged if that fails."""
        if brotli is None:
            logger.warning(
                'Response is brotli-encoded but the brotli module is not installed'
            )
            return payload
        try:
            return brotli.decompress(payload)
        except brotli.error:
            # The HTTP stack may already have decoded the body, in which case
            # a second pass fails; use the payload as received.
            logger.debug('brotli decompress failed, using the body as received')
            return payload

    @staticmethod
    def _extract_field(element, spec):
        """Read one field from ``element``; return ``''`` when it is absent.

        A spec without ``attribute`` is treated as ``textContent``; a spec
        without ``element_selector`` yields ``''``.
        """
        selector = spec.get('element_selector')
        if not selector:
            return ''
        sub_element = element.select_one(selector)
        if sub_element is None:
            return ''
        attribute = spec.get('attribute', 'textContent')
        if attribute == 'textContent':
            return sub_element.get_text()
        return sub_element.get(attribute, '')

    @staticmethod
    def _build_url(page_url, link, params):
        """Resolve ``link`` against ``page_url`` and apply ``params``."""
        base_url = urljoin(page_url, link)
        # A leading '&' means "append to the existing query string"; any
        # other value is resolved as a relative reference.
        if params.startswith('&'):
            return base_url + params
        return urljoin(base_url, params)
@event('plugin.register')
def register_plugin():
    """Register ``PluginHtmlRss`` under the name ``html_rss`` (API version 2)."""
    plugin_name = 'html_rss'
    plugin.register(PluginHtmlRss, plugin_name, api_ver=2)