Skip to content

Commit

Permalink
Make sh** faster (some caching, regex improvements, orjson support), f…
Browse files Browse the repository at this point in the history
…ix search lagging and crashing
  • Loading branch information
Commandcracker committed Jun 12, 2024
1 parent 187e1ff commit d704e82
Show file tree
Hide file tree
Showing 17 changed files with 133 additions and 89 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,9 @@ Place your custom CSS in `user_config_path("gucken").joinpath("custom.css")` and

## Optional dependencies

- `levenshtein` - Faster fuzzy sort/search. (with: `gucken[levenshtein]`)
- `speedups` (with: `gucken[speedups]`)
- Faster fuzzy sort/search. (`levenshtein`)
- Faster json parsing. (`orjson`)
- `socks` - SOCKS proxy support. (with: `gucken[socks]`)

## Todo
Expand Down Expand Up @@ -196,6 +198,7 @@ selenium or playwright

### UX

- [ ] Add hotkey to clear cache (F5)
- [ ] Translation DE, EN
- [ ] Improve settings design
- [ ] Merge SerienStream.to and AniWorld.to search results
Expand All @@ -217,8 +220,9 @@ selenium or playwright
### Speedups

- [ ] Pre-fetching
- [ ] Caching
- [ ] More threads and asyncio.gather to make everything faster
- [ ] More Caching
- [ ] Reuse Client

### Code

Expand Down Expand Up @@ -262,7 +266,6 @@ selenium or playwright

### Bugs & DX

- [ ] FIX TYPING SOMETIMES CAUSES CRASH
- [ ] Proper error handling
- [ ] Logging and Crash reports
- [ ] Blacklist detection & bypass
Expand Down
12 changes: 8 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ maintainers = [{name="Commandcracker"}]
license = {file = "LICENSE.txt"}
readme = "README.md"
dependencies = [
"textual>=0.64.0",
"textual>=0.67.0",
"beautifulsoup4>=4.12.3",
"httpx[http2]>=0.27.0",
"pypresence>=4.3.0",
"packaging>=24.0",
"packaging>=24.1",
"platformdirs>=4.2.2",
"toml>=0.10.2",
"fuzzywuzzy>=0.18.0"
"fuzzywuzzy>=0.18.0",
"async_lru>=2.0.4"
#"yt-dlp>=2024.4.9",
#"mpv>=1.0.6",
]
Expand Down Expand Up @@ -47,7 +48,10 @@ classifiers = [
]

[project.optional-dependencies]
levenshtein = ["levenshtein>=0.25.1"]
speedups = [
"levenshtein>=0.25.1",
"orjson>=3.10.4"
]
socks = ["httpx[socks]>=0.27.0"]

[project.urls]
Expand Down
2 changes: 1 addition & 1 deletion src/gucken/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import warnings
warnings.filterwarnings('ignore', message='Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')

__version__ = "0.1.12"
__version__ = "0.2.0"
3 changes: 2 additions & 1 deletion src/gucken/aniskip.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .networking import AsyncClient
from .tracker.myanimelist import search
from .rome import replace_roman_numerals
from .utils import json_loads


@dataclass
Expand All @@ -24,7 +25,7 @@ async def get_timings_from_id(
response = await client.get(
f"https://api.aniskip.com/v1/skip-times/{anime_id}/{episode_number}?types=op&types=ed"
)
json = response.json()
json = json_loads(response.content)
if json.get("found") is not True:
return
op_start = 0
Expand Down
92 changes: 65 additions & 27 deletions src/gucken/gucken.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from textual.command import Hits, Provider as TextualProvider, Hit, DiscoveryHit
import argparse
import logging
from asyncio import gather
from asyncio import gather, set_event_loop, new_event_loop
from atexit import register as register_atexit
from os import remove, name as os_name
from os.path import join
Expand All @@ -12,6 +12,7 @@
from subprocess import DEVNULL, PIPE, Popen
from time import sleep, time
from typing import ClassVar, List, Union
from async_lru import alru_cache

from fuzzywuzzy import fuzz
from platformdirs import user_config_path, user_log_path
Expand Down Expand Up @@ -39,6 +40,7 @@
TabbedContent,
TabPane,
)
from textual.worker import get_current_worker

from .aniskip import (
generate_chapters_file,
Expand Down Expand Up @@ -375,15 +377,8 @@ def compose(self) -> ComposeResult:

@on(Input.Changed)
async def input_changed(self, event: Input.Changed):
id = event.control.id
value = event.value

if id == "input":
if value:
self.lookup_anime(value)
else:
# TODO: fix sometimes wont clear
await self.query_one("#results", ListView).clear()
if event.control.id == "input":
self.lookup_anime(event.value)

@on(SortableTable.SortChanged)
async def sortableTable_sortChanged(
Expand Down Expand Up @@ -507,21 +502,58 @@ async def disable_RPC(self):
self.RPC.sock_writer.close()
self.RPC = None

# TODO: https://textual.textualize.io/guide/workers/#thread-workers
@alru_cache(maxsize=64, ttl=600) # Cache 64 entries. Clear entry after 10 minutes.
async def aniworld_search(self, keyword: str) -> Union[list[SearchResult], None]:
return await AniWorldProvider.search(keyword)

@alru_cache(maxsize=64, ttl=600) # Cache 64 entries. Clear entry after 10 minutes.
async def serienstream_search(self, keyword: str) -> Union[list[SearchResult], None]:
return await SerienStreamProvider.search(keyword)

def sync_gather(self, tasks: list):
async def gather_all():
return await gather(*tasks)

loop = new_event_loop()
set_event_loop(loop)
return loop.run_until_complete(gather_all())

# TODO: Exit on error when debug = true
@work(exclusive=True) #exit_on_error=False
async def lookup_anime(self, keyword: str) -> None:
# TODO: sometimes not removing loading state
# TODO: FIX
"""
sys:1: RuntimeWarning: coroutine '_LRUCacheWrapperInstanceMethod.__call__' was never awaited
RuntimeWarning: Enable tracemalloc to get the object allocation traceback
"""

@work(exclusive=True, thread=True, exit_on_error=False)
def lookup_anime(self, keyword: str) -> None:
results_list_view = self.query_one("#results", ListView)
worker = get_current_worker()

if keyword is None:
if not worker.is_cancelled:
self.call_from_thread(results_list_view.clear)
results_list_view.loading = False
return

aniworld_to = self.query_one("#aniworld_to", Checkbox).value
serienstream_to = self.query_one("#serienstream_to", Checkbox).value

search_providers = []
if self.query_one("#aniworld_to", Checkbox).value:
search_providers.append(AniWorldProvider.search(keyword))

if self.query_one("#serienstream_to", Checkbox).value:
search_providers.append(SerienStreamProvider.search(keyword))
if aniworld_to:
search_providers.append(self.aniworld_search(keyword))
if serienstream_to:
search_providers.append(self.serienstream_search(keyword))

results_list_view = self.query_one("#results", ListView)
await results_list_view.clear()
if worker.is_cancelled:
return
self.call_from_thread(results_list_view.clear)
results_list_view.loading = True
results = await gather(*search_providers)
if worker.is_cancelled:
return
results = self.sync_gather(search_providers)
final_results = []
for l in results:
if l is not None:
Expand All @@ -542,7 +574,9 @@ def fuzzy_sort_key(result):
f"\n{series.description}"
)
))
await results_list_view.extend(items)
if worker.is_cancelled:
return
self.call_from_thread(results_list_view.extend, items)
results_list_view.loading = False
if len(final_results) > 0:

Expand All @@ -552,7 +586,7 @@ def select_first_index():
except AssertionError:
pass

self.app.call_later(select_first_index)
self.call_later(select_first_index)

async def on_key(self, event: events.Key) -> None:
key = event.key
Expand Down Expand Up @@ -597,6 +631,10 @@ async def play_selected(self):
)
dt.loading = False

@alru_cache(maxsize=32, ttl=600) # Cache 32 entries. Clear entry after 10 minutes.
async def get_series(self, series_search_result: SearchResult):
return await series_search_result.get_series()

@work(exclusive=True)
async def open_info(self) -> None:
series_search_result: SearchResult = self.current[
Expand All @@ -605,13 +643,14 @@ async def open_info(self) -> None:
info_tab = self.query_one("#info", TabPane)
info_tab.disabled = False
info_tab.loading = True
self.query_one(TabbedContent).active = "info"
table = self.query_one("#season_list", DataTable)
table.focus(scroll_visible=False)
md = self.query_one("#markdown", Markdown)
series = await series_search_result.get_series()

series = await self.get_series(series_search_result)
self.current_info = series
await md.update(series.to_markdown())

table = self.query_one("#season_list", DataTable)
table.clear()
c = 0
for ep in series.episodes:
Expand All @@ -632,7 +671,6 @@ async def open_info(self) -> None:
" ".join(sort_favorite_hoster_by_key(hl, self.hoster)),
" ".join(ll),
)
table.focus(scroll_visible=False)
info_tab.loading = False

@work(exclusive=True, thread=True)
Expand Down Expand Up @@ -902,7 +940,7 @@ def main():
if args.debug:
logs_path = user_log_path("gucken", ensure_exists=True)
logging.basicConfig(
filename=logs_path.joinpath("gucken.log"), encoding="utf-8", level=logging.INFO
filename=logs_path.joinpath("gucken.log"), encoding="utf-8", level=logging.INFO, force=True
)

register_atexit(gucken_settings_manager.save)
Expand Down
31 changes: 9 additions & 22 deletions src/gucken/hoster/doodstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
from re import compile as re_compile
from string import ascii_letters, digits
from time import time
from urllib.parse import urlparse

from ..networking import AsyncClient
from .common import DirectLink, Hoster

EXTRACT_DOODSTREAM_HLS_PATTERN = re_compile(r"/pass_md5/[\w-]+/[\w-]+")
EXTRACT_DOODSTREAM_HLS_PATTERN = re_compile(r"/pass_md5/[\w-]+/(?P<token>[\w-]+)")


def random_str(length: int = 10) -> str:
Expand All @@ -19,30 +18,18 @@ def js_date_now() -> int:
return int(time() * 1000)


headers = {"Referer": "https://d0000d.com/"}


@dataclass
class DoodstreamHoster(Hoster):
requires_headers = True

async def get_direct_link(self) -> DirectLink:
async with AsyncClient(verify=False) as client:
response = await client.head(self.url)
if response.has_redirect_location:
u2 = (
urlparse(response.headers.get("Location"))
._replace(netloc="d000d.com")
.geturl()
)
response = await client.get(u2)

pass_md5 = EXTRACT_DOODSTREAM_HLS_PATTERN.search(response.text)
response = await client.get(
f"https://d0000d.com{pass_md5.group()}",
headers={"Referer": "https://d0000d.com/"},
)
async with AsyncClient(verify=False, auto_referer=True) as client:
response1 = await client.get(self.url)
match = EXTRACT_DOODSTREAM_HLS_PATTERN.search(response1.text)

# Require Referer
response2 = await client.get(str(response1.url.copy_with(path=match.group())))
return DirectLink(
url=f"{response.text}{random_str()}?token={pass_md5.group().split('/')[-1]}&expiry={js_date_now()}",
headers=headers,
url=f"{response2.text}{random_str()}?token={match.group("token")}&expiry={js_date_now()}",
headers={"Referer": str(response2.url.copy_with(path="/"))},
)
17 changes: 4 additions & 13 deletions src/gucken/hoster/streamtape.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,18 @@
from ..networking import AsyncClient
from .common import DirectLink, Hoster

STREAMTAPE_PATTERN = re_compile(r"botlink(.*?)innerHTML(.*?)\);")
STREAMTAPE_PATTERN_SUBSTRING = re_compile(r"substring\(\d+")
STREAMTAPE_PATTERN_DIGETS = re_compile(r"\d+")
STREAMTAPE_PATTERN = re_compile(r"'botlink.*innerHTML.*?'(?P<s1>.*)'.*?\+.*?'(?P<s2>.*)'")


class StreamtapeHoster(Hoster):
async def get_direct_link(self) -> DirectLink:
# TODO: Error checking
async with AsyncClient(verify=False) as client:
response = await client.get(self.url)

# TODO: Save html and error in order to investigate
# with open("out.txt", "wb") as f:
# f.write(response.text.encode('utf-8'))
video_src = STREAMTAPE_PATTERN.search(response.text)
j1 = "".join(video_src.groups())
u1 = j1.split(" ")[2][1:-2]
u2 = j1[j1.index("('") + 2 : j1.rfind("')")]

matches = STREAMTAPE_PATTERN_SUBSTRING.findall(j1)
for match in matches:
sub = STREAMTAPE_PATTERN_DIGETS.search(match).group(0)
u2 = u2[int(sub) :]

return DirectLink(f"https:{u1}{u2}")
match = STREAMTAPE_PATTERN.search(response.text)
return DirectLink(f"https:{match.group("s1")}{match.group('s2')[4:]}")
8 changes: 4 additions & 4 deletions src/gucken/hoster/veo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from ..networking import AsyncClient
from .common import DirectLink, Hoster

EXTRACT_VEO_HLS_PATTERN = re_compile(r"'hls': '(.*?)'")
EXTRACT_VEO_HLS_PATTERN = re_compile(r"'hls': '(?P<hls>.*)'")


class VOEHoster(Hoster):
async def get_direct_link(self) -> DirectLink:
async with AsyncClient(verify=False) as client:
response = await client.get(self.url)
match_hls = EXTRACT_VEO_HLS_PATTERN.search(response.text)
hls_link = match_hls.group(1)
return DirectLink(b64decode(hls_link).decode())
match = EXTRACT_VEO_HLS_PATTERN.search(response.text)
link = match.group("hls")
return DirectLink(b64decode(link).decode())
7 changes: 3 additions & 4 deletions src/gucken/hoster/vidoza.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@

from .common import DirectLink, Hoster

# TODO: improve all patterns
EXTRACT_VIDOZA_HLS_PATTERN = re_compile(
r"sourcesCode:.*?\[.*?\{.*?src:.*?[\'|\"](?P<hls>.*?)[\'|\"],"
r"sourcesCode:.*?\[.*?\{.*?src:.*?[\'|\"](?P<mp4>.*?)[\'|\"],"
)


class VidozaHoster(Hoster):
async def get_direct_link(self) -> DirectLink:
async with AsyncClient(verify=False) as client:
response = await client.get(self.url)
match_hls = EXTRACT_VIDOZA_HLS_PATTERN.search(response.text)
return DirectLink(match_hls.group(1))
match = EXTRACT_VIDOZA_HLS_PATTERN.search(response.text)
return DirectLink(match.group("mp4"))
Loading

0 comments on commit d704e82

Please sign in to comment.