From 804d99a8dd4e545f485c2e9e73d37477b20950c5 Mon Sep 17 00:00:00 2001
From: Peter McConnell
Date: Tue, 7 Nov 2023 20:32:07 +0000
Subject: [PATCH] chore: get_dupes refactor and benchmarks

Split the per-section duplicate search out of
PlexWrapper.get_dupe_content() into get_dupe_content_for_section().
get_dupe_content() now submits one get_dupe_content_for_section() task
per library section to a thread pool and concatenates the lists the
tasks return.

get_dupe_content_for_section() only handles "movie" and "show"
sections and returns an empty list for any other section type. Show
sections are searched with libtype "episode" and their items are
converted with episode_to_dict; movie sections use movie_to_dict.

Add backend/benchmarks.py, which benchmarks get_dupe_content() through
pytest-benchmark (10 iterations, 3 rounds) or can be executed directly
to run it once and print the top 10 traces. Add a `benchmark_backend`
make target that runs it, and add python-dotenv and pytest-benchmark to
backend/requirements.txt.

---
 Makefile                 |  4 +++
 backend/benchmarks.py    | 27 +++++++++++++++++
 backend/plexwrapper.py   | 65 +++++++++++++++++++++++++---------------
 backend/requirements.txt |  2 ++
 4 files changed, 74 insertions(+), 24 deletions(-)
 create mode 100755 backend/benchmarks.py

diff --git a/Makefile b/Makefile
index 7fcac14..3145b7e 100644
--- a/Makefile
+++ b/Makefile
@@ -14,3 +14,7 @@ build: .env
 run:
 	@opts=""; if [ "$(CONFIG_MOUNT)" != "" ]; then opts="$$opts -v $(CONFIG_MOUNT):/config"; fi; \
 	docker run --rm --env-file=.env -p $(HOST_PORT):$(CONTAINER_PORT) $$opts -ti $(IMG_NAME)
+
+.PHONY: benchmark_backend
+benchmark_backend:
+	@cd backend && PYTHONPATH=$$(pwd) pytest -v benchmarks.py
diff --git a/backend/benchmarks.py b/backend/benchmarks.py
new file mode 100755
index 0000000..86cc9c8
--- /dev/null
+++ b/backend/benchmarks.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+#
+# this file is meant for local development only. it expects a populated .env file
+# with connection details to a valid plex server. it can be run via pytest, which
+# will run multiple iterations and rounds of the get_dupe_content method, or can
+# be invoked directly with ./backend/benchmarks.py to simply run get_dupe_content()
+# and print traces to stdout (note: traces only available if DEBUG=1 set)
+
+import pytest
+import time
+from plexwrapper import PlexWrapper
+from utils import print_top_traces
+from dotenv import load_dotenv
+
+load_dotenv()
+
+def get_dupe_content():
+    PlexWrapper().get_dupe_content()
+
+def test_get_dupe_content(benchmark):
+    benchmark.pedantic(get_dupe_content, iterations=10, rounds=3)
+
+
+# allow for direct invocation, without pytest
+if __name__ == "__main__":
+    get_dupe_content()
+    print_top_traces(10)
diff --git a/backend/plexwrapper.py b/backend/plexwrapper.py
index f7c77dd..3a984fe 100644
--- a/backend/plexwrapper.py
+++ b/backend/plexwrapper.py
@@ -66,36 +66,53 @@ def get_dupe_content(self, page=1):
         dupes = []
         with ThreadPoolExecutor() as executor:
             futures = []
+            logger.debug(f"GET DUPES FOR: {[(x.title, x.type) for x in self._get_sections()]}")
             for section in self._get_sections():
-                logger.debug("SECTION: %s", section.title)
-                if section.type == "movie":
-                    logger.debug("Section type is MOVIE")
-                    # Recursively search movies
-                    offset = (page - 1) * self.page_size
-                    limit = offset + self.page_size
-                    logger.debug("Get results from offset %s to limit %s", offset, limit)
-                    results = section.search(duplicate=True, libtype='movie', container_start=offset, limit=limit)
-                    for movie in results:
-                        if len(movie.media) > 1:
-                            future = executor.submit(self.movie_to_dict, movie, section.title)
-                            futures.append(future)
-                elif section.type == "show":
-                    logger.debug("Section type is SHOW")
-                    # Recursively search TV
-                    offset = (page - 1) * self.page_size
-                    limit = offset + self.page_size
-                    logger.debug("Get results from offset %s to limit %s", offset, limit)
-                    results = section.search(duplicate=True, libtype='episode', container_start=offset, limit=limit)
-                    for episode in results:
-                        if len(episode.media) > 1:
-                            future = executor.submit(self.episode_to_dict, episode, section.title)
-                            futures.append(future)
+                future = executor.submit(self.get_dupe_content_for_section, page, section)
+                futures.append(future)
+
+            for future in as_completed(futures):
+                results = future.result()
+                if results:
+                    dupes = dupes + results
+
+        return dupes
+
+    @trace_time
+    def get_dupe_content_for_section(self, page, section):
+        if section.type not in ("movie", "show"):
+            return []  # same type as the normal return value (a list)
+        dupes = []
+        to_dict_func = self.movie_to_dict
+        if section.type == "show":  # show sections are searched for episodes below
+            to_dict_func = self.episode_to_dict
+        with ThreadPoolExecutor() as executor:
+            futures = []
+            logger.debug("SECTION: %s/%s", section.title, section.type)
+            offset = (page - 1) * self.page_size
+            limit = offset + self.page_size
+            logger.debug(
+                "Get results for %s/%s from offset %s to limit %s",
+                section.title,
+                section.type,
+                offset,
+                limit,
+            )
+            libtype = section.type
+            if libtype == "show":
+                libtype = "episode"
+            results = section.search(duplicate=True, libtype=libtype, container_start=offset, limit=limit)
+            for item in results:
+                if len(item.media) > 1:
+                    future = executor.submit(to_dict_func, item, section.title)
+                    futures.append(future)
 
             for future in as_completed(futures):
                 dupes.append(future.result())
 
         return dupes
 
+    # TODO: refactor and multithread
     @trace_time
     def get_content_sample_files(self):
         content = []
@@ -235,7 +252,7 @@ def get_thumbnail_url(self, content_key):
         if item is not None:
             return item.thumbUrl
         else:
-            return "";
+            return ""
 
     @classmethod
     @trace_time
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 6c1dbe4..d8033e6 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -15,3 +15,5 @@ tqdm==4.42.0
 urllib3==1.26.18
 websocket-client==0.57.0
 Werkzeug==3.0.1
+python-dotenv==1.0.0
+pytest-benchmark==4.0.0