Skip to content

Commit

Permalink
chore: get_dupes refactor and benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
peter-mcconnell committed Nov 7, 2023
1 parent 1d8cdbc commit 804d99a
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 24 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,7 @@ build: .env
# Run the image in a throwaway container (--rm), loading environment variables
# from .env and publishing HOST_PORT to CONTAINER_PORT. When CONFIG_MOUNT is
# non-empty it is additionally mounted at /config inside the container.
run:
@opts=""; if [ "$(CONFIG_MOUNT)" != "" ]; then opts="$$opts -v $(CONFIG_MOUNT):/config"; fi; \
docker run --rm --env-file=.env -p $(HOST_PORT):$(CONTAINER_PORT) $$opts -ti $(IMG_NAME)

# Run the backend benchmarks: change into backend/, put that directory on
# PYTHONPATH, and run pytest verbosely against benchmarks.py.
.PHONY: benchmark_backend
benchmark_backend:
@cd backend && PYTHONPATH=$$(pwd) pytest -v benchmarks.py
27 changes: 27 additions & 0 deletions backend/benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env python3
#
# this file is meant for local development only. it expects a populated .env file
# with connection details to a valid plex server. it can be run via pytest, which
# will run multiple iterations and rounds of the get_dupe_content method, or can
# be invoked directly with ./backend/benchmarks.py to simply run get_dupe_content()
# and print traces to stdout (note: traces only available if DEBUG=1 set)

import pytest
import time
from plexwrapper import PlexWrapper
from utils import print_top_traces
from dotenv import load_dotenv

load_dotenv()

def get_dupe_content():
    """Build a fresh PlexWrapper and run one duplicate lookup, discarding the result."""
    wrapper = PlexWrapper()
    wrapper.get_dupe_content()

def test_get_dupe_content(benchmark):
    """Benchmark get_dupe_content in pedantic mode: 3 rounds of 10 iterations."""
    pedantic_settings = {"iterations": 10, "rounds": 3}
    benchmark.pedantic(get_dupe_content, **pedantic_settings)


def _run_directly():
    """Run a single duplicate lookup, then print the top 10 traces."""
    get_dupe_content()
    print_top_traces(10)


# allow for direct invocation, without pytest
if __name__ == "__main__":
    _run_directly()
65 changes: 41 additions & 24 deletions backend/plexwrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,36 +66,53 @@ def get_dupe_content(self, page=1):
dupes = []
with ThreadPoolExecutor() as executor:
futures = []
logger.debug(f"GET DUPES FOR: {[(x.title, x.type) for x in self._get_sections()]}")
for section in self._get_sections():
logger.debug("SECTION: %s", section.title)
if section.type == "movie":
logger.debug("Section type is MOVIE")
# Recursively search movies
offset = (page - 1) * self.page_size
limit = offset + self.page_size
logger.debug("Get results from offset %s to limit %s", offset, limit)
results = section.search(duplicate=True, libtype='movie', container_start=offset, limit=limit)
for movie in results:
if len(movie.media) > 1:
future = executor.submit(self.movie_to_dict, movie, section.title)
futures.append(future)
elif section.type == "show":
logger.debug("Section type is SHOW")
# Recursively search TV
offset = (page - 1) * self.page_size
limit = offset + self.page_size
logger.debug("Get results from offset %s to limit %s", offset, limit)
results = section.search(duplicate=True, libtype='episode', container_start=offset, limit=limit)
for episode in results:
if len(episode.media) > 1:
future = executor.submit(self.episode_to_dict, episode, section.title)
futures.append(future)
future = executor.submit(self.get_dupe_content_for_section, page, section)
futures.append(future)

for future in as_completed(futures):
results = future.result()
if results:
dupes = dupes + results

return dupes

@trace_time
def get_dupe_content_for_section(self, page, section):
    """Collect one page of duplicated items from a single library section.

    Args:
        page: 1-based page number; the search offset is
            ``(page - 1) * self.page_size``.
        section: Library section to search. Only sections whose ``type`` is
            ``"movie"`` or ``"show"`` are searched.

    Returns:
        A list with one converted entry per search result that has more than
        one media entry, in the order the conversions complete (not search
        order). Empty for unsupported section types.
    """
    # Section type -> (libtype passed to search(), converter for each result).
    # "show" sections are searched at episode granularity, so their results
    # must go through episode_to_dict rather than movie_to_dict.
    handlers = {
        "movie": ("movie", self.movie_to_dict),
        "show": ("episode", self.episode_to_dict),
    }
    if section.type not in handlers:
        # Return the same container type as the normal path.
        return []
    libtype, to_dict_func = handlers[section.type]

    logger.debug("SECTION: %s/%s", section.title, section.type)
    offset = (page - 1) * self.page_size
    # NOTE(review): limit is offset + page_size, unchanged from the previous
    # implementation; confirm whether search() treats `limit` as a result
    # count or as an end index when container_start is non-zero.
    limit = offset + self.page_size
    logger.debug(
        "Get results for %s/%s from offset %s to limit %s",
        section.title,
        section.type,
        offset,
        limit,
    )
    results = section.search(duplicate=True, libtype=libtype, container_start=offset, limit=limit)

    dupes = []
    with ThreadPoolExecutor() as executor:
        # Only items with more than one media entry are converted.
        futures = [
            executor.submit(to_dict_func, item, section.title)
            for item in results
            if len(item.media) > 1
        ]
        for future in as_completed(futures):
            dupes.append(future.result())

    return dupes

# TODO: refactor and multithread
@trace_time
def get_content_sample_files(self):
content = []
Expand Down Expand Up @@ -235,7 +252,7 @@ def get_thumbnail_url(self, content_key):
if item is not None:
return item.thumbUrl
else:
return "";
return ""

@classmethod
@trace_time
Expand Down
2 changes: 2 additions & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ tqdm==4.42.0
urllib3==1.26.18
websocket-client==0.57.0
Werkzeug==3.0.1
python-dotenv==1.0.0
pytest-benchmark==4.0.0

0 comments on commit 804d99a

Please sign in to comment.