Merge pull request #119 from se1exin/get_dupes_refactor_and_bench

chore: get_dupes refactor and benchmarks
se1exin · Nov 7, 2023 · 9bd2fbb · 9bd2fbb
2 parents 1d8cdbc + 804d99a
commit 9bd2fbb
Show file tree

Hide file tree

Showing 4 changed files with 74 additions and 24 deletions.
diff --git a/Makefile b/Makefile
@@ -14,3 +14,7 @@ build: .env
 run:
 	@opts=""; if [ "$(CONFIG_MOUNT)" != "" ]; then opts="$$opts -v $(CONFIG_MOUNT):/config"; fi; \
 	docker run --rm --env-file=.env -p $(HOST_PORT):$(CONTAINER_PORT) $$opts -ti $(IMG_NAME)
+
+.PHONY: benchmark_backend
+benchmark_backend:  # runs backend/benchmarks.py under pytest, with backend/ on PYTHONPATH
+	@cd backend && PYTHONPATH=$$(pwd) pytest -v benchmarks.py
diff --git a/backend/benchmarks.py b/backend/benchmarks.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+#
+# this file is meant for local development only. it expects a populated .env file
+# with connection details to a valid plex server. it can be run via pytest, which
+# will run multiple iterations and rounds of the get_dupe_content method, or can
+# be invoked directly with ./backend/benchmarks.py to simply run get_dupe_content()
+# and print traces to stdout (note: traces are only available if DEBUG=1 is set)
+
+import pytest
+import time
+from plexwrapper import PlexWrapper
+from utils import print_top_traces
+from dotenv import load_dotenv
+
+load_dotenv()
+
+def get_dupe_content():  # zero-argument wrapper; builds a fresh PlexWrapper on every call
+    PlexWrapper().get_dupe_content()
+
+def test_get_dupe_content(benchmark):  # `benchmark` is the fixture supplied by pytest-benchmark
+    benchmark.pedantic(get_dupe_content, iterations=10, rounds=3)  # 3 rounds of 10 iterations each
+
+
+# allow for direct invocation, without pytest
+if __name__ == "__main__":
+    get_dupe_content()  # a single run, without pytest-benchmark rounds/iterations
+    print_top_traces(10)  # traces are only available when DEBUG=1 is set (see header note)
diff --git a/backend/plexwrapper.py b/backend/plexwrapper.py
@@ -66,36 +66,53 @@ def get_dupe_content(self, page=1):
         dupes = []
         with ThreadPoolExecutor() as executor:
             futures = []
+            logger.debug(f"GET DUPES FOR: {[(x.title, x.type) for x in self._get_sections()]}")
             for section in self._get_sections():
-                logger.debug("SECTION: %s", section.title)
-                if section.type == "movie":
-                    logger.debug("Section type is MOVIE")
-                    # Recursively search movies
-                    offset = (page - 1) * self.page_size
-                    limit = offset + self.page_size
-                    logger.debug("Get results from offset %s to limit %s", offset, limit)
-                    results = section.search(duplicate=True, libtype='movie', container_start=offset, limit=limit)
-                    for movie in results:
-                        if len(movie.media) > 1:
-                            future = executor.submit(self.movie_to_dict, movie, section.title)
-                            futures.append(future)
-                elif section.type == "show":
-                    logger.debug("Section type is SHOW")
-                    # Recursively search TV
-                    offset = (page - 1) * self.page_size
-                    limit = offset + self.page_size
-                    logger.debug("Get results from offset %s to limit %s", offset, limit)
-                    results = section.search(duplicate=True, libtype='episode', container_start=offset, limit=limit)
-                    for episode in results:
-                        if len(episode.media) > 1:
-                            future = executor.submit(self.episode_to_dict, episode, section.title)
-                            futures.append(future)
+                future = executor.submit(self.get_dupe_content_for_section, page, section)
+                futures.append(future)
+
+            for future in as_completed(futures):
+                results = future.result()
+                if results:
+                    dupes = dupes + results
+
+        return dupes
+
+    @trace_time
+    def get_dupe_content_for_section(self, page, section):
+        """Return one page of duplicate items from a single library section.
+
+        Only "movie" and "show" sections are searched (shows by episode); any
+        other section type yields an empty list. Each hit with more than one
+        media entry is converted on a worker thread, so the order of the
+        returned list follows completion order, not search order.
+        """
+        if section.type not in ("movie", "show"):
+            return []  # empty list, matching the list returned on the normal path
+        # Past the guard a section is typed "movie" or "show", never "episode",
+        # so the episode search type and converter must both key off "show".
+        is_show = section.type == "show"
+        libtype = "episode" if is_show else section.type
+        to_dict_func = self.episode_to_dict if is_show else self.movie_to_dict
+        dupes = []
+        with ThreadPoolExecutor() as executor:
+            futures = []
+            logger.debug("SECTION: %s/%s", section.title, section.type)
+            offset = (page - 1) * self.page_size
+            limit = offset + self.page_size
+            logger.debug("Get results for %s/%s from offset %s to limit %s", section.title, section.type, offset, limit)
+            results = section.search(duplicate=True, libtype=libtype, container_start=offset, limit=limit)
+            for item in results:
+                if len(item.media) > 1:
+                    future = executor.submit(to_dict_func, item, section.title)
+                    futures.append(future)
 
             for future in as_completed(futures):
                 dupes.append(future.result())
 
         return dupes
 
+    # TODO: refactor and multithread
     @trace_time
     def get_content_sample_files(self):
         content = []
@@ -235,7 +252,7 @@ def get_thumbnail_url(self, content_key):
         if item is not None:
             return item.thumbUrl
         else:
-            return "";
+            return ""
 
     @classmethod
     @trace_time

diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -15,3 +15,5 @@ tqdm==4.42.0
 urllib3==1.26.18
 websocket-client==0.57.0
 Werkzeug==3.0.1
+python-dotenv==1.0.0
+pytest-benchmark==4.0.0