diff --git a/.dockerignore b/.dockerignore
index 1d6b4ed..c577e36 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,2 +1,2 @@
 .git/
-# videos/
\ No newline at end of file
+videos/
\ No newline at end of file
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
deleted file mode 100644
index 2eff8d1..0000000
--- a/.github/workflows/docker-publish.yml
+++ /dev/null
@@ -1,76 +0,0 @@
-name: Docker
-
-on:
-  push:
-    # Publish `master` as Docker `latest` image.
-    branches:
-      - master
-
-    # Publish `v1.2.3` tags as releases.
-    tags:
-      - v*
-
-  # Run tests for any PRs.
-  pull_request:
-
-env:
-  # TODO: Change variable to your image's name.
-  IMAGE_NAME: recurring-content-detector
-
-jobs:
-  # Run tests.
-  # See also https://docs.docker.com/docker-hub/builds/automated-testing/
-  test:
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/checkout@v2
-
-      - name: Run tests
-        run: |
-          if [ -f docker-compose.test.yml ]; then
-            docker-compose --file docker-compose.test.yml build
-            docker-compose --file docker-compose.test.yml run sut
-          else
-            docker build . --file Dockerfile
-          fi
-
-  # Push image to GitHub Packages.
-  # See also https://docs.docker.com/docker-hub/builds/
-  push:
-    # Ensure test job passes before pushing image.
-    needs: test
-
-    runs-on: ubuntu-latest
-    if: github.event_name == 'push'
-
-    steps:
-      - uses: actions/checkout@v2
-
-      - name: Build image
-        run: docker build . --file Dockerfile --tag $IMAGE_NAME
-
-      - name: Log into registry
-        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login docker.pkg.github.com -u ${{ github.actor }} --password-stdin
-
-      - name: Push image
-        run: |
-          IMAGE_ID=docker.pkg.github.com/${{ github.repository }}/$IMAGE_NAME
-
-          # Change all uppercase to lowercase
-          IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]')
-
-          # Strip git ref prefix from version
-          VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,')
-
-          # Strip "v" prefix from tag name
-          [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//')
-
-          # Use Docker `latest` tag convention
-          [ "$VERSION" == "master" ] && VERSION=latest
-
-          echo IMAGE_ID=$IMAGE_ID
-          echo VERSION=$VERSION
-
-          docker tag $IMAGE_NAME $IMAGE_ID:$VERSION
-          docker push $IMAGE_ID:$VERSION
diff --git a/.gitignore b/.gitignore
index 093b467..cdb8e6a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ annotations.csv
 *.h5
 *.mp4
 *.p
+videos
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -130,3 +131,4 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+.DS_Store
diff --git a/Dockerfile b/Dockerfile
index 585f464..a8ecf8c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,6 +13,6 @@
 RUN conda install python=3.6 -y && \
     apt-get install libglib2.0-0 -y && \
     apt-get install -y libsm6 libxext6 libxrender-dev -y && \
     apt-get install ffmpeg -y && \
-    conda install faiss-cpu -c pytorch
+    conda install faiss-cpu=1.6.3 -c pytorch
diff --git a/README.MD b/README.MD
index 678315a..0cbc26f 100644
--- a/README.MD
+++ b/README.MD
@@ -56,23 +56,68 @@
 You can run the detector in a python program in the following way:
 ```python
 import recurring_content_detector as rcd
 rcd.detect("/directory/with/season/videofiles")
 ```
-This will run the detection by building the color histogram feature vectors. The feature vector function can also be changed:
+This will run the detection by building the color histogram feature vectors. Make sure the video files can be sorted into the correct alphabetical order, matching the order in which they play in the season! So episode_1 -> episode_2 -> episode_3 -> etc. You'll get weird results otherwise.
+
+
+The feature vector function can also be changed:
 ```python
 # options for the function are ["CNN", "CH", "CTM"]
 rcd.detect("/directory/with/season/videofiles", feature_vector_function="CNN")
 ```
 This will use CNN vectors, which are a bit more accurate but take much longer to build.
-Because the videos need to be resized and the feature vectors saved in files, some artifacts will be created. On default they will be saved in the same directory as the video files, if you want them saved in a different directory:
+The `detect` function has many more parameters that can be tweaked; the defaults are the parameters that gave the best results in my experiments.
+
 ```python
-rcd.detect("/directory/with/season/videofiles", feature_vector_function="CH", artifacts_dir="/tmp")
+def detect(video_dir, feature_vector_function="CH", annotations=None, artifacts_dir=None, framejump=3, percentile=10, resize_width=320, video_start_threshold_percentile=20, video_end_threshold_seconds=15, min_detection_size_seconds=15):
+"""
+The main function to call to detect recurring content. Resizes videos, converts them to feature vectors,
+and returns the locations of recurring content within the videos.
+
+arguments
+---------
+video_dir : str
+    Folder location of one season of video files.
+
+annotations : str
+    Location of the annotations.csv file; if given, the detections will be evaluated against the annotations.
+
+feature_vector_function : str
+    Which type of feature vectors to use, options: ["CH", "CTM", "CNN"]. The default is color histograms (CH) because of the balance between speed and accuracy; this default is defined in the `detect` function signature.
+
+artifacts_dir : str
+    Directory location where the artifacts should be saved. The default location is the one given by the video_dir parameter.
+
+framejump : int
+    The frame interval to use when sampling frames for the detection. A higher number means that fewer frames will be taken into consideration, which improves processing time but will probably cost accuracy.
+
+percentile : int
+    Which percentile of the best matches will be taken into consideration as recurring content. A high percentile means higher recall and lower precision; a low percentile means lower recall and higher precision.
+
+resize_width : int
+    Width to which the videos will be resized. A lower number means higher processing speed but less accuracy, and vice versa.
+
+video_start_threshold_percentile : int
+    Percentage of the start of the video in which detections are counted. As recaps and opening credits only occur in the first part of a video file, this parameter sets that threshold. Putting 20 here means that recurring content found in the first 20% of the video's frames is marked as a detection; anything found later than that is ignored.
+
+video_end_threshold_seconds : int
+    Threshold, in seconds, within which the final detection at the end of the video must end for it to count. Putting 15 here means that a detection at the end of a video is only kept if it ends in the last 15 seconds of the video.
+
+min_detection_size_seconds : int
+    Minimal length, in seconds, a detection needs before it counts as a detection. As credits, recaps, and previews are generally never just a few seconds long, it's wise to pick a number higher than 10.
+
+returns
+-------
+dictionary
+    dictionary with a list of (start, end) detections in seconds for every video file name
+
+    {"episode1.mp4" : [(start1, end1), (start2, end2)],
+     "episode2.mp4" : [(start1, end1), (start2, end2)],
+     ...
+    }
+"""
 ```
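+For example, a sketch of a call that trades a little accuracy for speed by sampling fewer frames (the directory path and parameter values are only illustrative):
+
+```python
+import recurring_content_detector as rcd
+
+detections = rcd.detect(
+    "/directory/with/season/videofiles",
+    feature_vector_function="CH",
+    framejump=6,     # sample every 6th frame instead of every 3rd
+    percentile=10,
+)
+
+# detections maps each filename to a list of (start, end) timestamps in seconds
+for episode, timestamps in detections.items():
+    for start, end in timestamps:
+        print(f"{episode}: recurring content from {start:.0f}s to {end:.0f}s")
+```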
-Make sure the video files you used can be sorted in the right alphabetical order similar as to when they play in the season! So episode_1 -> episode_2 -> episode_3 -> etc.. You'll get weird results otherwise.
-
-It will take some time as video processing takes quite some resources. An example application in production should run detections in parallel.
-
-
 ## Annotations
 If you want to quantitatively test out how well this works on your own data, fill in the [annotations](annotations_example.csv) file and supply it as the second parameter.
@@ -99,7 +144,17 @@ Detections for: episode3.mp4
 Total precision = 0.862
 Total recall = 0.853
 ```
+
+## Tests
+
+There are a few tests in the tests directory. They can also be run in the Docker container; make sure you created a `videos` directory with some episodes in it:
+```
+docker run -it -v $(pwd):/opt/recurring-content-detector nielstenboom/recurring-content-detector:latest python -m pytest -s
+```
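+
+The gap-merging helper that these unit tests exercise can also be tried on its own; a minimal sketch, reusing the example from its docstring:
+
+```python
+import numpy as np
+from recurring_content_detector.detector import fill_gaps
+
+mask = np.array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0])
+print(fill_gaps(mask, lookahead=6).tolist())
+# [0, 0, 1, 1, 1, 1, 1, 1, 0, 0]
+```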
 
 ## Credits
 - https://github.com/noagarcia/keras_rmac for the CNN vectors
 - https://github.com/facebookresearch/faiss for the efficient matching of the feature vectors
+
+## Final words
+If you use and like my project or want to discuss something related, I would ❤️ to hear about it! You can send me an email at nielstenboom@gmail.com.
diff --git a/recurring_content_detector/__init__.py b/recurring_content_detector/__init__.py
index 221808e..3df24bc 100644
--- a/recurring_content_detector/__init__.py
+++ b/recurring_content_detector/__init__.py
@@ -1,19 +1,5 @@
 from . import detector
-from . import config
 
-def detect(video_dir, annotations = None, feature_vector_function = "CH", artifacts_dir = None):
-
-    old_width = config.RESIZE_WIDTH
-
-    # make sure resize width of 224 is used with CNN
-    if feature_vector_function == "CNN":
-        config.RESIZE_WIDTH = 224
-
-    result = detector.detect(video_dir, feature_vector_function, annotations, artifacts_dir)
-
-    # set config variable back to the old value,
-    # so when reusing the module, there is no unexpected behavior.
-    config.RESIZE_WIDTH = old_width
-
-    return result
+def detect(*args, **kwargs):
+    return detector.detect(*args, **kwargs)
diff --git a/recurring_content_detector/config.py b/recurring_content_detector/config.py
deleted file mode 100644
index d5d0baf..0000000
--- a/recurring_content_detector/config.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# the uniform sampling rate, every FRAMEJUMP frames a frame will be taken into account
-FRAMEJUMP = 3
-
-# percentile of the lowest values in the vector results to mark as detections
-PERCENTILE = 10
-
-# width of video to resize to, any width other than 224 will keep the aspect ratio intact
-# use 224 in combination with the CNN feature vectors, 320 was used with the others
-RESIZE_WIDTH = 320
-
-
diff --git a/recurring_content_detector/detector.py b/recurring_content_detector/detector.py
index e29156f..5b37441 100644
--- a/recurring_content_detector/detector.py
+++ b/recurring_content_detector/detector.py
@@ -8,7 +8,6 @@ from natsort import natsorted, ns
 
 # internal imports
-from . import config
 from . import featurevectors
 from . import video_functions
 from . import evaluation
@@ -38,19 +37,26 @@ def fill_gaps(sequence, lookahead):
     input: [0,0,1,0,0,0,0,1,0,0] with lookahead=6
     output: [0,0,1,1,1,1,1,1,0,0]
     """
+
     i = 0
-    while i < len(sequence) - lookahead:
-        current = sequence[i]
-        next = sequence[i + 1 : i + lookahead].tolist()
-
-        if current and True in next:
-            x = 0
-            while not next[x]:
-                sequence[i + 1 + x] = True
-                x = x + 1
-
-        i = i + 1
-
+    change_needed = False
+    look_left = 0
+    while i < len(sequence):
+        look_left -= 1
+        # the merge window expired without another detection showing up
+        if change_needed and look_left < 1:
+            change_needed = False
+        if sequence[i]:
+            if change_needed:
+                # another detection fell inside the window: fill the gap behind us
+                for k in to_change:
+                    sequence[k] = True
+            # (re)open the merge window from this detection onwards
+            change_needed = True
+            look_left = lookahead
+            to_change = []
+        else:
+            if change_needed:
+                to_change.append(i)
+        i += 1
     return sequence
 
 def get_two_longest_timestamps(timestamps):
@@ -133,7 +139,8 @@
 
     return results
 
-def detect(video_dir, feature_vector_function, annotations = None, artifacts_dir = None):
+def detect(video_dir, feature_vector_function="CH", annotations=None, artifacts_dir=None, framejump=3, percentile=10,
+           resize_width=320, video_start_threshold_percentile=20, video_end_threshold_seconds=15, min_detection_size_seconds=15):
     """
     The main function to call to detect recurring content. Resizes videos, converts to feature vectors
     and returns the locations of recurring content within the videos.
@@ -150,6 +157,26 @@
     artifacts_dir : str
         Directory location where the artifacts should be saved. Default location is the location defined with the video_dir parameter.
+    framejump : int
+        The frame interval to use when sampling frames for the detection. A higher number means that fewer frames will be
+        taken into consideration, which improves processing time but will probably cost accuracy.
+    percentile : int
+        Which percentile of the best matches will be taken into consideration as recurring content. A high percentile
+        means higher recall and lower precision; a low percentile means lower recall and higher precision.
+    resize_width : int
+        Width to which the videos will be resized. A lower number means higher processing speed but less accuracy, and vice versa.
+    video_start_threshold_percentile : int
+        Percentage of the start of the video in which detections are counted. As recaps and opening credits only occur
+        in the first part of a video file, this parameter sets that threshold. Putting 20 here means that recurring content
+        found in the first 20% of the video's frames is marked as a detection; anything found later than that is ignored.
+    video_end_threshold_seconds : int
+        Threshold, in seconds, within which the final detection at the end of the video must end for it to count.
+        Putting 15 here means that a detection at the end of a video is only kept if it ends
+        in the last 15 seconds of the video.
+    min_detection_size_seconds : int
+        Minimal length, in seconds, a detection needs before it counts as a detection. As credits, recaps, and previews
+        are generally never just a few seconds long, it's wise to pick a number higher than 10.
 
     returns
     -------
@@ -161,14 +188,19 @@
         ...
         }
     """
+
+    # if feature vector function is CNN, change resize width
+    if feature_vector_function == "CNN":
+        resize_width = 224
+
     print("Starting detection")
-    print(f"Framejump: {config.FRAMEJUMP}")
-    print(f"Video width: {config.RESIZE_WIDTH}")
+    print(f"Framejump: {framejump}")
+    print(f"Video width: {resize_width}")
     print(f"Feature vector type: {feature_vector_function}")
 
     # define the static directory names
-    resized_dir_name = "resized{}".format(config.RESIZE_WIDTH)
-    feature_vectors_dir_name = "{}_feature_vectors_framejump{}".format(feature_vector_function, config.FRAMEJUMP)
+    resized_dir_name = "resized{}".format(resize_width)
+    feature_vectors_dir_name = "{}_feature_vectors_framejump{}".format(feature_vector_function, framejump)
 
     # the video files used for the detection
     videos = [f for f in os.listdir(video_dir) if os.path.isfile(os.path.join(video_dir, f))]
@@ -197,12 +229,12 @@
         # if there is no resized video yet, then resize it
         if not os.path.isfile(file_resized):
             print("Resizing {}".format(file))
-            video_functions.resize(file_full, file_resized)
+            video_functions.resize(file_full, file_resized, resize_width)
 
         # from the resized video, construct feature vectors
         print("Converting {} to feature vectors".format(file))
         featurevectors.construct_feature_vectors(
-            file_resized, feature_vectors_dir_name, feature_vector_function)
+            file_resized, feature_vectors_dir_name, feature_vector_function, framejump)
 
     # query the feature vectors of each episode on the other episodes
     results = query_episodes_with_faiss(videos, vectors_dir)
@@ -212,17 +244,15 @@
     total_detected_seconds = 0
     total_relevant_detected_seconds = 0
 
-    framejump = config.FRAMEJUMP
-
     all_detections = {}
     for video, result in results:
         framerate = video_functions.get_framerate(os.path.join(video_dir, video))
 
-        threshold = np.percentile(result, config.PERCENTILE)
+        threshold = np.percentile(result, percentile)
         # all the detections
         below_threshold = result < threshold
         # Merge all detections that are less than 10 seconds apart
-        below_threshold = fill_gaps(below_threshold, int((framerate/config.FRAMEJUMP) * 10))
+        below_threshold = fill_gaps(below_threshold, int((framerate/framejump) * 10))
 
         # put all the indices where values are nonzero in a list of lists
         nonzeros = [[i for i, value in it] for key, it in itertools.groupby(
@@ -236,13 +266,13 @@
             start = nonzero[0]
             end = nonzero[-1]
 
-            #result is in first 20% of the video
-            occurs_at_beginning = end < len(result) / 5
-            #the end of this timestamp ends in the last 15 seconds
-            ends_at_the_end = end > len(result) - 15 * (framerate/framejump)
+            #result is in the first video_start_threshold_percentile% of the video
+            occurs_at_beginning = end < len(result) * (video_start_threshold_percentile / 100)
+            #this timestamp ends in the last video_end_threshold_seconds of the video
+            ends_at_the_end = end > len(result) - video_end_threshold_seconds * (framerate/framejump)
 
-            if (end - start > (15 * (framerate / framejump)) #only count detection when larger than 15 seconds
-                and (occurs_at_beginning or ends_at_the_end)): #only use results that are in first 1/5 part or end at last 15 s
+            if (end - start > (min_detection_size_seconds * (framerate / framejump)) #only count detection when longer than min_detection_size_seconds
+                and (occurs_at_beginning or ends_at_the_end)): #only use results that are in the first part or end at the last seconds
 
                 start = start / (framerate / framejump)
                 end = end / (framerate / framejump)
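All of the thresholds above operate on sampled-frame indices, so conversions to and from seconds always go through framerate / framejump. A quick sketch of that arithmetic, assuming a 25 fps episode and the default framejump of 3:

```python
framerate = 25.0  # assumed frame rate, for illustration only
framejump = 3     # the default sampling interval

samples_per_second = framerate / framejump  # ~8.33 feature vectors per second
start_seconds = 100 / samples_per_second    # sample index 100 -> 12.0 s
end_seconds = 350 / samples_per_second      # sample index 350 -> 42.0 s

# min_detection_size_seconds=15 therefore requires a run of at least
# 15 * samples_per_second = 125 consecutive below-threshold samples
print(start_seconds, end_seconds, 15 * samples_per_second)
```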
diff --git a/recurring_content_detector/featurevectors.py b/recurring_content_detector/featurevectors.py
index c30ecdb..b57a962 100644
--- a/recurring_content_detector/featurevectors.py
+++ b/recurring_content_detector/featurevectors.py
@@ -5,7 +5,6 @@ import numpy as np
 
 from math import sqrt
 
-from . import config
 from . import keras_rmac
 
@@ -85,7 +84,7 @@ def color_hist(img):
     result = get_img_color_hist(img, 100)
     return result
 
-def construct_feature_vectors(video_fn, result_dir_name, vector_function):
+def construct_feature_vectors(video_fn, result_dir_name, vector_function, framejump):
     """
     Function that converts a video file to a list of feature vectors,
     which it then writes to a pickle file.
@@ -112,11 +111,11 @@ def construct_feature_vectors(video_fn, result_dir_name, vector_function):
     # construct the histograms from frames at the start of scenes
     feature_vectors = []
 
-    total = int(video.get(cv2.CAP_PROP_FRAME_COUNT) / config.FRAMEJUMP) - 1
+    total = int(video.get(cv2.CAP_PROP_FRAME_COUNT) / framejump) - 1
 
     # apply the vector function for every xth frame determined by framejump
     for i in tqdm(range(total)):
-        img = get_frame(i * config.FRAMEJUMP, video)
+        img = get_frame(i * framejump, video)
         feature_vector = vector_function(img)
         feature_vectors.append(feature_vector)
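To get a feel for how framejump drives the amount of work here, a back-of-the-envelope sketch (episode length and frame rate assumed for illustration):

```python
frame_count = 22 * 60 * 25  # assumed: a 22-minute episode at 25 fps = 33000 frames
framejump = 3               # the default sampling interval

# same formula as construct_feature_vectors above
total_vectors = int(frame_count / framejump) - 1
print(total_vectors)  # 10999 feature vectors to compute for this episode
```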
diff --git a/recurring_content_detector/requirements.txt b/recurring_content_detector/requirements.txt
deleted file mode 100644
index 8b0583e..0000000
--- a/recurring_content_detector/requirements.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-# Requirements automatically generated by pigar.
-# https://github.com/damnever/pigar
-
-# keras_rmac\RoiPooling.py: 1,2
-# keras_rmac\rmac.py: 4,5,6,7,11
-Keras == 2.2.4
-
-# keras_rmac\rmac.py: 9
-Pillow == 5.4.1
-
-# video_functions.py: 4
-ffmpeg_python == 0.1.16
-
-# keras_rmac\rmac.py: 21
-matplotlib == 2.2.2
-
-# featurevectors.py: 6
-# keras_rmac\get_regions.py: 3
-# keras_rmac\rmac.py: 18
-# main.py: 4
-numpy == 1.16.2
-
-# featurevectors.py: 2
-# keras_rmac\rmac.py: 19
-# video_functions.py: 1
-opencv_python == 3.4.2.16
-
-# evaluation.py: 2
-pandas == 0.22.0
-
-# keras_rmac\rmac.py: 17
-scipy == 1.2.0
-
-# featurevectors.py: 3
-tqdm == 4.19.4
diff --git a/recurring_content_detector/video_functions.py b/recurring_content_detector/video_functions.py
index d5d499f..e58a45f 100644
--- a/recurring_content_detector/video_functions.py
+++ b/recurring_content_detector/video_functions.py
@@ -1,8 +1,6 @@
 import cv2
 import ffmpeg
 
-from . import config
-
 def get_framerate(video_fn):
     """
     Return the video framerate given a video filename
@@ -10,7 +8,7 @@ def get_framerate(video_fn):
     video = cv2.VideoCapture(video_fn)
     return video.get(cv2.CAP_PROP_FPS)
 
-def resize(input, output):
+def resize(input, output, resize_width):
     """
     Resizes a video with ffmpeg
     """
@@ -19,10 +17,10 @@ def resize(input, output):
     if framecount > 0:
         stream = ffmpeg.input(input)
 
-        if config.RESIZE_WIDTH == 224:
+        if resize_width == 224:
             stream = ffmpeg.filter(stream, 'scale', w=224, h=224)
         else:
-            stream = ffmpeg.filter(stream, 'scale', w=config.RESIZE_WIDTH, h="trunc(ow/a/2)*2")
+            stream = ffmpeg.filter(stream, 'scale', w=resize_width, h="trunc(ow/a/2)*2")
         stream = ffmpeg.output(stream, output)
         try:
             ffmpeg.run(stream)
diff --git a/setup.py b/setup.py
index d687a2e..b657ca9 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,7 @@
         'tqdm==4.40.2',
         'natsort==6.2.0',
         'tensorflow==1.14',
-        'numpy==1.16.2'
+        'numpy==1.16.2',
+        'pytest==6.0.2'
     ],
     zip_safe=False)
\ No newline at end of file
diff --git a/tests/test_detector.py b/tests/test_detector.py
new file mode 100644
index 0000000..33df567
--- /dev/null
+++ b/tests/test_detector.py
@@ -0,0 +1,46 @@
+from recurring_content_detector.detector import fill_gaps, get_two_longest_timestamps
+import numpy as np
+
+def test_fill_gaps_regular():
+    input = np.array([0,0,1,0,0,0,0,1,0,0])
+    expected = [0,0,1,1,1,1,1,1,0,0]
+
+    output = fill_gaps(input, lookahead=6)
+
+    assert expected == output.tolist()
+
+
+def test_fill_gaps_largerlookahead():
+    input = np.array([0,0,1,0,0,0,0,1,0,0])
+    expected = [0,0,1,1,1,1,1,1,0,0]
+
+    output = fill_gaps(input, lookahead=20)
+
+    assert expected == output.tolist()
+
+
+def test_fill_gaps_smalllookahead():
+    input = np.array([0,0,1,0,0,0,0,1,0,0])
+    expected = [0,0,1,0,0,0,0,1,0,0]
+
+    output = fill_gaps(input, lookahead=3)
+
+    assert expected == output.tolist()
+
+
+def test_get_two_longest_timestamps_regular():
+    input = [(0,10), (0,5), (20,21)]
+    expected = [(0,10), (0,5)]
+
+    output = get_two_longest_timestamps(input)
+
+    assert expected == output
+
+def test_get_two_longest_timestamps_singlevalue():
+    input = [(0,10)]
+    expected = [(0,10)]
+
+    output = get_two_longest_timestamps(input)
+
+    assert expected == output
\ No newline at end of file
diff --git a/tests/test_full.py b/tests/test_full.py
new file mode 100644
index 0000000..c0cc989
--- /dev/null
+++ b/tests/test_full.py
@@ -0,0 +1,4 @@
+import recurring_content_detector as rcd
+
+def test_full_run():
+    results = rcd.detect("videos")