From eaf9308c6a3bf5a1801c72b2cbb7b96d880789cb Mon Sep 17 00:00:00 2001
From: bturkus <benjaminturkus@nypl.org>
Date: Wed, 27 Nov 2024 11:23:02 -0500
Subject: [PATCH 1/5] Update video_processing.py

---
 ami_scripts/video_processing.py | 45 ++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/ami_scripts/video_processing.py b/ami_scripts/video_processing.py
index f5313b9..eb761e2 100755
--- a/ami_scripts/video_processing.py
+++ b/ami_scripts/video_processing.py
@@ -32,9 +32,9 @@ def rename_files(input_directory, extensions):
         shutil.move(file, new_file)
 
 
-def convert_mkv_dv_to_mp4(input_directory):
+def convert_mkv_dv_to_mp4(input_directory, audio_pan):
     for file in itertools.chain(input_directory.glob("*.mkv"), input_directory.glob("*.dv")):
-        convert_to_mp4(file, input_directory)
+        convert_to_mp4(file, input_directory, audio_pan)
 
 
 def process_mov_files(input_directory):
@@ -104,22 +104,41 @@ def transcribe_directory(input_directory, model, output_format):
             output_writer(transcription_response, file.stem)
 
 
-def convert_to_mp4(input_file, input_directory):
+def convert_to_mp4(input_file, input_directory, audio_pan):
     output_file_name = f"{input_file.stem.replace('_pm', '')}_sc.mp4"
     output_file = input_directory / output_file_name
+
+    # Default audio filter
+    audio_filter = None
+
+    # Set audio pan filter based on user input
+    if audio_pan == "left":
+        audio_filter = "[0:a]pan=stereo|c0=c0|c1=c0[outa]"
+    elif audio_pan == "right":
+        audio_filter = "[0:a]pan=stereo|c0=c1|c1=c1[outa]"
+
     command = [
         "ffmpeg",
         "-i", str(input_file),
-        "-map", "0:v", "-map", "0:a",
-        "-c:v", "libx264",
-        "-movflags", "faststart",
-        "-pix_fmt", "yuv420p",
-        "-b:v", "3500000", "-bufsize", "1750000", "-maxrate", "3500000",
-        "-vf", "yadif",
-        "-c:a", "aac", "-b:a", "320000", "-ar", "48000", str(output_file)
+        "-map", "0:v", "-c:v", "libx264", "-movflags", "faststart", "-pix_fmt", "yuv420p",
+        "-b:v", "3500000", "-bufsize", "1750000", "-maxrate", "3500000", "-vf", "yadif",
     ]
-    subprocess.check_call(command)
 
+    # Add audio mapping and filter if specified
+    if audio_filter:
+        command.extend([
+            "-filter_complex", audio_filter,
+            "-map", "[outa]"  # Map the output of the pan filter as the audio stream
+        ])
+    else:
+        command.extend([
+            "-map", "0:a",  # Default audio mapping if no pan is specified
+        ])
+
+    # Add audio encoding options
+    command.extend(["-c:a", "aac", "-b:a", "320000", "-ar", "48000", str(output_file)])
+
+    subprocess.check_call(command)
     return output_file
 
     
@@ -261,6 +280,8 @@ def main():
     parser.add_argument("-o", "--output", help="Path to save csv (optional). If provided, MediaInfo extraction will be performed.", required=False)
     parser.add_argument("-m", "--model", default='medium', choices=['tiny', 'base', 'small', 'medium', 'large'], help='The Whisper model to use')
     parser.add_argument("-f", "--format", default='vtt', choices=['vtt', 'srt', 'txt', 'json'], help='The subtitle output format to use')
+    parser.add_argument("-p", "--audio-pan", choices=["left", "right", "none"], default="none", help="Pan audio to center from left or right channel.")
+
 
     args = parser.parse_args()
 
@@ -281,7 +302,7 @@ def main():
     create_directories(input_dir, ["AuxiliaryFiles", "V210", "PreservationMasters", "ServiceCopies"])
 
     print("Converting MKV and DV to MP4...")
-    convert_mkv_dv_to_mp4(input_dir)
+    convert_mkv_dv_to_mp4(input_dir, args.audio_pan)
 
     print("Processing MOV files...")
     process_mov_files(input_dir)

From 6e3f44ffda72524fb00fdd9c4808a0e771438ff2 Mon Sep 17 00:00:00 2001
From: bturkus <benjaminturkus@nypl.org>
Date: Mon, 2 Dec 2024 16:52:26 -0500
Subject: [PATCH 2/5] Update video_processing.py

---
 ami_scripts/video_processing.py | 133 ++++++++++++++++++++++++++------
 1 file changed, 110 insertions(+), 23 deletions(-)

diff --git a/ami_scripts/video_processing.py b/ami_scripts/video_processing.py
index eb761e2..13ee56f 100755
--- a/ami_scripts/video_processing.py
+++ b/ami_scripts/video_processing.py
@@ -104,19 +104,92 @@ def transcribe_directory(input_directory, model, output_format):
             output_writer(transcription_response, file.stem)
 
 
+def detect_audio_pan(input_file, probe_duration=60):
+    """
+    Detect whether the audio is isolated to the left or right channel for each audio stream.
+    Probes only the first `probe_duration` seconds of the file.
+    """
+    # Use ffprobe to get the number of audio streams
+    ffprobe_command = [
+        "ffprobe", "-i", str(input_file),
+        "-show_entries", "stream=index:stream=codec_type",
+        "-select_streams", "a", "-of", "compact=p=0:nk=1", "-v", "0"
+    ]
+    ffprobe_result = subprocess.run(ffprobe_command, capture_output=True, text=True)
+    audio_streams = [int(line.split('|')[0]) for line in ffprobe_result.stdout.splitlines() if "audio" in line]
+
+    print(f"Detected {len(audio_streams)} audio streams: {audio_streams} in {input_file}")
+
+    # Analyze each audio stream
+    pan_filters = []
+    for stream_index in audio_streams:
+        print(f"Analyzing audio stream: {stream_index}")
+
+        # Analyze left channel
+        left_analysis_command = [
+            "ffmpeg", "-t", str(probe_duration), "-i", str(input_file),
+            "-map", f"0:{stream_index}",  # Explicitly map the audio stream by index
+            "-af", "pan=mono|c0=c0,volumedetect", "-f", "null", "-"
+        ]
+        left_result = subprocess.run(left_analysis_command, capture_output=True, text=True)
+        left_output = left_result.stderr
+
+        # Analyze right channel
+        right_analysis_command = [
+            "ffmpeg", "-t", str(probe_duration), "-i", str(input_file),
+            "-map", f"0:{stream_index}",  # Explicitly map the audio stream by index
+            "-af", "pan=mono|c0=c1,volumedetect", "-f", "null", "-"
+        ]
+        right_result = subprocess.run(right_analysis_command, capture_output=True, text=True)
+        right_output = right_result.stderr
+
+        # Parse mean volumes
+        def get_mean_volume(output):
+            match = re.search(r"mean_volume:\s*(-?\d+(\.\d+)?)", output)
+            return float(match.group(1)) if match else None
+
+        left_mean_volume = get_mean_volume(left_output)
+        right_mean_volume = get_mean_volume(right_output)
+
+        # Debugging output for clarity
+        print(f"Stream {stream_index} - Left channel mean volume: {left_mean_volume}")
+        print(f"Stream {stream_index} - Right channel mean volume: {right_mean_volume}")
+
+        # Handle cases where volume data is unavailable
+        if left_mean_volume is None or right_mean_volume is None:
+            print(f"Stream {stream_index}: Unable to analyze audio. Skipping this stream.")
+            continue
+
+        # Determine if one channel is significantly louder than the other
+        silence_threshold = -60.0  # dB, adjust as needed
+        if right_mean_volume > silence_threshold and left_mean_volume <= silence_threshold:
+            print(f"Stream {stream_index}: Detected right-channel-only audio. Applying right-to-center panning.")
+            pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c1|c1=c1[outa{stream_index}]")
+        elif left_mean_volume > silence_threshold and right_mean_volume <= silence_threshold:
+            print(f"Stream {stream_index}: Detected left-channel-only audio. Applying left-to-center panning.")
+            pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c0|c1=c0[outa{stream_index}]")
+        else:
+            print(f"Stream {stream_index}: Audio is balanced or both channels are silent. No panning applied.")
+
+    # Combine pan filters for FFmpeg command
+    return pan_filters
+
+
 def convert_to_mp4(input_file, input_directory, audio_pan):
     output_file_name = f"{input_file.stem.replace('_pm', '')}_sc.mp4"
     output_file = input_directory / output_file_name
 
-    # Default audio filter
-    audio_filter = None
-
-    # Set audio pan filter based on user input
-    if audio_pan == "left":
-        audio_filter = "[0:a]pan=stereo|c0=c0|c1=c0[outa]"
-    elif audio_pan == "right":
-        audio_filter = "[0:a]pan=stereo|c0=c1|c1=c1[outa]"
+    # Detect audio pan automatically if set to "auto"
+    if audio_pan == "auto":
+        pan_filters = detect_audio_pan(input_file)
+    else:
+        pan_filters = []
+        if audio_pan == "left":
+            pan_filters.append("[0:a]pan=stereo|c0=c0|c1=c0[outa]")
+        elif audio_pan == "right":
+            pan_filters.append("[0:a]pan=stereo|c0=c1|c1=c1[outa]")
 
+    # FFmpeg command setup
     command = [
         "ffmpeg",
         "-i", str(input_file),
@@ -124,23 +197,36 @@ def convert_to_mp4(input_file, input_directory, audio_pan):
         "-b:v", "3500000", "-bufsize", "1750000", "-maxrate", "3500000", "-vf", "yadif",
     ]
 
-    # Add audio mapping and filter if specified
-    if audio_filter:
-        command.extend([
-            "-filter_complex", audio_filter,
-            "-map", "[outa]"  # Map the output of the pan filter as the audio stream
-        ])
+    # Add audio mapping and filters if specified
+    if pan_filters:
+        if len(pan_filters) == 1:
+            # Single audio stream case
+            # Ensure consistent output label `[outa]`
+            pan_filters[0] = pan_filters[0].replace("[outa1]", "[outa]")
+            command.extend([
+                "-filter_complex", pan_filters[0],
+                "-map", "[outa]"
+            ])
+        else:
+            # Multi-audio stream case
+            filter_complex = ";".join(pan_filters)
+            command.extend([
+                "-filter_complex", filter_complex,
+            ])
+            for idx in range(len(pan_filters)):
+                command.extend(["-map", f"[outa{idx}]"])
     else:
-        command.extend([
-            "-map", "0:a",  # Default audio mapping if no pan is specified
-        ])
+        command.extend(["-map", "0:a", "-c:a", "aac", "-b:a", "320000", "-ar", "48000"])
 
-    # Add audio encoding options
-    command.extend(["-c:a", "aac", "-b:a", "320000", "-ar", "48000", str(output_file)])
+    # Add output file
+    command.append(str(output_file))
 
+    print(f"FFmpeg command: {' '.join(command)}")  # Debugging output
     subprocess.check_call(command)
+    print(f"MP4 created: {output_file}")
     return output_file
 
+
     
 def convert_mov_file(input_file, input_directory):
     """Convert a MOV file to FFV1 and MP4 formats using FFmpeg"""    
@@ -280,8 +366,10 @@ def main():
     parser.add_argument("-o", "--output", help="Path to save csv (optional). If provided, MediaInfo extraction will be performed.", required=False)
     parser.add_argument("-m", "--model", default='medium', choices=['tiny', 'base', 'small', 'medium', 'large'], help='The Whisper model to use')
     parser.add_argument("-f", "--format", default='vtt', choices=['vtt', 'srt', 'txt', 'json'], help='The subtitle output format to use')
-    parser.add_argument("-p", "--audio-pan", choices=["left", "right", "none"], default="none", help="Pan audio to center from left or right channel.")
-
+    parser.add_argument("-p", "--audio-pan",
+        choices=["left", "right", "none", "auto"],
+        default="none",
+        help="Pan audio to center from left, right, or auto-detect mono audio.")
 
     args = parser.parse_args()
 
@@ -362,5 +450,4 @@ def main():
             csvwriter.writerows(file_data)
 
 if __name__ == "__main__":
-    main()
-
+    main()
\ No newline at end of file

From c2a58260785ce5f88bd8cf268345ec492683188d Mon Sep 17 00:00:00 2001
From: bturkus <benjaminturkus@nypl.org>
Date: Mon, 2 Dec 2024 18:15:23 -0500
Subject: [PATCH 3/5] Update video_processing.py

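Add a "center" pan option that mixes both channels into each output channel.
Apply manual pan settings to every audio stream, and label auto-detected
pan filter outputs by enumeration order instead of by stream index.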
---
 ami_scripts/video_processing.py | 69 ++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 32 deletions(-)

diff --git a/ami_scripts/video_processing.py b/ami_scripts/video_processing.py
index 13ee56f..72f392b 100755
--- a/ami_scripts/video_processing.py
+++ b/ami_scripts/video_processing.py
@@ -122,7 +122,7 @@ def detect_audio_pan(input_file, probe_duration=60):
 
     # Analyze each audio stream
     pan_filters = []
-    for stream_index in audio_streams:
+    for i, stream_index in enumerate(audio_streams):
         print(f"Analyzing audio stream: {stream_index}")
 
         # Analyze left channel
@@ -164,14 +164,13 @@ def get_mean_volume(output):
         silence_threshold = -60.0  # dB, adjust as needed
         if right_mean_volume > silence_threshold and left_mean_volume <= silence_threshold:
             print(f"Stream {stream_index}: Detected right-channel-only audio. Applying right-to-center panning.")
-            pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c1|c1=c1[outa{stream_index}]")
+            pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c1|c1=c1[outa{i}]")
         elif left_mean_volume > silence_threshold and right_mean_volume <= silence_threshold:
             print(f"Stream {stream_index}: Detected left-channel-only audio. Applying left-to-center panning.")
-            pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c0|c1=c0[outa{stream_index}]")
+            pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c0|c1=c0[outa{i}]")
         else:
             print(f"Stream {stream_index}: Audio is balanced or both channels are silent. No panning applied.")
 
-    # Combine pan filters for FFmpeg command
     return pan_filters
 
 
@@ -179,15 +178,30 @@ def convert_to_mp4(input_file, input_directory, audio_pan):
     output_file_name = f"{input_file.stem.replace('_pm', '')}_sc.mp4"
     output_file = input_directory / output_file_name
 
-    # Detect audio pan automatically if set to "auto"
-    if audio_pan == "auto":
+    # Detect all audio streams in the input file
+    ffprobe_command = [
+        "ffprobe", "-i", str(input_file),
+        "-show_entries", "stream=index:stream=codec_type",
+        "-select_streams", "a", "-of", "compact=p=0:nk=1", "-v", "0"
+    ]
+    ffprobe_result = subprocess.run(ffprobe_command, capture_output=True, text=True)
+    audio_streams = [
+        int(line.split('|')[0])
+        for line in ffprobe_result.stdout.splitlines() if "audio" in line
+    ]
+
+    # Generate pan filters based on user selection
+    pan_filters = []
+    if audio_pan in {"left", "right", "center"}:
+        for i, stream_index in enumerate(audio_streams):
+            if audio_pan == "left":
+                pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c0|c1=c0[outa{i}]")
+            elif audio_pan == "right":
+                pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c1|c1=c1[outa{i}]")
+            elif audio_pan == "center":
+                pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c0+c1|c1=c0+c1[outa{i}]")
+    elif audio_pan == "auto":
         pan_filters = detect_audio_pan(input_file)
-    else:
-        pan_filters = []
-        if audio_pan == "left":
-            pan_filters.append("[0:a]pan=stereo|c0=c0|c1=c0[outa]")
-        elif audio_pan == "right":
-            pan_filters.append("[0:a]pan=stereo|c0=c1|c1=c1[outa]")
 
     # FFmpeg command setup
     command = [
@@ -197,26 +211,18 @@ def convert_to_mp4(input_file, input_directory, audio_pan):
         "-b:v", "3500000", "-bufsize", "1750000", "-maxrate", "3500000", "-vf", "yadif",
     ]
 
-    # Add audio mapping and filters if specified
+    # Add audio filters if specified
     if pan_filters:
-        if len(pan_filters) == 1:
-            # Single audio stream case
-            # Ensure consistent output label `[outa]`
-            pan_filters[0] = pan_filters[0].replace("[outa1]", "[outa]")
-            command.extend([
-                "-filter_complex", pan_filters[0],
-                "-map", "[outa]"
-            ])
-        else:
-            # Multi-audio stream case
-            filter_complex = ";".join(pan_filters)
-            command.extend([
-                "-filter_complex", filter_complex,
-            ])
-            for idx in range(len(pan_filters)):
-                command.extend(["-map", f"[outa{idx}]"])
+        filter_complex = ";".join(pan_filters)
+        command.extend([
+            "-filter_complex", filter_complex,
+        ])
+        for i in range(len(pan_filters)):
+            command.extend(["-map", f"[outa{i}]"])
     else:
-        command.extend(["-map", "0:a", "-c:a", "aac", "-b:a", "320000", "-ar", "48000"])
+        # Default mapping for all audio streams without modification
+        for stream_index in audio_streams:
+            command.extend(["-map", f"0:{stream_index}", "-c:a", "aac", "-b:a", "320000", "-ar", "48000"])
 
     # Add output file
     command.append(str(output_file))
@@ -227,7 +233,6 @@ def convert_to_mp4(input_file, input_directory, audio_pan):
     return output_file
 
 
-    
 def convert_mov_file(input_file, input_directory):
     """Convert a MOV file to FFV1 and MP4 formats using FFmpeg"""    
     output_file1 = input_directory / f"{pathlib.Path(input_file).stem}.mkv"
@@ -367,7 +372,7 @@ def main():
     parser.add_argument("-m", "--model", default='medium', choices=['tiny', 'base', 'small', 'medium', 'large'], help='The Whisper model to use')
     parser.add_argument("-f", "--format", default='vtt', choices=['vtt', 'srt', 'txt', 'json'], help='The subtitle output format to use')
     parser.add_argument("-p", "--audio-pan",
-        choices=["left", "right", "none", "auto"],
+        choices=["left", "right", "none", "center", "auto"],
         default="none",
         help="Pan audio to center from left, right, or auto-detect mono audio.")
 

From 234f386558a9a75e29681b592071c57e423da467 Mon Sep 17 00:00:00 2001
From: bturkus <benjaminturkus@nypl.org>
Date: Mon, 2 Dec 2024 18:25:32 -0500
Subject: [PATCH 4/5] Update video_processing.py

---
 ami_scripts/video_processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ami_scripts/video_processing.py b/ami_scripts/video_processing.py
index 72f392b..d4eb4f9 100755
--- a/ami_scripts/video_processing.py
+++ b/ami_scripts/video_processing.py
@@ -417,7 +417,7 @@ def main():
         transcribe_directory(input_dir, args.model, args.format)
 
     print("Deleting empty directories...")
-    delete_empty_directories(input_dir, ["AuxiliaryFiles", "V210", "PreservationMasters", "ServiceCopies"])
+    delete_empty_directories(input_dir, ["AuxiliaryFiles", "V210", "PreservationMasters", "ServiceCopies", "ProcessedDV"])
 
     if args.output:
         project_code_pattern = re.compile(r'(\d{4}_\d{2}_\d{2})')

From 90370cedb0102a5ff7eed33e2ed10f7b5af93b86 Mon Sep 17 00:00:00 2001
From: bturkus <benjaminturkus@nypl.org>
Date: Mon, 2 Dec 2024 18:30:15 -0500
Subject: [PATCH 5/5] Update video_processing.py

Raise the default probe duration for audio pan auto-detection from 60 to 120 seconds (2 minutes).
---
 ami_scripts/video_processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ami_scripts/video_processing.py b/ami_scripts/video_processing.py
index d4eb4f9..bd28764 100755
--- a/ami_scripts/video_processing.py
+++ b/ami_scripts/video_processing.py
@@ -104,7 +104,7 @@ def transcribe_directory(input_directory, model, output_format):
             output_writer(transcription_response, file.stem)
 
 
-def detect_audio_pan(input_file, probe_duration=60):
+def detect_audio_pan(input_file, probe_duration=120):
     """
     Detect whether the audio is isolated to the left or right channel for each audio stream.
     Probes only the first `probe_duration` seconds of the file.