From eaf9308c6a3bf5a1801c72b2cbb7b96d880789cb Mon Sep 17 00:00:00 2001 From: bturkus Date: Wed, 27 Nov 2024 11:23:02 -0500 Subject: [PATCH 1/5] Update video_processing.py --- ami_scripts/video_processing.py | 45 ++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/ami_scripts/video_processing.py b/ami_scripts/video_processing.py index f5313b9..eb761e2 100755 --- a/ami_scripts/video_processing.py +++ b/ami_scripts/video_processing.py @@ -32,9 +32,9 @@ def rename_files(input_directory, extensions): shutil.move(file, new_file) -def convert_mkv_dv_to_mp4(input_directory): +def convert_mkv_dv_to_mp4(input_directory, audio_pan): for file in itertools.chain(input_directory.glob("*.mkv"), input_directory.glob("*.dv")): - convert_to_mp4(file, input_directory) + convert_to_mp4(file, input_directory, audio_pan) def process_mov_files(input_directory): @@ -104,22 +104,41 @@ def transcribe_directory(input_directory, model, output_format): output_writer(transcription_response, file.stem) -def convert_to_mp4(input_file, input_directory): +def convert_to_mp4(input_file, input_directory, audio_pan): output_file_name = f"{input_file.stem.replace('_pm', '')}_sc.mp4" output_file = input_directory / output_file_name + + # Default audio filter + audio_filter = None + + # Set audio pan filter based on user input + if audio_pan == "left": + audio_filter = "[0:a]pan=stereo|c0=c0|c1=c0[outa]" + elif audio_pan == "right": + audio_filter = "[0:a]pan=stereo|c0=c1|c1=c1[outa]" + command = [ "ffmpeg", "-i", str(input_file), - "-map", "0:v", "-map", "0:a", - "-c:v", "libx264", - "-movflags", "faststart", - "-pix_fmt", "yuv420p", - "-b:v", "3500000", "-bufsize", "1750000", "-maxrate", "3500000", - "-vf", "yadif", - "-c:a", "aac", "-b:a", "320000", "-ar", "48000", str(output_file) + "-map", "0:v", "-c:v", "libx264", "-movflags", "faststart", "-pix_fmt", "yuv420p", + "-b:v", "3500000", "-bufsize", "1750000", "-maxrate", "3500000", "-vf", "yadif", ] - subprocess.check_call(command) + # Add audio mapping and filter if specified + if audio_filter: + command.extend([ + "-filter_complex", audio_filter, + "-map", "[outa]" # Map the output of the pan filter as the audio stream + ]) + else: + command.extend([ + "-map", "0:a", # Default audio mapping if no pan is specified + ]) + + # Add audio encoding options + command.extend(["-c:a", "aac", "-b:a", "320000", "-ar", "48000", str(output_file)]) + + subprocess.check_call(command) return output_file @@ -261,6 +280,8 @@ def main(): parser.add_argument("-o", "--output", help="Path to save csv (optional). If provided, MediaInfo extraction will be performed.", required=False) parser.add_argument("-m", "--model", default='medium', choices=['tiny', 'base', 'small', 'medium', 'large'], help='The Whisper model to use') parser.add_argument("-f", "--format", default='vtt', choices=['vtt', 'srt', 'txt', 'json'], help='The subtitle output format to use') + parser.add_argument("-p", "--audio-pan", choices=["left", "right", "none"], default="none", help="Pan audio to center from left or right channel.") + args = parser.parse_args() @@ -281,7 +302,7 @@ def main(): create_directories(input_dir, ["AuxiliaryFiles", "V210", "PreservationMasters", "ServiceCopies"]) print("Converting MKV and DV to MP4...") - convert_mkv_dv_to_mp4(input_dir) + convert_mkv_dv_to_mp4(input_dir, args.audio_pan) print("Processing MOV files...") process_mov_files(input_dir) From 6e3f44ffda72524fb00fdd9c4808a0e771438ff2 Mon Sep 17 00:00:00 2001 From: bturkus Date: Mon, 2 Dec 2024 16:52:26 -0500 Subject: [PATCH 2/5] Update video_processing.py --- ami_scripts/video_processing.py | 133 ++++++++++++++++++++++++++------ 1 file changed, 110 insertions(+), 23 deletions(-) diff --git a/ami_scripts/video_processing.py b/ami_scripts/video_processing.py index eb761e2..13ee56f 100755 --- a/ami_scripts/video_processing.py +++ b/ami_scripts/video_processing.py @@ -104,19 +104,92 @@ def transcribe_directory(input_directory, model, output_format): output_writer(transcription_response, file.stem) +def detect_audio_pan(input_file, probe_duration=60): + """ + Detect whether the audio is isolated to the left or right channel for each audio stream. + Probes only the first `probe_duration` seconds of the file. + """ + # Use ffprobe to get the number of audio streams + ffprobe_command = [ + "ffprobe", "-i", str(input_file), + "-show_entries", "stream=index:stream=codec_type", + "-select_streams", "a", "-of", "compact=p=0:nk=1", "-v", "0" + ] + ffprobe_result = subprocess.run(ffprobe_command, capture_output=True, text=True) + audio_streams = [int(line.split('|')[0]) for line in ffprobe_result.stdout.splitlines() if "audio" in line] + + print(f"Detected {len(audio_streams)} audio streams: {audio_streams} in {input_file}") + + # Analyze each audio stream + pan_filters = [] + for stream_index in audio_streams: + print(f"Analyzing audio stream: {stream_index}") + + # Analyze left channel + left_analysis_command = [ + "ffmpeg", "-t", str(probe_duration), "-i", str(input_file), + "-map", f"0:{stream_index}", # Explicitly map the audio stream by index + "-af", "pan=mono|c0=c0,volumedetect", "-f", "null", "-" + ] + left_result = subprocess.run(left_analysis_command, capture_output=True, text=True) + left_output = left_result.stderr + + # Analyze right channel + right_analysis_command = [ + "ffmpeg", "-t", str(probe_duration), "-i", str(input_file), + "-map", f"0:{stream_index}", # Explicitly map the audio stream by index + "-af", "pan=mono|c0=c1,volumedetect", "-f", "null", "-" + ] + right_result = subprocess.run(right_analysis_command, capture_output=True, text=True) + right_output = right_result.stderr + + # Parse mean volumes + def get_mean_volume(output): + match = re.search(r"mean_volume:\s*(-?\d+(\.\d+)?)", output) + return float(match.group(1)) if match else None + + left_mean_volume = get_mean_volume(left_output) + right_mean_volume = get_mean_volume(right_output) + + # Debugging output for clarity + print(f"Stream {stream_index} - Left channel mean volume: {left_mean_volume}") + print(f"Stream {stream_index} - Right channel mean volume: {right_mean_volume}") + + # Handle cases where volume data is unavailable + if left_mean_volume is None or right_mean_volume is None: + print(f"Stream {stream_index}: Unable to analyze audio. Skipping this stream.") + continue + + # Determine if one channel is significantly louder than the other + silence_threshold = -60.0 # dB, adjust as needed + if right_mean_volume > silence_threshold and left_mean_volume <= silence_threshold: + print(f"Stream {stream_index}: Detected right-channel-only audio. Applying right-to-center panning.") + pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c1|c1=c1[outa{stream_index}]") + elif left_mean_volume > silence_threshold and right_mean_volume <= silence_threshold: + print(f"Stream {stream_index}: Detected left-channel-only audio. Applying left-to-center panning.") + pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c0|c1=c0[outa{stream_index}]") + else: + print(f"Stream {stream_index}: Audio is balanced or both channels are silent. No panning applied.") + + # Combine pan filters for FFmpeg command + return pan_filters + + def convert_to_mp4(input_file, input_directory, audio_pan): output_file_name = f"{input_file.stem.replace('_pm', '')}_sc.mp4" output_file = input_directory / output_file_name - # Default audio filter - audio_filter = None - - # Set audio pan filter based on user input - if audio_pan == "left": - audio_filter = "[0:a]pan=stereo|c0=c0|c1=c0[outa]" - elif audio_pan == "right": - audio_filter = "[0:a]pan=stereo|c0=c1|c1=c1[outa]" + # Detect audio pan automatically if set to "auto" + if audio_pan == "auto": + pan_filters = detect_audio_pan(input_file) + else: + pan_filters = [] + if audio_pan == "left": + pan_filters.append("[0:a]pan=stereo|c0=c0|c1=c0[outa]") + elif audio_pan == "right": + pan_filters.append("[0:a]pan=stereo|c0=c1|c1=c1[outa]") + # FFmpeg command setup command = [ "ffmpeg", "-i", str(input_file), @@ -124,23 +197,36 @@ def convert_to_mp4(input_file, input_directory, audio_pan): "-b:v", "3500000", "-bufsize", "1750000", "-maxrate", "3500000", "-vf", "yadif", ] - # Add audio mapping and filter if specified - if audio_filter: - command.extend([ - "-filter_complex", audio_filter, - "-map", "[outa]" # Map the output of the pan filter as the audio stream - ]) + # Add audio mapping and filters if specified + if pan_filters: + if len(pan_filters) == 1: + # Single audio stream case + # Ensure consistent output label `[outa]` + pan_filters[0] = pan_filters[0].replace("[outa1]", "[outa]") + command.extend([ + "-filter_complex", pan_filters[0], + "-map", "[outa]" + ]) + else: + # Multi-audio stream case + filter_complex = ";".join(pan_filters) + command.extend([ + "-filter_complex", filter_complex, + ]) + for idx in range(len(pan_filters)): + command.extend(["-map", f"[outa{idx}]"]) else: - command.extend([ - "-map", "0:a", # Default audio mapping if no pan is specified - ]) + command.extend(["-map", "0:a", "-c:a", "aac", "-b:a", "320000", "-ar", "48000"]) - # Add audio encoding options - command.extend(["-c:a", "aac", "-b:a", "320000", "-ar", "48000", str(output_file)]) + # Add output file + command.append(str(output_file)) + print(f"FFmpeg command: {' '.join(command)}") # Debugging output subprocess.check_call(command) + print(f"MP4 created: {output_file}") return output_file + def convert_mov_file(input_file, input_directory): """Convert a MOV file to FFV1 and MP4 formats using FFmpeg""" @@ -280,8 +366,10 @@ def main(): parser.add_argument("-o", "--output", help="Path to save csv (optional). If provided, MediaInfo extraction will be performed.", required=False) parser.add_argument("-m", "--model", default='medium', choices=['tiny', 'base', 'small', 'medium', 'large'], help='The Whisper model to use') parser.add_argument("-f", "--format", default='vtt', choices=['vtt', 'srt', 'txt', 'json'], help='The subtitle output format to use') - parser.add_argument("-p", "--audio-pan", choices=["left", "right", "none"], default="none", help="Pan audio to center from left or right channel.") - + parser.add_argument("-p", "--audio-pan", + choices=["left", "right", "none", "auto"], + default="none", + help="Pan audio to center from left, right, or auto-detect mono audio.") args = parser.parse_args() @@ -362,5 +450,4 @@ def main(): csvwriter.writerows(file_data) if __name__ == "__main__": - main() - + main() \ No newline at end of file From c2a58260785ce5f88bd8cf268345ec492683188d Mon Sep 17 00:00:00 2001 From: bturkus Date: Mon, 2 Dec 2024 18:15:23 -0500 Subject: [PATCH 3/5] Update video_processing.py add pan center adjust auto pan testing --- ami_scripts/video_processing.py | 69 ++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/ami_scripts/video_processing.py b/ami_scripts/video_processing.py index 13ee56f..72f392b 100755 --- a/ami_scripts/video_processing.py +++ b/ami_scripts/video_processing.py @@ -122,7 +122,7 @@ def detect_audio_pan(input_file, probe_duration=60): # Analyze each audio stream pan_filters = [] - for stream_index in audio_streams: + for i, stream_index in enumerate(audio_streams): print(f"Analyzing audio stream: {stream_index}") # Analyze left channel @@ -164,14 +164,13 @@ def get_mean_volume(output): silence_threshold = -60.0 # dB, adjust as needed if right_mean_volume > silence_threshold and left_mean_volume <= silence_threshold: print(f"Stream {stream_index}: Detected right-channel-only audio. Applying right-to-center panning.") - pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c1|c1=c1[outa{stream_index}]") + pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c1|c1=c1[outa{i}]") elif left_mean_volume > silence_threshold and right_mean_volume <= silence_threshold: print(f"Stream {stream_index}: Detected left-channel-only audio. Applying left-to-center panning.") - pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c0|c1=c0[outa{stream_index}]") + pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c0|c1=c0[outa{i}]") else: print(f"Stream {stream_index}: Audio is balanced or both channels are silent. No panning applied.") - # Combine pan filters for FFmpeg command return pan_filters @@ -179,15 +178,30 @@ def convert_to_mp4(input_file, input_directory, audio_pan): output_file_name = f"{input_file.stem.replace('_pm', '')}_sc.mp4" output_file = input_directory / output_file_name - # Detect audio pan automatically if set to "auto" - if audio_pan == "auto": + # Detect all audio streams in the input file + ffprobe_command = [ + "ffprobe", "-i", str(input_file), + "-show_entries", "stream=index:stream=codec_type", + "-select_streams", "a", "-of", "compact=p=0:nk=1", "-v", "0" + ] + ffprobe_result = subprocess.run(ffprobe_command, capture_output=True, text=True) + audio_streams = [ + int(line.split('|')[0]) + for line in ffprobe_result.stdout.splitlines() if "audio" in line + ] + + # Generate pan filters based on user selection + pan_filters = [] + if audio_pan in {"left", "right", "center"}: + for i, stream_index in enumerate(audio_streams): + if audio_pan == "left": + pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c0|c1=c0[outa{i}]") + elif audio_pan == "right": + pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c1|c1=c1[outa{i}]") + elif audio_pan == "center": + pan_filters.append(f"[0:{stream_index}]pan=stereo|c0=c0+c1|c1=c0+c1[outa{i}]") + elif audio_pan == "auto": pan_filters = detect_audio_pan(input_file) - else: - pan_filters = [] - if audio_pan == "left": - pan_filters.append("[0:a]pan=stereo|c0=c0|c1=c0[outa]") - elif audio_pan == "right": - pan_filters.append("[0:a]pan=stereo|c0=c1|c1=c1[outa]") # FFmpeg command setup command = [ @@ -197,26 +211,18 @@ def convert_to_mp4(input_file, input_directory, audio_pan): "-b:v", "3500000", "-bufsize", "1750000", "-maxrate", "3500000", "-vf", "yadif", ] - # Add audio mapping and filters if specified + # Add audio filters if specified if pan_filters: - if len(pan_filters) == 1: - # Single audio stream case - # Ensure consistent output label `[outa]` - pan_filters[0] = pan_filters[0].replace("[outa1]", "[outa]") - command.extend([ - "-filter_complex", pan_filters[0], - "-map", "[outa]" - ]) - else: - # Multi-audio stream case - filter_complex = ";".join(pan_filters) - command.extend([ - "-filter_complex", filter_complex, - ]) - for idx in range(len(pan_filters)): - command.extend(["-map", f"[outa{idx}]"]) + filter_complex = ";".join(pan_filters) + command.extend([ + "-filter_complex", filter_complex, + ]) + for i in range(len(pan_filters)): + command.extend(["-map", f"[outa{i}]"]) else: - command.extend(["-map", "0:a", "-c:a", "aac", "-b:a", "320000", "-ar", "48000"]) + # Default mapping for all audio streams without modification + for stream_index in audio_streams: + command.extend(["-map", f"0:{stream_index}", "-c:a", "aac", "-b:a", "320000", "-ar", "48000"]) # Add output file command.append(str(output_file)) @@ -227,7 +233,6 @@ def convert_to_mp4(input_file, input_directory, audio_pan): return output_file - def convert_mov_file(input_file, input_directory): """Convert a MOV file to FFV1 and MP4 formats using FFmpeg""" output_file1 = input_directory / f"{pathlib.Path(input_file).stem}.mkv" @@ -367,7 +372,7 @@ def main(): parser.add_argument("-m", "--model", default='medium', choices=['tiny', 'base', 'small', 'medium', 'large'], help='The Whisper model to use') parser.add_argument("-f", "--format", default='vtt', choices=['vtt', 'srt', 'txt', 'json'], help='The subtitle output format to use') parser.add_argument("-p", "--audio-pan", - choices=["left", "right", "none", "auto"], + choices=["left", "right", "none", "center", "auto"], default="none", help="Pan audio to center from left, right, or auto-detect mono audio.") From 234f386558a9a75e29681b592071c57e423da467 Mon Sep 17 00:00:00 2001 From: bturkus Date: Mon, 2 Dec 2024 18:25:32 -0500 Subject: [PATCH 4/5] Update video_processing.py --- ami_scripts/video_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ami_scripts/video_processing.py b/ami_scripts/video_processing.py index 72f392b..d4eb4f9 100755 --- a/ami_scripts/video_processing.py +++ b/ami_scripts/video_processing.py @@ -417,7 +417,7 @@ def main(): transcribe_directory(input_dir, args.model, args.format) print("Deleting empty directories...") - delete_empty_directories(input_dir, ["AuxiliaryFiles", "V210", "PreservationMasters", "ServiceCopies"]) + delete_empty_directories(input_dir, ["AuxiliaryFiles", "V210", "PreservationMasters", "ServiceCopies", "ProcessedDV"]) if args.output: project_code_pattern = re.compile(r'(\d{4}_\d{2}_\d{2})') From 90370cedb0102a5ff7eed33e2ed10f7b5af93b86 Mon Sep 17 00:00:00 2001 From: bturkus Date: Mon, 2 Dec 2024 18:30:15 -0500 Subject: [PATCH 5/5] Update video_processing.py 2 mins for autodetection of audio streams --- ami_scripts/video_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ami_scripts/video_processing.py b/ami_scripts/video_processing.py index d4eb4f9..bd28764 100755 --- a/ami_scripts/video_processing.py +++ b/ami_scripts/video_processing.py @@ -104,7 +104,7 @@ def transcribe_directory(input_directory, model, output_format): output_writer(transcription_response, file.stem) -def detect_audio_pan(input_file, probe_duration=60): +def detect_audio_pan(input_file, probe_duration=120): """ Detect whether the audio is isolated to the left or right channel for each audio stream. Probes only the first `probe_duration` seconds of the file.