diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index e94459f..b06508a 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -30,13 +30,13 @@ jobs: python-version: ${{ matrix.py }} - uses: actions/checkout@v3 - name: Install soundlibs Ubuntu - run: sudo apt-get update && sudo apt-get install --no-install-recommends -y --fix-missing pkg-config libsndfile1 sox + run: sudo apt-get update && sudo apt-get install --no-install-recommends -y --fix-missing pkg-config libsndfile1 sox ffmpeg if: matrix.os == 'ubuntu-latest' - name: Install soundlibs MacOs - run: brew install libsndfile llvm libomp sox + run: brew install libsndfile llvm libomp sox ffmpeg if: matrix.os == 'macos-latest-xlarge' - name: Install soundlibs Windows - run: choco install libsndfile sox.portable flac + run: choco install libsndfile sox.portable flac ffmpeg if: matrix.os == 'windows-latest' - name: Upgrade pip run: python -m pip install -U pip diff --git a/basic_pitch/data/datasets/slakh.py b/basic_pitch/data/datasets/slakh.py index 6801cdb..260a77b 100644 --- a/basic_pitch/data/datasets/slakh.py +++ b/basic_pitch/data/datasets/slakh.py @@ -44,7 +44,7 @@ def process(self, element: Tuple[str, str]) -> Any: import tempfile import apache_beam as beam - import sox + import ffmpeg from basic_pitch.constants import ( AUDIO_N_CHANNELS, @@ -77,11 +77,10 @@ def process(self, element: Tuple[str, str]) -> Any: return None local_wav_path = "{}_tmp.wav".format(track_local.audio_path) - tfm = sox.Transformer() - tfm.rate(AUDIO_SAMPLE_RATE) - tfm.channels(AUDIO_N_CHANNELS) try: - tfm.build(track_local.audio_path, local_wav_path) + ffmpeg.input(track_local.audio_path).output( + local_wav_path, ar=AUDIO_SAMPLE_RATE, ac=AUDIO_N_CHANNELS + ).run() except Exception as e: logging.info(f"Could not process {local_wav_path}. Exception: {e}") return None @@ -113,7 +112,7 @@ def process(self, element: List[str]) -> List[Any]: import tempfile import numpy as np - import sox + import ffmpeg from basic_pitch.constants import ( AUDIO_N_CHANNELS, @@ -145,12 +144,11 @@ def process(self, element: List[str]) -> List[Any]: d.write(s.read()) local_wav_path = "{}_tmp.wav".format(track_local.audio_path) - tfm = sox.Transformer() - tfm.rate(AUDIO_SAMPLE_RATE) - tfm.channels(AUDIO_N_CHANNELS) - tfm.build(track_local.audio_path, local_wav_path) + ffmpeg.input(track_local.audio_path).output( + local_wav_path, ar=AUDIO_SAMPLE_RATE, ac=AUDIO_N_CHANNELS + ).run() - duration = sox.file_info.duration(local_wav_path) + duration = float(ffmpeg.probe(local_wav_path)["format"]["duration"]) time_scale = np.arange(0, duration + ANNOTATION_HOP, ANNOTATION_HOP) n_time_frames = len(time_scale) diff --git a/pyproject.toml b/pyproject.toml index 4e23b0f..8843074 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,8 @@ data = [ "apache_beam", "mirdata", "smart_open", - "sox" + "sox", + "ffmpeg-python" ] test = [ "basic_pitch[data]",