From 5ac9a397b9a2e408abc8568fae737eb6ef293672 Mon Sep 17 00:00:00 2001
From: andylamp <2177249+andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 00:02:22 +0000
Subject: [PATCH 01/16] fix numpy compatibility while replicating existing
 behavior

Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 nemo/collections/asr/parts/preprocessing/feature_loader.py | 4 ++--
 nemo/collections/asr/parts/preprocessing/segment.py        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/nemo/collections/asr/parts/preprocessing/feature_loader.py b/nemo/collections/asr/parts/preprocessing/feature_loader.py
index 8c629cf4cfd4..17725fcebe35 100644
--- a/nemo/collections/asr/parts/preprocessing/feature_loader.py
+++ b/nemo/collections/asr/parts/preprocessing/feature_loader.py
@@ -50,10 +50,10 @@ def _convert_samples_to_float32(samples: np.ndarray) -> np.ndarray:
         Integers will be scaled to [-1, 1] in float32.
         """
         float32_samples = samples.astype('float32')
-        if samples.dtype in np.sctypes['int']:
+        if samples.dtype in (np.int8, np.int16, np.int32, np.int64):
             bits = np.iinfo(samples.dtype).bits
             float32_samples *= 1.0 / 2 ** (bits - 1)
-        elif samples.dtype in np.sctypes['float']:
+        elif samples.dtype in (np.float16, np.float32, np.float64):
             pass
         else:
             raise TypeError("Unsupported sample type: %s." % samples.dtype)
diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py
index aceab6637006..c518fcfa5b66 100644
--- a/nemo/collections/asr/parts/preprocessing/segment.py
+++ b/nemo/collections/asr/parts/preprocessing/segment.py
@@ -260,10 +260,10 @@ def _convert_samples_to_float32(samples):
         Integers will be scaled to [-1, 1] in float32.
         """
         float32_samples = samples.astype('float32')
-        if samples.dtype in np.sctypes['int']:
+        if samples.dtype in (np.int8, np.int16, np.int32, np.int64):
             bits = np.iinfo(samples.dtype).bits
             float32_samples *= 1.0 / 2 ** (bits - 1)
-        elif samples.dtype in np.sctypes['float']:
+        elif samples.dtype in (np.float16, np.float32, np.float64):
             pass
         else:
             raise TypeError("Unsupported sample type: %s." % samples.dtype)

From 9738306d13d8a6682007d0b2ede0d496a5a2ec7e Mon Sep 17 00:00:00 2001
From: andylamp <andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 00:36:35 +0000
Subject: [PATCH 02/16] Apply isort and black reformatting

Signed-off-by: andylamp <andylamp@users.noreply.github.com>
Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 nemo/collections/asr/parts/preprocessing/feature_loader.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nemo/collections/asr/parts/preprocessing/feature_loader.py b/nemo/collections/asr/parts/preprocessing/feature_loader.py
index 17725fcebe35..858e3acb68c8 100644
--- a/nemo/collections/asr/parts/preprocessing/feature_loader.py
+++ b/nemo/collections/asr/parts/preprocessing/feature_loader.py
@@ -18,12 +18,13 @@
 
 
 class ExternalFeatureLoader(object):
-    """Feature loader that load external features store in certain format. 
+    """Feature loader that load external features store in certain format.
     Currently support pickle, npy and npz format.
     """
 
     def __init__(
-        self, augmentor: Optional["nemo.collections.asr.parts.perturb.FeatureAugmentor"] = None,
+        self,
+        augmentor: Optional["nemo.collections.asr.parts.perturb.FeatureAugmentor"] = None,
     ):
         """
         Feature loader

From 00de6879378b4dbd8122e38525d88699a524da03 Mon Sep 17 00:00:00 2001
From: andylamp <2177249+andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 01:40:36 +0000
Subject: [PATCH 03/16] add docstrings and fix line lengths

---
 .../asr/parts/preprocessing/feature_loader.py |  6 ++-
 .../asr/parts/preprocessing/segment.py        | 48 +++++++++++++------
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/nemo/collections/asr/parts/preprocessing/feature_loader.py b/nemo/collections/asr/parts/preprocessing/feature_loader.py
index 858e3acb68c8..e715d2dafb95 100644
--- a/nemo/collections/asr/parts/preprocessing/feature_loader.py
+++ b/nemo/collections/asr/parts/preprocessing/feature_loader.py
@@ -61,13 +61,17 @@ def _convert_samples_to_float32(samples: np.ndarray) -> np.ndarray:
         return float32_samples
 
     def process(self, file_path: str) -> torch.Tensor:
+        """Processes the features from the provided `file_path`."""
         features = self.load_feature_from_file(file_path)
         features = self.process_segment(features)
         return features
 
     def process_segment(self, feature_segment):
+        """Processes the provided feature segment."""
         if self.augmentor:
-            # augmentor for external features. Here possible augmentor for external embedding feature is Diaconis Augmentation and might be implemented later
+            # augmentor for external features. Here possible augmentor for
+            # external embedding feature is Diaconis Augmentation and might
+            # be implemented later
             self.augmentor.perturb(feature_segment)
             return torch.tensor(feature_segment, dtype=torch.float)
 
diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py
index c518fcfa5b66..d43fd3aa53d0 100644
--- a/nemo/collections/asr/parts/preprocessing/segment.py
+++ b/nemo/collections/asr/parts/preprocessing/segment.py
@@ -67,14 +67,15 @@
 
 def select_channels(signal: npt.NDArray, channel_selector: Optional[ChannelSelectorType] = None) -> npt.NDArray:
     """
-    Convert a multi-channel signal to a single-channel signal by averaging over channels or selecting a single channel,
-    or pass-through multi-channel signal when channel_selector is `None`.
+    Convert a multi-channel signal to a single-channel signal by averaging over channels or
+    selecting a single channel, or pass-through multi-channel signal when channel_selector is `None`.
 
     Args:
         signal: numpy array with shape (..., num_channels)
-        channel selector: string denoting the downmix mode, an integer denoting the channel to be selected, or an iterable
-                          of integers denoting a subset of channels. Channel selector is using zero-based indexing.
-                          If set to `None`, the original signal will be returned. Uses zero-based indexing.
+        channel_selector: string denoting the downmix mode, an integer denoting the channel to be selected,
+                          or an iterable of integers denoting a subset of channels. Channels are selected
+                          using zero-based indexing. If set to `None`, the original signal is returned
+                          unchanged.
 
     Returns:
         numpy array
@@ -92,7 +93,8 @@ def select_channels(signal: npt.NDArray, channel_selector: Optional[ChannelSelec
 
     if num_channels >= num_samples:
         logging.warning(
-            'Number of channels (%d) is greater or equal than number of samples (%d). Check for possible transposition.',
+            'Number of channels (%d) is greater or equal than number of samples (%d). '
+            'Check for possible transposition.',
             num_channels,
             num_samples,
         )
@@ -199,7 +201,8 @@ def __init__(
             samples = samples.transpose()
             sample_rate = target_sr
         if trim:
-            # librosa is using channels-first layout (num_channels, num_samples), which is transpose of AudioSegment's layout
+            # librosa is using channels-first layout (num_channels, num_samples),
+            # which is transpose of AudioSegment's layout
             samples = samples.transpose()
             samples, _ = librosa.effects.trim(
                 samples, top_db=trim_top_db, ref=trim_ref, frame_length=trim_frame_length, hop_length=trim_hop_length
@@ -303,11 +306,12 @@ def from_file(
         :param trim_frame_length: the number of samples per analysis frame
         :param trim_hop_length: the number of samples between analysis frames
         :param orig_sr: the original sample rate
-        :param channel selector: string denoting the downmix mode, an integer denoting the channel to be selected, or an iterable
-                                 of integers denoting a subset of channels. Channel selector is using zero-based indexing.
-                                 If set to `None`, the original signal will be used.
+        :param channel selector: string denoting the downmix mode, an integer denoting the channel to be selected,
+                                 or an iterable of integers denoting a subset of channels. Channel selector is using
+                                 zero-based indexing. If set to `None`, the original signal will be used.
         :param normalize_db (Optional[float]): if not None, normalize the audio signal to a target RMS value
-        :param ref_channel (Optional[int]): channel to use as reference for normalizing multi-channel audio, set None to use max RMS across channels
+        :param ref_channel (Optional[int]): channel to use as reference for normalizing multi-channel audio,
+                                            set None to use max RMS across channels
         :return: AudioSegment instance
         """
         samples = None
@@ -441,7 +445,8 @@ def from_file_list(
             # Only single-channel individual files are supported for now
             if a_segment.num_channels != 1:
                 raise RuntimeError(
-                    f'Expecting a single-channel audio signal, but loaded {a_segment.num_channels} channels from file {a_file}'
+                    f'Expecting a single-channel audio signal, but loaded {a_segment.num_channels} '
+                    f'channels from file {a_file}'
                 )
 
             if target_sr is None:
@@ -523,14 +528,16 @@ def segment_from_file(
                         audio_start = math.floor(offset * sample_rate)
                         if audio_start > max_audio_start:
                             raise RuntimeError(
-                                f'Provided audio start ({audio_start}) is larger than the maximum possible ({max_audio_start})'
+                                f'Provided audio start ({audio_start}) is larger than the '
+                                f'maximum possible ({max_audio_start})'
                             )
                     f.seek(audio_start)
                     samples = f.read(n_segments_at_original_sr, dtype=dtype)
                     is_segmented = True
                 elif n_segments_at_original_sr > len(f):
                     logging.warning(
-                        f"Number of segments ({n_segments_at_original_sr}) is greater than the length ({len(f)}) of the audio file {audio_file}. This may lead to shape mismatch errors."
+                        f"Number of segments ({n_segments_at_original_sr}) is greater than the length ({len(f)}) "
+                        f"of the audio file {audio_file}. This may lead to shape mismatch errors."
                     )
                     samples = f.read(dtype=dtype)
                 else:
@@ -550,14 +557,17 @@ def segment_from_file(
 
     @property
     def samples(self):
+        """Returns a copy of the samples."""
         return self._samples.copy()
 
     @property
     def sample_rate(self):
+        """Returns the sample rate of the segment."""
         return self._sample_rate
 
     @property
     def num_channels(self):
+        """Returns the number of channels in the segment."""
         if self._samples.ndim == 1:
             return 1
         else:
@@ -565,10 +575,12 @@ def num_channels(self):
 
     @property
     def num_samples(self):
+        """Returns the number of samples in the segment."""
         return self._samples.shape[0]
 
     @property
     def duration(self):
+        """Returns the duration of the segment in seconds."""
         return self.num_samples / float(self._sample_rate)
 
     @property
@@ -579,21 +591,26 @@ def rms_db(self):
 
     @property
     def orig_sr(self):
+        """Returns the original sample rate of the segment."""
         return self._orig_sr
 
     @property
     def offset(self):
+        """Returns the offset used for the segment."""
         return float(self._offset) if self._offset is not None else None
 
     @property
     def audio_file(self):
+        """Returns the audio file that the segment was loaded from."""
         return str(self._audio_file) if self._audio_file is not None else None
 
     def is_empty(self):
+        """Returns True if the segment has no samples or its mean-square energy is zero."""
         mean_square = np.sum(np.mean(self._samples**2, axis=0))
         return self.num_samples == 0 or mean_square == 0
 
     def gain_db(self, gain):
+        """Applies a gain of `gain` decibels to the samples in place."""
         self._samples *= 10.0 ** (gain / 20.0)
 
     def normalize_db(self, target_db=-20, ref_channel=None):
@@ -622,7 +639,8 @@ def pad(self, pad_size, symmetric=False):
             pad_width = ((pad_size, pad_size), (0, 0)) if symmetric else ((0, pad_size), (0, 0))
         else:
             raise NotImplementedError(
-                f"Padding not implemented for signals with more that 2 dimensions. Current samples dimension: {samples_ndim}."
+                f"Padding not implemented for signals with more that 2 dimensions. "
+                f"Current samples dimension: {samples_ndim}."
             )
         # apply padding
         self._samples = np.pad(

From 8a2c620ac9ec2e34bce94281c79224ffd9e7e78c Mon Sep 17 00:00:00 2001
From: andylamp <2177249+andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 01:56:01 +0000
Subject: [PATCH 04/16] fix final warning for line len

---
 nemo/collections/asr/parts/preprocessing/segment.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py
index d43fd3aa53d0..00558769b020 100644
--- a/nemo/collections/asr/parts/preprocessing/segment.py
+++ b/nemo/collections/asr/parts/preprocessing/segment.py
@@ -419,7 +419,8 @@ def from_file_list(
             # Shortcut when selecting a single channel
             if channel_selector >= len(audio_file_list):
                 raise RuntimeError(
-                    f'Channel cannot be selected: channel_selector={channel_selector}, num_audio_files={len(audio_file_list)}'
+                    f'Channel cannot be selected: channel_selector={channel_selector}, '
+                    f'num_audio_files={len(audio_file_list)}'
                 )
             # Select only a single file
             audio_file_list = [audio_file_list[channel_selector]]

From 5f6495a6226bd6501f40b91ac00d98199011260c Mon Sep 17 00:00:00 2001
From: andylamp <2177249+andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 00:02:22 +0000
Subject: [PATCH 05/16] fix numpy compatibility while replicating existing
 behavior

Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 nemo/collections/asr/parts/preprocessing/feature_loader.py | 4 ++--
 nemo/collections/asr/parts/preprocessing/segment.py        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/nemo/collections/asr/parts/preprocessing/feature_loader.py b/nemo/collections/asr/parts/preprocessing/feature_loader.py
index 8c629cf4cfd4..17725fcebe35 100644
--- a/nemo/collections/asr/parts/preprocessing/feature_loader.py
+++ b/nemo/collections/asr/parts/preprocessing/feature_loader.py
@@ -50,10 +50,10 @@ def _convert_samples_to_float32(samples: np.ndarray) -> np.ndarray:
         Integers will be scaled to [-1, 1] in float32.
         """
         float32_samples = samples.astype('float32')
-        if samples.dtype in np.sctypes['int']:
+        if samples.dtype in (np.int8, np.int16, np.int32, np.int64):
             bits = np.iinfo(samples.dtype).bits
             float32_samples *= 1.0 / 2 ** (bits - 1)
-        elif samples.dtype in np.sctypes['float']:
+        elif samples.dtype in (np.float16, np.float32, np.float64):
             pass
         else:
             raise TypeError("Unsupported sample type: %s." % samples.dtype)
diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py
index aceab6637006..c518fcfa5b66 100644
--- a/nemo/collections/asr/parts/preprocessing/segment.py
+++ b/nemo/collections/asr/parts/preprocessing/segment.py
@@ -260,10 +260,10 @@ def _convert_samples_to_float32(samples):
         Integers will be scaled to [-1, 1] in float32.
         """
         float32_samples = samples.astype('float32')
-        if samples.dtype in np.sctypes['int']:
+        if samples.dtype in (np.int8, np.int16, np.int32, np.int64):
             bits = np.iinfo(samples.dtype).bits
             float32_samples *= 1.0 / 2 ** (bits - 1)
-        elif samples.dtype in np.sctypes['float']:
+        elif samples.dtype in (np.float16, np.float32, np.float64):
             pass
         else:
             raise TypeError("Unsupported sample type: %s." % samples.dtype)

From 183d5c5204ec5fea7c16617e18294e0918b55ad9 Mon Sep 17 00:00:00 2001
From: andylamp <andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 00:36:35 +0000
Subject: [PATCH 06/16] Apply isort and black reformatting

Signed-off-by: andylamp <andylamp@users.noreply.github.com>
Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 nemo/collections/asr/parts/preprocessing/feature_loader.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nemo/collections/asr/parts/preprocessing/feature_loader.py b/nemo/collections/asr/parts/preprocessing/feature_loader.py
index 17725fcebe35..858e3acb68c8 100644
--- a/nemo/collections/asr/parts/preprocessing/feature_loader.py
+++ b/nemo/collections/asr/parts/preprocessing/feature_loader.py
@@ -18,12 +18,13 @@
 
 
 class ExternalFeatureLoader(object):
-    """Feature loader that load external features store in certain format. 
+    """Feature loader that load external features store in certain format.
     Currently support pickle, npy and npz format.
     """
 
     def __init__(
-        self, augmentor: Optional["nemo.collections.asr.parts.perturb.FeatureAugmentor"] = None,
+        self,
+        augmentor: Optional["nemo.collections.asr.parts.perturb.FeatureAugmentor"] = None,
     ):
         """
         Feature loader

From 5d842fa4297f93b2c3bcf11e33195791467ee95e Mon Sep 17 00:00:00 2001
From: andylamp <2177249+andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 01:40:36 +0000
Subject: [PATCH 07/16] add docstrings and fix line lengths

Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 .../asr/parts/preprocessing/feature_loader.py |  6 ++-
 .../asr/parts/preprocessing/segment.py        | 48 +++++++++++++------
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/nemo/collections/asr/parts/preprocessing/feature_loader.py b/nemo/collections/asr/parts/preprocessing/feature_loader.py
index 858e3acb68c8..e715d2dafb95 100644
--- a/nemo/collections/asr/parts/preprocessing/feature_loader.py
+++ b/nemo/collections/asr/parts/preprocessing/feature_loader.py
@@ -61,13 +61,17 @@ def _convert_samples_to_float32(samples: np.ndarray) -> np.ndarray:
         return float32_samples
 
     def process(self, file_path: str) -> torch.Tensor:
+        """Processes the features from the provided `file_path`."""
         features = self.load_feature_from_file(file_path)
         features = self.process_segment(features)
         return features
 
     def process_segment(self, feature_segment):
+        """Processes the provided feature segment."""
         if self.augmentor:
-            # augmentor for external features. Here possible augmentor for external embedding feature is Diaconis Augmentation and might be implemented later
+            # augmentor for external features. Here possible augmentor for
+            # external embedding feature is Diaconis Augmentation and might
+            # be implemented later
             self.augmentor.perturb(feature_segment)
             return torch.tensor(feature_segment, dtype=torch.float)
 
diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py
index c518fcfa5b66..d43fd3aa53d0 100644
--- a/nemo/collections/asr/parts/preprocessing/segment.py
+++ b/nemo/collections/asr/parts/preprocessing/segment.py
@@ -67,14 +67,15 @@
 
 def select_channels(signal: npt.NDArray, channel_selector: Optional[ChannelSelectorType] = None) -> npt.NDArray:
     """
-    Convert a multi-channel signal to a single-channel signal by averaging over channels or selecting a single channel,
-    or pass-through multi-channel signal when channel_selector is `None`.
+    Convert a multi-channel signal to a single-channel signal by averaging over channels or
+    selecting a single channel, or pass-through multi-channel signal when channel_selector is `None`.
 
     Args:
         signal: numpy array with shape (..., num_channels)
-        channel selector: string denoting the downmix mode, an integer denoting the channel to be selected, or an iterable
-                          of integers denoting a subset of channels. Channel selector is using zero-based indexing.
-                          If set to `None`, the original signal will be returned. Uses zero-based indexing.
+        channel_selector: string denoting the downmix mode, an integer denoting the channel to be selected,
+                          or an iterable of integers denoting a subset of channels. Channels are selected
+                          using zero-based indexing. If set to `None`, the original signal is returned
+                          unchanged.
 
     Returns:
         numpy array
@@ -92,7 +93,8 @@ def select_channels(signal: npt.NDArray, channel_selector: Optional[ChannelSelec
 
     if num_channels >= num_samples:
         logging.warning(
-            'Number of channels (%d) is greater or equal than number of samples (%d). Check for possible transposition.',
+            'Number of channels (%d) is greater or equal than number of samples (%d). '
+            'Check for possible transposition.',
             num_channels,
             num_samples,
         )
@@ -199,7 +201,8 @@ def __init__(
             samples = samples.transpose()
             sample_rate = target_sr
         if trim:
-            # librosa is using channels-first layout (num_channels, num_samples), which is transpose of AudioSegment's layout
+            # librosa is using channels-first layout (num_channels, num_samples),
+            # which is transpose of AudioSegment's layout
             samples = samples.transpose()
             samples, _ = librosa.effects.trim(
                 samples, top_db=trim_top_db, ref=trim_ref, frame_length=trim_frame_length, hop_length=trim_hop_length
@@ -303,11 +306,12 @@ def from_file(
         :param trim_frame_length: the number of samples per analysis frame
         :param trim_hop_length: the number of samples between analysis frames
         :param orig_sr: the original sample rate
-        :param channel selector: string denoting the downmix mode, an integer denoting the channel to be selected, or an iterable
-                                 of integers denoting a subset of channels. Channel selector is using zero-based indexing.
-                                 If set to `None`, the original signal will be used.
+        :param channel selector: string denoting the downmix mode, an integer denoting the channel to be selected,
+                                 or an iterable of integers denoting a subset of channels. Channel selector is using
+                                 zero-based indexing. If set to `None`, the original signal will be used.
         :param normalize_db (Optional[float]): if not None, normalize the audio signal to a target RMS value
-        :param ref_channel (Optional[int]): channel to use as reference for normalizing multi-channel audio, set None to use max RMS across channels
+        :param ref_channel (Optional[int]): channel to use as reference for normalizing multi-channel audio,
+                                            set None to use max RMS across channels
         :return: AudioSegment instance
         """
         samples = None
@@ -441,7 +445,8 @@ def from_file_list(
             # Only single-channel individual files are supported for now
             if a_segment.num_channels != 1:
                 raise RuntimeError(
-                    f'Expecting a single-channel audio signal, but loaded {a_segment.num_channels} channels from file {a_file}'
+                    f'Expecting a single-channel audio signal, but loaded {a_segment.num_channels} '
+                    f'channels from file {a_file}'
                 )
 
             if target_sr is None:
@@ -523,14 +528,16 @@ def segment_from_file(
                         audio_start = math.floor(offset * sample_rate)
                         if audio_start > max_audio_start:
                             raise RuntimeError(
-                                f'Provided audio start ({audio_start}) is larger than the maximum possible ({max_audio_start})'
+                                f'Provided audio start ({audio_start}) is larger than the '
+                                f'maximum possible ({max_audio_start})'
                             )
                     f.seek(audio_start)
                     samples = f.read(n_segments_at_original_sr, dtype=dtype)
                     is_segmented = True
                 elif n_segments_at_original_sr > len(f):
                     logging.warning(
-                        f"Number of segments ({n_segments_at_original_sr}) is greater than the length ({len(f)}) of the audio file {audio_file}. This may lead to shape mismatch errors."
+                        f"Number of segments ({n_segments_at_original_sr}) is greater than the length ({len(f)}) "
+                        f"of the audio file {audio_file}. This may lead to shape mismatch errors."
                     )
                     samples = f.read(dtype=dtype)
                 else:
@@ -550,14 +557,17 @@ def segment_from_file(
 
     @property
     def samples(self):
+        """Returns a copy of the samples."""
         return self._samples.copy()
 
     @property
     def sample_rate(self):
+        """Returns the sample rate of the segment."""
         return self._sample_rate
 
     @property
     def num_channels(self):
+        """Returns the number of channels in the segment."""
         if self._samples.ndim == 1:
             return 1
         else:
@@ -565,10 +575,12 @@ def num_channels(self):
 
     @property
     def num_samples(self):
+        """Returns the number of samples in the segment."""
         return self._samples.shape[0]
 
     @property
     def duration(self):
+        """Returns the duration of the segment in seconds."""
         return self.num_samples / float(self._sample_rate)
 
     @property
@@ -579,21 +591,26 @@ def rms_db(self):
 
     @property
     def orig_sr(self):
+        """Returns the original sample rate of the segment."""
         return self._orig_sr
 
     @property
     def offset(self):
+        """Returns the offset used for the segment."""
         return float(self._offset) if self._offset is not None else None
 
     @property
     def audio_file(self):
+        """Returns the audio file that the segment was loaded from."""
         return str(self._audio_file) if self._audio_file is not None else None
 
     def is_empty(self):
+        """Returns True if the segment has no samples or its mean-square energy is zero."""
         mean_square = np.sum(np.mean(self._samples**2, axis=0))
         return self.num_samples == 0 or mean_square == 0
 
     def gain_db(self, gain):
+        """Applies a gain of `gain` decibels to the samples in place."""
         self._samples *= 10.0 ** (gain / 20.0)
 
     def normalize_db(self, target_db=-20, ref_channel=None):
@@ -622,7 +639,8 @@ def pad(self, pad_size, symmetric=False):
             pad_width = ((pad_size, pad_size), (0, 0)) if symmetric else ((0, pad_size), (0, 0))
         else:
             raise NotImplementedError(
-                f"Padding not implemented for signals with more that 2 dimensions. Current samples dimension: {samples_ndim}."
+                f"Padding not implemented for signals with more that 2 dimensions. "
+                f"Current samples dimension: {samples_ndim}."
             )
         # apply padding
         self._samples = np.pad(

From 9588148c5293c54ec91fdbbd3cfc37dbf5c75a1a Mon Sep 17 00:00:00 2001
From: andylamp <2177249+andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 01:56:01 +0000
Subject: [PATCH 08/16] fix final warning for line len

Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 nemo/collections/asr/parts/preprocessing/segment.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py
index d43fd3aa53d0..00558769b020 100644
--- a/nemo/collections/asr/parts/preprocessing/segment.py
+++ b/nemo/collections/asr/parts/preprocessing/segment.py
@@ -419,7 +419,8 @@ def from_file_list(
             # Shortcut when selecting a single channel
             if channel_selector >= len(audio_file_list):
                 raise RuntimeError(
-                    f'Channel cannot be selected: channel_selector={channel_selector}, num_audio_files={len(audio_file_list)}'
+                    f'Channel cannot be selected: channel_selector={channel_selector}, '
+                    f'num_audio_files={len(audio_file_list)}'
                 )
             # Select only a single file
             audio_file_list = [audio_file_list[channel_selector]]

From 5cfafb529ef39f39b69f8a888b9a988643d75f3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?oliver=20k=C3=B6nig?= <okoenig@nvidia.com>
Date: Thu, 28 Nov 2024 01:19:27 +0100
Subject: [PATCH 09/16] ci: Allow dry-run of release (#11418)

* ci: Allow dry-run of release

Signed-off-by: Oliver Koenig <okoenig@nvidia.com>

* fix

Signed-off-by: Oliver Koenig <okoenig@nvidia.com>

* finalize

Signed-off-by: Oliver Koenig <okoenig@nvidia.com>

---------

Signed-off-by: Oliver Koenig <okoenig@nvidia.com>
Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 .github/workflows/release.yml | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 03474251f995..81db8e1160d9 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -20,10 +20,15 @@ on:
         description: Ref (SHA or branch name) to release
         required: true
         type: string
+      dry-run:
+        description: Do not publish a wheel and GitHub release.
+        required: true
+        default: true
+        type: boolean
 
 jobs: 
   release:
-    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.12.3
+    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.15.0
     with:
       release-ref: ${{ inputs.release-ref }}
       image-name: nemo_container
@@ -35,8 +40,10 @@ jobs:
       python-package: nemo
       container-workdir: /workspace
       library-name: Neural Modules
+      dry-run: ${{ inputs.dry-run }}
     secrets:
       TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
       TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
       SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
       PAT: ${{ secrets.PAT }}
+      SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

From 4d4070963a86b93329f9ad88ee7f575956777186 Mon Sep 17 00:00:00 2001
From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com>
Date: Thu, 28 Nov 2024 01:03:58 -0800
Subject: [PATCH 10/16] fix dtype when init HF model from config (#11420)

* fix dtype when init HF model from config

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>

* Apply isort and black reformatting

Signed-off-by: akoumpa <akoumpa@users.noreply.github.com>

---------

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>
Signed-off-by: akoumpa <akoumpa@users.noreply.github.com>
Co-authored-by: akoumpa <akoumpa@users.noreply.github.com>
Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 .../llm/gpt/model/hf_auto_model_for_causal_lm.py           | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
index 8f4595bd6cee..481dd9a0e187 100644
--- a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
+++ b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
@@ -41,6 +41,7 @@ def __init__(
         model_transform=None,
         model_accelerator=None,
         trust_remote_code=False,
+        default_dtype=torch.bfloat16,
     ):
         super().__init__()
         self.save_hyperparameters()
@@ -53,6 +54,7 @@ def __init__(
         self.model_transform = model_transform
         self.model_accelerator = model_accelerator
         self.trust_remote_code = trust_remote_code
+        self.default_dtype = default_dtype
 
     @property
     def tokenizer(self):
@@ -79,7 +81,10 @@ def configure_model(self):
             from transformers import AutoConfig
 
             config = AutoConfig.from_pretrained(self.model_name, trust_remote_code=self.trust_remote_code)
-            self.model = AutoModelForCausalLM.from_config(config, trust_remote_code=self.trust_remote_code)
+            dtype = getattr(config, 'torch_dtype', self.default_dtype)
+            self.model = AutoModelForCausalLM.from_config(
+                config, torch_dtype=dtype, trust_remote_code=self.trust_remote_code
+            )
 
         if self.model_accelerator is not None:
             self.model_accelerator(self.model)

From f7fa43f1cc7f00772922ed3d996ce30baafb109c Mon Sep 17 00:00:00 2001
From: nune-tadevosyan <152167970+nune-tadevosyan@users.noreply.github.com>
Date: Thu, 28 Nov 2024 13:27:01 +0400
Subject: [PATCH 11/16] Removing unnecessary lines (#11408)

Signed-off-by: Nune <ntadevosyan@nvidia.com>
Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 nemo/collections/asr/data/audio_to_text_dataset.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/nemo/collections/asr/data/audio_to_text_dataset.py b/nemo/collections/asr/data/audio_to_text_dataset.py
index f91710de3cb3..3e1301dd4d53 100644
--- a/nemo/collections/asr/data/audio_to_text_dataset.py
+++ b/nemo/collections/asr/data/audio_to_text_dataset.py
@@ -871,7 +871,6 @@ def write_on_batch_end(
                     item["audio_filepath"] = sample.recording.sources[0].source
                 else:
                     item["audio_filepath"] = sample.id
-                item["audio_filepath"] = sample.recording.sources[0].source
                 item["offset"] = sample.start
                 item["duration"] = sample.duration
                 item["text"] = sample.supervisions[0].text or ''

From ab0ac8bc276a5efd7b0d75934e621d9c56202d7c Mon Sep 17 00:00:00 2001
From: Jan Lasek <janek.lasek@gmail.com>
Date: Fri, 29 Nov 2024 17:01:56 +0100
Subject: [PATCH 12/16] Handle import errors in virtual environment when
 running vLLM tests (#11435)

* Remove try / catch block to propagate import errors

Signed-off-by: Jan Lasek <janek.lasek@gmail.com>

* Small rewrite to handle import errors in export/deploy scripts

Signed-off-by: Jan Lasek <janek.lasek@gmail.com>

* Apply isort and black reformatting

Signed-off-by: janekl <janekl@users.noreply.github.com>

---------

Signed-off-by: Jan Lasek <janek.lasek@gmail.com>
Signed-off-by: janekl <janekl@users.noreply.github.com>
Co-authored-by: janekl <janekl@users.noreply.github.com>
Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 nemo/deploy/nlp/__init__.py             | 14 ++------------
 scripts/deploy/nlp/query_inframework.py |  2 +-
 tests/deploy/nemo_deploy.py             |  2 +-
 tests/export/nemo_export.py             | 12 ++++--------
 4 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/nemo/deploy/nlp/__init__.py b/nemo/deploy/nlp/__init__.py
index 5ebbe6816664..633544e300ed 100644
--- a/nemo/deploy/nlp/__init__.py
+++ b/nemo/deploy/nlp/__init__.py
@@ -12,15 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-use_query_llm = True
-try:
-    from nemo.deploy.nlp.query_llm import NemoQueryLLM, NemoQueryLLMPyTorch
-except Exception:
-    use_query_llm = False
-
-use_megatron_llm = True
-try:
-    from nemo.deploy.nlp.megatronllm_deployable import MegatronLLMDeployable
-except Exception:
-    use_megatron_llm = False
+from nemo.deploy.nlp.megatronllm_deployable import MegatronLLMDeployable
+from nemo.deploy.nlp.query_llm import NemoQueryLLM, NemoQueryLLMPyTorch
diff --git a/scripts/deploy/nlp/query_inframework.py b/scripts/deploy/nlp/query_inframework.py
index e77ab72a1f04..a62e09fa071d 100644
--- a/scripts/deploy/nlp/query_inframework.py
+++ b/scripts/deploy/nlp/query_inframework.py
@@ -15,7 +15,7 @@
 import argparse
 import sys
 
-from nemo.deploy.nlp.query_llm import NemoQueryLLMPyTorch
+from nemo.deploy.nlp import NemoQueryLLMPyTorch
 
 
 def get_args(argv):
diff --git a/tests/deploy/nemo_deploy.py b/tests/deploy/nemo_deploy.py
index 23db7c4f01f3..45f2bae3425e 100644
--- a/tests/deploy/nemo_deploy.py
+++ b/tests/deploy/nemo_deploy.py
@@ -21,7 +21,7 @@
 
 import torch
 
-from nemo.deploy.nlp.megatronllm_deployable import MegatronLLMDeployable
+from nemo.deploy.nlp import MegatronLLMDeployable
 from tests.infer_data_path import get_infer_test_data
 
 run_export_tests = True
diff --git a/tests/export/nemo_export.py b/tests/export/nemo_export.py
index df6a68828d41..cb2b3619e4d3 100644
--- a/tests/export/nemo_export.py
+++ b/tests/export/nemo_export.py
@@ -43,7 +43,8 @@
     from nemo.deploy.nlp import MegatronLLMDeployable, NemoQueryLLMPyTorch
 except Exception as e:
     LOGGER.warning(
-        f"Cannot import MegatronLLMDeployable, in-framework inference will not be available. {type(e).__name__}: {e}"
+        "Cannot import MegatronLLMDeployable or NemoQueryLLMPyTorch,"
+        f" in-framework inference will not be available. {type(e).__name__}: {e}"
     )
     in_framework_supported = False
 
@@ -104,12 +105,7 @@ def get_accuracy_with_lambada(model, nq, task_ids, lora_uids, test_data_path):
             all_expected_outputs.append(expected_output)
             if model is not None:
 
-                in_framework_model = False
-                if in_framework_supported:
-                    if isinstance(model, MegatronLLMDeployable):
-                        in_framework_model = True
-
-                if in_framework_model:
+                if in_framework_supported and isinstance(model, MegatronLLMDeployable):
                     model_output = model.generate(
                         inputs=[prompt],
                         length_params={"min_length": 1, "max_length": 1},
@@ -153,7 +149,7 @@ def get_accuracy_with_lambada(model, nq, task_ids, lora_uids, test_data_path):
                     correct_answers_relaxed += 1
 
             if nq is not None:
-                if isinstance(nq, NemoQueryLLMPyTorch):
+                if in_framework_supported and isinstance(nq, NemoQueryLLMPyTorch):
                     deployed_output = nq.query_llm(
                         prompts=[prompt],
                         max_length=1,

From 06e0e4ea496ca6ab41ae3d82040270d3c71099a4 Mon Sep 17 00:00:00 2001
From: andylamp <2177249+andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 00:02:22 +0000
Subject: [PATCH 13/16] fix numpy compatibility while replicating existing
 behavior

Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 nemo/collections/asr/parts/preprocessing/feature_loader.py | 4 ++--
 nemo/collections/asr/parts/preprocessing/segment.py        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/nemo/collections/asr/parts/preprocessing/feature_loader.py b/nemo/collections/asr/parts/preprocessing/feature_loader.py
index 8c629cf4cfd4..17725fcebe35 100644
--- a/nemo/collections/asr/parts/preprocessing/feature_loader.py
+++ b/nemo/collections/asr/parts/preprocessing/feature_loader.py
@@ -50,10 +50,10 @@ def _convert_samples_to_float32(samples: np.ndarray) -> np.ndarray:
         Integers will be scaled to [-1, 1] in float32.
         """
         float32_samples = samples.astype('float32')
-        if samples.dtype in np.sctypes['int']:
+        if samples.dtype in (np.int8, np.int16, np.int32, np.int64):
             bits = np.iinfo(samples.dtype).bits
             float32_samples *= 1.0 / 2 ** (bits - 1)
-        elif samples.dtype in np.sctypes['float']:
+        elif samples.dtype in (np.float16, np.float32, np.float64):
             pass
         else:
             raise TypeError("Unsupported sample type: %s." % samples.dtype)
diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py
index aceab6637006..c518fcfa5b66 100644
--- a/nemo/collections/asr/parts/preprocessing/segment.py
+++ b/nemo/collections/asr/parts/preprocessing/segment.py
@@ -260,10 +260,10 @@ def _convert_samples_to_float32(samples):
         Integers will be scaled to [-1, 1] in float32.
         """
         float32_samples = samples.astype('float32')
-        if samples.dtype in np.sctypes['int']:
+        if samples.dtype in (np.int8, np.int16, np.int32, np.int64):
             bits = np.iinfo(samples.dtype).bits
             float32_samples *= 1.0 / 2 ** (bits - 1)
-        elif samples.dtype in np.sctypes['float']:
+        elif samples.dtype in (np.float16, np.float32, np.float64):
             pass
         else:
             raise TypeError("Unsupported sample type: %s." % samples.dtype)

From eab5901d847c9fac9089aaa79a484f550c3b602e Mon Sep 17 00:00:00 2001
From: andylamp <andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 00:36:35 +0000
Subject: [PATCH 14/16] Apply isort and black reformatting

Signed-off-by: andylamp <andylamp@users.noreply.github.com>
Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 nemo/collections/asr/parts/preprocessing/feature_loader.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nemo/collections/asr/parts/preprocessing/feature_loader.py b/nemo/collections/asr/parts/preprocessing/feature_loader.py
index 17725fcebe35..858e3acb68c8 100644
--- a/nemo/collections/asr/parts/preprocessing/feature_loader.py
+++ b/nemo/collections/asr/parts/preprocessing/feature_loader.py
@@ -18,12 +18,13 @@
 
 
 class ExternalFeatureLoader(object):
-    """Feature loader that load external features store in certain format. 
+    """Feature loader that load external features store in certain format.
     Currently support pickle, npy and npz format.
     """
 
     def __init__(
-        self, augmentor: Optional["nemo.collections.asr.parts.perturb.FeatureAugmentor"] = None,
+        self,
+        augmentor: Optional["nemo.collections.asr.parts.perturb.FeatureAugmentor"] = None,
     ):
         """
         Feature loader

From 4b2e882660f3da66e7738955b8e5f18c21b104df Mon Sep 17 00:00:00 2001
From: andylamp <2177249+andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 01:40:36 +0000
Subject: [PATCH 15/16] add docstrings and fix line lengths

Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 .../asr/parts/preprocessing/feature_loader.py |  6 ++-
 .../asr/parts/preprocessing/segment.py        | 48 +++++++++++++------
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/nemo/collections/asr/parts/preprocessing/feature_loader.py b/nemo/collections/asr/parts/preprocessing/feature_loader.py
index 858e3acb68c8..e715d2dafb95 100644
--- a/nemo/collections/asr/parts/preprocessing/feature_loader.py
+++ b/nemo/collections/asr/parts/preprocessing/feature_loader.py
@@ -61,13 +61,17 @@ def _convert_samples_to_float32(samples: np.ndarray) -> np.ndarray:
         return float32_samples
 
     def process(self, file_path: str) -> torch.Tensor:
+        """Processes the features from the provided `file_path`."""
         features = self.load_feature_from_file(file_path)
         features = self.process_segment(features)
         return features
 
     def process_segment(self, feature_segment):
+        """Processes the provided feature segment."""
         if self.augmentor:
-            # augmentor for external features. Here possible augmentor for external embedding feature is Diaconis Augmentation and might be implemented later
+            # Augmentor for external features. A possible augmentor for
+            # external embedding features is Diaconis Augmentation, which
+            # might be implemented later.
             self.augmentor.perturb(feature_segment)
             return torch.tensor(feature_segment, dtype=torch.float)
 
diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py
index c518fcfa5b66..d43fd3aa53d0 100644
--- a/nemo/collections/asr/parts/preprocessing/segment.py
+++ b/nemo/collections/asr/parts/preprocessing/segment.py
@@ -67,14 +67,15 @@
 
 def select_channels(signal: npt.NDArray, channel_selector: Optional[ChannelSelectorType] = None) -> npt.NDArray:
     """
-    Convert a multi-channel signal to a single-channel signal by averaging over channels or selecting a single channel,
-    or pass-through multi-channel signal when channel_selector is `None`.
+    Convert a multi-channel signal to a single-channel signal by averaging over channels or
+    selecting a single channel, or pass-through multi-channel signal when channel_selector is `None`.
 
     Args:
         signal: numpy array with shape (..., num_channels)
-        channel selector: string denoting the downmix mode, an integer denoting the channel to be selected, or an iterable
-                          of integers denoting a subset of channels. Channel selector is using zero-based indexing.
-                          If set to `None`, the original signal will be returned. Uses zero-based indexing.
+        channel selector: string denoting the downmix mode, an integer denoting the channel to be selected,
+                          or an iterable of integers denoting a subset of channels. Channel selector
+                          uses zero-based indexing. If set to `None`, the original signal will be
+                          returned.
 
     Returns:
         numpy array
@@ -92,7 +93,8 @@ def select_channels(signal: npt.NDArray, channel_selector: Optional[ChannelSelec
 
     if num_channels >= num_samples:
         logging.warning(
-            'Number of channels (%d) is greater or equal than number of samples (%d). Check for possible transposition.',
+            'Number of channels (%d) is greater or equal than number of samples (%d). '
+            'Check for possible transposition.',
             num_channels,
             num_samples,
         )
@@ -199,7 +201,8 @@ def __init__(
             samples = samples.transpose()
             sample_rate = target_sr
         if trim:
-            # librosa is using channels-first layout (num_channels, num_samples), which is transpose of AudioSegment's layout
+            # librosa is using channels-first layout (num_channels, num_samples),
+            # which is transpose of AudioSegment's layout
             samples = samples.transpose()
             samples, _ = librosa.effects.trim(
                 samples, top_db=trim_top_db, ref=trim_ref, frame_length=trim_frame_length, hop_length=trim_hop_length
@@ -303,11 +306,12 @@ def from_file(
         :param trim_frame_length: the number of samples per analysis frame
         :param trim_hop_length: the number of samples between analysis frames
         :param orig_sr: the original sample rate
-        :param channel selector: string denoting the downmix mode, an integer denoting the channel to be selected, or an iterable
-                                 of integers denoting a subset of channels. Channel selector is using zero-based indexing.
-                                 If set to `None`, the original signal will be used.
+        :param channel selector: string denoting the downmix mode, an integer denoting the channel to be selected,
+                                 or an iterable of integers denoting a subset of channels. Channel selector is using
+                                 zero-based indexing. If set to `None`, the original signal will be used.
         :param normalize_db (Optional[float]): if not None, normalize the audio signal to a target RMS value
-        :param ref_channel (Optional[int]): channel to use as reference for normalizing multi-channel audio, set None to use max RMS across channels
+        :param ref_channel (Optional[int]): channel to use as reference for normalizing multi-channel audio,
+                                            set None to use max RMS across channels
         :return: AudioSegment instance
         """
         samples = None
@@ -441,7 +445,8 @@ def from_file_list(
             # Only single-channel individual files are supported for now
             if a_segment.num_channels != 1:
                 raise RuntimeError(
-                    f'Expecting a single-channel audio signal, but loaded {a_segment.num_channels} channels from file {a_file}'
+                    f'Expecting a single-channel audio signal, but loaded {a_segment.num_channels} '
+                    f'channels from file {a_file}'
                 )
 
             if target_sr is None:
@@ -523,14 +528,16 @@ def segment_from_file(
                         audio_start = math.floor(offset * sample_rate)
                         if audio_start > max_audio_start:
                             raise RuntimeError(
-                                f'Provided audio start ({audio_start}) is larger than the maximum possible ({max_audio_start})'
+                                f'Provided audio start ({audio_start}) is larger than the '
+                                f'maximum possible ({max_audio_start})'
                             )
                     f.seek(audio_start)
                     samples = f.read(n_segments_at_original_sr, dtype=dtype)
                     is_segmented = True
                 elif n_segments_at_original_sr > len(f):
                     logging.warning(
-                        f"Number of segments ({n_segments_at_original_sr}) is greater than the length ({len(f)}) of the audio file {audio_file}. This may lead to shape mismatch errors."
+                        f"Number of segments ({n_segments_at_original_sr}) is greater than the length ({len(f)}) "
+                        f"of the audio file {audio_file}. This may lead to shape mismatch errors."
                     )
                     samples = f.read(dtype=dtype)
                 else:
@@ -550,14 +557,17 @@ def segment_from_file(
 
     @property
     def samples(self):
+        """Returns a copy of the samples."""
         return self._samples.copy()
 
     @property
     def sample_rate(self):
+        """Returns the sample rate of the segment."""
         return self._sample_rate
 
     @property
     def num_channels(self):
+        """Returns the number of channels in the segment."""
         if self._samples.ndim == 1:
             return 1
         else:
@@ -565,10 +575,12 @@ def num_channels(self):
 
     @property
     def num_samples(self):
+        """Returns the number of samples in the segment."""
         return self._samples.shape[0]
 
     @property
     def duration(self):
+        """Returns the duration of the segment in seconds."""
         return self.num_samples / float(self._sample_rate)
 
     @property
@@ -579,21 +591,26 @@ def rms_db(self):
 
     @property
     def orig_sr(self):
+        """Returns the original sample rate of the segment."""
         return self._orig_sr
 
     @property
     def offset(self):
+        """Returns the offset used for the segment."""
         return float(self._offset) if self._offset is not None else None
 
     @property
     def audio_file(self):
+        """Returns the audio file that the segment was loaded from."""
         return str(self._audio_file) if self._audio_file is not None else None
 
     def is_empty(self):
+        """Checks if the segment is empty."""
         mean_square = np.sum(np.mean(self._samples**2, axis=0))
         return self.num_samples == 0 or mean_square == 0
 
     def gain_db(self, gain):
+        """Applies the given gain (in decibels) to the samples in place."""
         self._samples *= 10.0 ** (gain / 20.0)
 
     def normalize_db(self, target_db=-20, ref_channel=None):
@@ -622,7 +639,8 @@ def pad(self, pad_size, symmetric=False):
             pad_width = ((pad_size, pad_size), (0, 0)) if symmetric else ((0, pad_size), (0, 0))
         else:
             raise NotImplementedError(
-                f"Padding not implemented for signals with more that 2 dimensions. Current samples dimension: {samples_ndim}."
+                f"Padding not implemented for signals with more that 2 dimensions. "
+                f"Current samples dimension: {samples_ndim}."
             )
         # apply padding
         self._samples = np.pad(

From aa306e3c02b64f6e9a655f1c617792a135f9b96f Mon Sep 17 00:00:00 2001
From: andylamp <2177249+andylamp@users.noreply.github.com>
Date: Mon, 2 Dec 2024 01:56:01 +0000
Subject: [PATCH 16/16] fix final warning for line len

Signed-off-by: andylamp <2177249+andylamp@users.noreply.github.com>
---
 nemo/collections/asr/parts/preprocessing/segment.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py
index d43fd3aa53d0..00558769b020 100644
--- a/nemo/collections/asr/parts/preprocessing/segment.py
+++ b/nemo/collections/asr/parts/preprocessing/segment.py
@@ -419,7 +419,8 @@ def from_file_list(
             # Shortcut when selecting a single channel
             if channel_selector >= len(audio_file_list):
                 raise RuntimeError(
-                    f'Channel cannot be selected: channel_selector={channel_selector}, num_audio_files={len(audio_file_list)}'
+                    f'Channel cannot be selected: channel_selector={channel_selector}, '
+                    f'num_audio_files={len(audio_file_list)}'
                 )
             # Select only a single file
             audio_file_list = [audio_file_list[channel_selector]]