Skip to content

Commit

Permalink
Merge branch 'r2.0.0rc1' into dpykhtar/torch_dist_as_default
Browse files Browse the repository at this point in the history
  • Loading branch information
dimapihtar authored Jul 25, 2024
2 parents 4e4901a + be21e95 commit 52b4a27
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@ jobs:
with:
RUNNER: self-hosted-azure
SCRIPT: |
pytest tests/collections/asr/decoding/rnnt_alignments_check.py --durations=-1
pytest tests/collections/asr/decoding/rnnt_alignments_check.py --durations=-1 --with_downloads
# L2: Segmentation Tool
L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav:
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.01-py3
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3

# build an image that includes only the nemo dependencies, ensures that dependencies
# are included first for optimal caching, and useful for building a development
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,9 @@ def _enable_nvidia_optimizations(self):
self.cfg.persist_layer_norm = False

# NVFUSER available starting with 21.11
if NVIDIA_TORCH_MAJOR >= 21 or (NVIDIA_TORCH_MAJOR == 21 and NVIDIA_TORCH_MINOR >= 11):
if (NVIDIA_TORCH_MAJOR >= 21 or (NVIDIA_TORCH_MAJOR == 21 and NVIDIA_TORCH_MINOR >= 11)) and (
NVIDIA_TORCH_MAJOR < 23 or (NVIDIA_TORCH_MAJOR == 23 and NVIDIA_TORCH_MINOR < 11)
):

# NVFUSER
torch._C._jit_set_profiling_executor(True)
Expand Down
3 changes: 2 additions & 1 deletion nemo/lightning/pytorch/callbacks/model_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ def _save_checkpoint(self, trainer: 'pytorch_lightning.Trainer', filepath: str)
self.set_checkpoint_unfinished_marker(filepath, barrier_after=True)
ema_callback = self._ema_callback(trainer)

self._last_global_step_saved = trainer.global_step

if ema_callback is not None:
if self.async_save:
raise ValueError('async_save with EMA not supported')
Expand Down Expand Up @@ -422,7 +424,6 @@ def _get_finalize_save_checkpoint_callback(

def _cb():
logging.debug(f'Finalize callback called for step {global_step}, filepath {filepath}')
self._last_global_step_saved = global_step
self._last_checkpoint_saved = filepath

from nemo.utils.get_rank import is_global_rank_zero
Expand Down
80 changes: 68 additions & 12 deletions tests/collections/asr/decoding/rnnt_alignments_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,66 @@
# these tests outside of the CI machines environment, where test data is
# stored

import os
from pathlib import Path
from typing import Union

import pytest
import torch.cuda
from examples.asr.transcribe_speech import TranscriptionConfig
from omegaconf import OmegaConf

from nemo.collections.asr.parts.utils.transcribe_utils import prepare_audio_data, setup_model
from nemo.collections.asr.models import EncDecRNNTBPEModel
from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest
from nemo.collections.asr.parts.utils.transcribe_utils import prepare_audio_data

DEVICES = []

if torch.cuda.is_available():
DEVICES.append('cuda')

TEST_DATA_PATH = "/home/TestData/an4_dataset/an4_val.json"
PRETRAINED_MODEL_NAME = "stt_en_conformer_transducer_small"

@pytest.fixture(scope="module")
def stt_en_conformer_transducer_small_model():
    """Load the pretrained "stt_en_conformer_transducer_small" model onto CPU, once per test module."""
    return EncDecRNNTBPEModel.from_pretrained(
        model_name="stt_en_conformer_transducer_small",
        map_location="cpu",
    )

def get_rnnt_alignments(strategy: str, loop_labels: bool = True, use_cuda_graph_decoder=False, location="cuda"):
cfg = OmegaConf.structured(TranscriptionConfig(pretrained_name=PRETRAINED_MODEL_NAME))

@pytest.fixture(scope="module")
def an4_val_manifest_corrected(tmp_path_factory, test_data_dir):
    """
    Write a copy of the an4_val manifest whose audio filepaths point into the local test data.

    Every "tests/data/asr" occurrence in a record's "audio_filepath" is replaced with the
    resolved directory that contains the original manifest, e.g.,
    "tests/data/asr/test/an4/wav/an440-mjgm-b.wav" -> test_data_dir / "asr/test/an4/wav/an440-mjgm-b.wav"

    Args:
        tmp_path_factory: pytest factory; the output directory is created via mktemp("manifests").
        test_data_dir: root of the test data; the manifest is read from "asr/an4_val.json" under it.

    Returns:
        Path to the corrected manifest ("an4_val_corrected.json" inside the temporary directory).
    """
    an4_val_manifest_orig_path = Path(test_data_dir) / "asr/an4_val.json"
    an4_val_manifest_corrected_path = tmp_path_factory.mktemp("manifests") / "an4_val_corrected.json"
    # The replacement prefix is identical for every record, so resolve it once up front.
    asr_data_dir = str(an4_val_manifest_orig_path.resolve().parent)
    an4_val_records = read_manifest(an4_val_manifest_orig_path)
    for record in an4_val_records:
        record["audio_filepath"] = record["audio_filepath"].replace("tests/data/asr", asr_data_dir)
    write_manifest(an4_val_manifest_corrected_path, an4_val_records)
    return an4_val_manifest_corrected_path


def get_rnnt_alignments(
strategy: str,
manifest_path: Union[Path, str],
model: EncDecRNNTBPEModel,
loop_labels: bool = True,
use_cuda_graph_decoder=False,
device="cuda",
):
cfg = OmegaConf.structured(TranscriptionConfig())
cfg.rnnt_decoding.confidence_cfg.preserve_frame_confidence = True
cfg.rnnt_decoding.preserve_alignments = True
cfg.rnnt_decoding.strategy = strategy
if cfg.rnnt_decoding.strategy == "greedy_batch":
cfg.rnnt_decoding.greedy.loop_labels = loop_labels
cfg.rnnt_decoding.greedy.use_cuda_graph_decoder = use_cuda_graph_decoder
cfg.dataset_manifest = TEST_DATA_PATH
cfg.dataset_manifest = str(manifest_path)
filepaths = prepare_audio_data(cfg)[0][:10] # selecting 10 files only

model = setup_model(cfg, map_location=location)[0]
model = model.to(device)
model.change_decoding_strategy(cfg.rnnt_decoding)

transcriptions = model.transcribe(
Expand Down Expand Up @@ -72,16 +109,35 @@ def cleanup_local_folder():


# TODO: add the same tests for multi-blank RNNT decoding
@pytest.mark.skipif(not os.path.exists('/home/TestData'), reason='Not a Jenkins machine')
@pytest.mark.parametrize("device", DEVICES)
@pytest.mark.parametrize("loop_labels", [True, False])
@pytest.mark.parametrize("use_cuda_graph_decoder", [True, False])
def test_rnnt_alignments(loop_labels: bool, use_cuda_graph_decoder: bool):
@pytest.mark.with_downloads
def test_rnnt_alignments(
loop_labels: bool,
use_cuda_graph_decoder: bool,
device: str,
an4_val_manifest_corrected,
stt_en_conformer_transducer_small_model,
):
if use_cuda_graph_decoder and device != "cuda":
pytest.skip("CUDA decoder works only with CUDA")
if not loop_labels and use_cuda_graph_decoder:
pytest.skip("Frame-Looping algorithm with CUDA graphs does not yet support alignments")
# using greedy as baseline and comparing all other configurations to it
ref_transcriptions = get_rnnt_alignments("greedy")
ref_transcriptions = get_rnnt_alignments(
"greedy",
manifest_path=an4_val_manifest_corrected,
model=stt_en_conformer_transducer_small_model,
device=device,
)
transcriptions = get_rnnt_alignments(
"greedy_batch", loop_labels=loop_labels, use_cuda_graph_decoder=use_cuda_graph_decoder
"greedy_batch",
loop_labels=loop_labels,
use_cuda_graph_decoder=use_cuda_graph_decoder,
manifest_path=an4_val_manifest_corrected,
model=stt_en_conformer_transducer_small_model,
device=device,
)
# comparing that label sequence in alignments is exactly the same
# we can't compare logits as well, because they are expected to be
Expand Down
9 changes: 7 additions & 2 deletions tests/collections/common/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@


class TestCommonMetrics:
top_k_logits = torch.tensor([[0.1, 0.3, 0.2, 0.0], [0.9, 0.6, 0.2, 0.3], [0.2, 0.1, 0.4, 0.3]],) # 1 # 0 # 2
top_k_logits = torch.tensor(
[[0.1, 0.3, 0.2, 0.0], [0.9, 0.6, 0.2, 0.3], [0.2, 0.1, 0.4, 0.3]],
) # 1 # 0 # 2

@pytest.mark.unit
def test_top_1_accuracy(self):
Expand Down Expand Up @@ -130,7 +132,10 @@ def test_top_1_accuracy_distributed_uneven_batch(self):
class TestPerplexity(PerplexityTester):
def test_perplexity(self, ddp, dist_sync_on_step, probs, logits):
self.run_class_perplexity_test(
ddp=ddp, probs=probs, logits=logits, dist_sync_on_step=dist_sync_on_step,
ddp=ddp,
probs=probs,
logits=logits,
dist_sync_on_step=dist_sync_on_step,
)


Expand Down
8 changes: 8 additions & 0 deletions tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deprecation Warning\n",
"This tutorial is deprecated and no longer supported in NeMo. The notebook will be removed in the 24.09 release."
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down

0 comments on commit 52b4a27

Please sign in to comment.