Skip to content

Commit

Permalink
Merge branch 'main' into vsarge/ft_recipes
Browse files Browse the repository at this point in the history
  • Loading branch information
vysarge authored Oct 28, 2024
2 parents 5906fc9 + 869625e commit c5b39e8
Show file tree
Hide file tree
Showing 136 changed files with 4,019 additions and 71 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/_test_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,17 @@ on:
log:
description: Last 2000 characters of the test step's log
value: ${{ jobs.main.outputs.log }}
potential_infra_failure:
description: Boolean flag when infra-related keyword spotted in logs.
value: ${{ jobs.main.outputs.potential_infra_failure }}
jobs:

main:
runs-on: ${{ inputs.RUNNER }}
outputs:
conclusion: ${{ steps.main.conclusion }}
log: ${{ steps.main.outputs.log }}
potential_infra_failure: ${{ steps.main.outputs.potential_infra_failure }}
steps:
- name: Docker system cleanup
run: |
Expand Down Expand Up @@ -75,6 +79,9 @@ jobs:
echo "log=$(tail -c 2000 err.log | base64 -w 0)" >> "$GITHUB_OUTPUT"
potential_infra_failure=$(cat err.log | grep -Eqi "gpu|cuda|device" && echo true || echo false)
echo "potential_infra_failure=$potential_infra_failure" >> "$GITHUB_OUTPUT"
exit $EXIT_CODE
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
Expand Down
14 changes: 9 additions & 5 deletions .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4515,7 +4515,10 @@ jobs:
if: ${{ always() && steps.pipeline-conclusion.outputs.FAILED == 'true' && env.SLACK_WEBHOOK != '' }}
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACK_WEBHOOK_ADMIN: <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_ACTOR: ${{ github.actor }}
BRANCH: ${{ github.head_ref || github.ref_name }}
REPOSITORY: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
PR_NUMBER: ${{ github.event.number }}
Expand Down Expand Up @@ -4571,13 +4574,15 @@ jobs:
echo "* [$JOB_NAME]($JOB_URL)" | tee -a $GITHUB_STEP_SUMMARY
LOGS=$(echo $JOB | yq '(.value.outputs.log | @base64d)' | tr -d '"')
LOGS=$([[ $(echo $LOGS | wc -c) -gt 0 ]] && echo -E "\`\`\`\n$LOGS\n\`\`\`" || echo "")
LOGS=$([[ $(echo $JOB | yq '.value.outputs.potential_infra_failure') == "true" ]] && echo -E "$LOGS\n\ncc: $SLACK_WEBHOOK_ADMIN" || echo -E "$LOGS")
SUMMARY=$(echo "$SUMMARY" | jq \
--arg pr "<$PR_URL|$PR_TITLE>" \
--arg job "<$JOB_URL|$JOB_NAME>" \
--arg logs "$LOGS" \
--arg author "<https://github.com/${{ github.actor }}|${{ github.actor }}>" \
--arg branch "<https://github.com/$REPOSITORY/tree/${{ github.head_ref || github.ref_name }}|${{ github.head_ref || github.ref_name }}>"\
--arg logs "$(echo -e "$LOGS")" \
--arg author "<https://github.com/$GITHUB_ACTOR|$GITHUB_ACTOR>" \
--arg branch "<https://github.com/$REPOSITORY/tree/$BRANCH|$BRANCH>"\
'. += [
{
"type": "section",
Expand All @@ -4588,8 +4593,7 @@ jobs:
+ "\nJob: " + $job
+ "\nAuthor: " + $author
+ "\nBranch: " + $branch
+ "\nLogs:"
+ "```\n" + $logs + "\n```"
+ "\nLogs:" + $logs
)
}
}
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_T
# Install NeMo requirements
ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
ARG MODELOPT_VERSION=0.17.0
ARG MCORE_TAG=425cdd48d5ef5d360d8033288ff7cb0d378f535f
ARG MCORE_TAG=d357c188323b6928cbcbd6f7e06af04c1694382f

ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
RUN \
Expand Down
119 changes: 119 additions & 0 deletions examples/audio/conf/masking_with_online_augmentation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Example configuration for training a mask-based audio model (STFT encoder ->
# RNN mask estimator -> masked reference channel -> inverse STFT) using Lhotse
# data loading. The training set is augmented online with room impulse
# responses (RIR) and noise; validation and test sets use pre-mixed noisy cuts.
#
# Values set to `???` are mandatory and must be provided by the user,
# e.g. on the command line: model.train_ds.cuts_path=/path/to/cuts.jsonl.gz
name: "masking_with_online_augmentation"

model:
  sample_rate: 16000
  skip_nan_grad: false
  num_outputs: 1

  train_ds:
    use_lhotse: true # enable Lhotse data loader
    cuts_path: ??? # path to Lhotse cuts manifest with speech signals for augmentation (including custom "target_recording" field with the same signals)
    # Duration (in seconds) of the segments used for training.
    # Number of STFT time frames = 1 + (truncate_duration * sample_rate) // encoder.hop_length
    #                            = 1 + (4.0 * 16000) // 256 = 251 (assuming a centered STFT)
    truncate_duration: 4.0
    truncate_offset_type: random # if the file is longer than truncate_duration, use random offset to select a subsegment
    batch_size: 64 # batch size may be increased based on the available memory
    shuffle: true
    num_workers: 8
    pin_memory: true
    rir_enabled: true # enable room impulse response augmentation
    rir_path: ??? # path to Lhotse recordings manifest with room impulse response signals
    noise_path: ??? # path to Lhotse cuts manifest with noise signals

  validation_ds:
    use_lhotse: true # enable Lhotse data loader
    cuts_path: ??? # path to Lhotse cuts manifest with noisy speech signals (including custom "target_recording" field with the clean signals)
    batch_size: 64 # batch size may be increased based on the available memory
    shuffle: false
    num_workers: 4
    pin_memory: true

  test_ds:
    use_lhotse: true # enable Lhotse data loader
    cuts_path: ??? # path to Lhotse cuts manifest with noisy speech signals (including custom "target_recording" field with the clean signals)
    batch_size: 1 # batch size may be increased based on the available memory
    shuffle: false
    num_workers: 4
    pin_memory: true

  # encoder and decoder must use the same fft_length and hop_length
  encoder:
    _target_: nemo.collections.audio.modules.transforms.AudioToSpectrogram
    fft_length: 512 # Length of the window and FFT for calculating spectrogram
    hop_length: 256 # Hop length for calculating spectrogram

  decoder:
    _target_: nemo.collections.audio.modules.transforms.SpectrogramToAudio
    fft_length: 512 # Length of the window and FFT for calculating spectrogram
    hop_length: 256 # Hop length for calculating spectrogram

  mask_estimator:
    _target_: nemo.collections.audio.modules.masking.MaskEstimatorRNN
    num_outputs: ${model.num_outputs}
    num_subbands: 257 # Number of subbands of the input spectrogram (encoder.fft_length // 2 + 1)
    num_features: 256 # Number of features at RNN input
    num_layers: 5 # Number of RNN layers
    bidirectional: true # Use bi-directional RNN

  mask_processor:
    _target_: nemo.collections.audio.modules.masking.MaskReferenceChannel # Apply mask on the reference channel
    ref_channel: 0 # Reference channel for the output

  loss:
    _target_: nemo.collections.audio.losses.SDRLoss
    scale_invariant: true # Use scale-invariant SDR

  metrics:
    val:
      sdr: # output SDR
        _target_: torchmetrics.audio.SignalDistortionRatio
    test:
      sdr_ch0: # SDR on output channel 0
        _target_: torchmetrics.audio.SignalDistortionRatio
        channel: 0

  optim:
    name: adamw
    lr: 1e-4
    # optimizer arguments
    betas: [0.9, 0.98]
    weight_decay: 1e-3

trainer:
  devices: -1 # number of GPUs, -1 would use all available GPUs
  num_nodes: 1
  max_epochs: -1
  max_steps: -1 # computed at runtime if not set
  val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations
  accelerator: auto
  strategy: ddp
  accumulate_grad_batches: 1
  gradient_clip_val: null
  precision: 32 # Should be set to 16 for O1 and O2 to enable the AMP.
  log_every_n_steps: 25 # Interval of logging.
  enable_progress_bar: true
  num_sanity_val_steps: 0 # number of validation steps to run as a sanity check before starting the training, setting to 0 disables it
  check_val_every_n_epoch: 1 # run validation every n epochs
  sync_batchnorm: true
  enable_checkpointing: false # Provided by exp_manager
  logger: false # Provided by exp_manager

exp_manager:
  exp_dir: null
  name: ${name}
  create_tensorboard_logger: true
  create_checkpoint_callback: true
  checkpoint_callback_params:
    # in case of multiple validation sets, first one is used
    monitor: "val_loss"
    mode: "min"
    save_top_k: 5
    always_save_nemo: true # saves the checkpoints as nemo files instead of PTL checkpoints

  resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc.
  # you need to set these two to true to continue the training
  resume_if_exists: false
  resume_ignore_no_checkpoint: false

  # You may use this section to create a W&B logger
  create_wandb_logger: false
  wandb_logger_kwargs:
    name: null
    project: null
19 changes: 18 additions & 1 deletion examples/multimodal/multimodal_llm/neva/eval/mixtral_eval.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Script to query Mixtral-8x7B as a judge via NGC API for evaluation"""
import argparse
import json
Expand Down Expand Up @@ -61,7 +75,10 @@ def get_eval(content: str, max_tokens: int):
'role': 'system',
'content': 'You are a helpful and precise assistant for checking the quality of the answer.',
},
{'role': 'user', 'content': content,},
{
'role': 'user',
'content': content,
},
],
"temperature": 0.2,
"top_p": 0.7,
Expand Down
6 changes: 5 additions & 1 deletion examples/nlp/dialogue/dialogue.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,14 @@
@hydra_runner(config_path="conf", config_name="dialogue_config")
def main(cfg: DictConfig) -> None:
pl.seed_everything(42)
logging.warning('This script is no longer supported in NeMo and is scheduled for removal in the 23.11 release.')
logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')

try:
strategy = NLPDDPStrategy(no_ddp_communication_hook=True, find_unused_parameters=True,)
strategy = NLPDDPStrategy(
no_ddp_communication_hook=True,
find_unused_parameters=True,
)
except (ImportError, ModuleNotFoundError):
strategy = 'auto'

Expand Down
14 changes: 14 additions & 0 deletions examples/nlp/rag/rag_generating.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from llama_index.core import Settings, StorageContext, load_index_from_storage

from nemo.collections.nlp.models.rag.custom_bert_embedder import NeMoBertEmbeddings
Expand Down
14 changes: 14 additions & 0 deletions examples/nlp/rag/rag_indexing.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch

from nemo.core.classes.mixins import adapter_mixins
Expand Down
7 changes: 7 additions & 0 deletions nemo/collections/audio/data/audio_to_audio_lhotse.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ def __getitem__(self, cuts: CutSet) -> dict[str, torch.Tensor]:
retained_cuts = [
cut._first_non_padding_cut if isinstance(cut, MixedCut) else cut for cut in retained_padded_cuts
]

# if online augmentation is applied, some retained cuts may still be MixedCuts (including the original speech, noise, and augmentation);
# take the first non-padding cut from each of them, which is expected to be the clean speech signal
for n, cut in enumerate(retained_cuts):
if isinstance(cut, MixedCut):
retained_cuts[n] = cut._first_non_padding_cut
# create cutset
retained_cuts = CutSet.from_cuts(retained_cuts)

if _key_available(retained_cuts, self.TARGET_KEY):
Expand Down
14 changes: 14 additions & 0 deletions nemo/collections/common/metrics/perf_metrics.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Dict, List, Optional

import numpy as np
Expand Down
14 changes: 14 additions & 0 deletions nemo/collections/common/parts/perf_metrics_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import os
from typing import List
Expand Down
14 changes: 14 additions & 0 deletions nemo/collections/common/prompts/canary.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any

import torch
Expand Down
14 changes: 14 additions & 0 deletions nemo/collections/common/prompts/example.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Implemented following the guide at https://www.promptingguide.ai/models/phi-2#phi-2-usage
"""
Expand Down
Loading

0 comments on commit c5b39e8

Please sign in to comment.