diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 000000000000..ef7434efe377
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,2 @@
+.github/ @pablo-garay @ko3n1g
+Dockerfile.ci @pablo-garay @ko3n1g
diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 46182745e52d..2ac3ef606dd8 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -256,11 +256,12 @@ jobs:
quantization.num_calib_size=8 \
inference.batch_size=2 \
export.inference_tensor_parallel=2 \
+ export.sample_output=False \
export.save_path=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo
AFTER_SCRIPT: |
rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
- L2_PTQ_Llama2_INT8_SQ:
+ OPTIONAL_L2_PTQ_Llama2_INT8_SQ:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
with:
@@ -268,14 +269,16 @@ jobs:
TIMEOUT: 15
SCRIPT: |
python examples/nlp/language_modeling/megatron_gpt_ptq.py \
- model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
- quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
- quantization.algorithm=int8_sq \
- quantization.num_calib_size=8 \
- inference.batch_size=2 \
- export.save_path=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
+ model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+ quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
+ quantization.algorithm=int8_sq \
+ quantization.num_calib_size=8 \
+ inference.batch_size=2 \
+ export.sample_output=False \
+ export.save_path=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
AFTER_SCRIPT: |
rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
+ IS_OPTIONAL: true
# TODO: investigate int4_awq stuck issues and restore the test
#L2_PTQ_Llama2_INT4_AWQ:
@@ -310,44 +313,42 @@ jobs:
#- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
# if: "failure()"
- L2_QAT_Llama2_INT4:
- needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- python examples/nlp/language_modeling/tuning/megatron_gpt_qat.py \
- quantization.algorithm=int4 \
- quantization.num_calib_size=8 \
- trainer.devices=1 \
- trainer.num_nodes=1 \
- trainer.max_steps=4 \
- trainer.val_check_interval=4 \
- +trainer.limit_val_batches=2 \
- exp_manager.explicit_log_dir=llama2_qat_results \
- model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
- model.tensor_model_parallel_size=1 \
- model.pipeline_model_parallel_size=1 \
- model.global_batch_size=2 \
- model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
- model.data.train_ds.concat_sampling_probabilities=[1.0] \
- model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl]
-
- rm -rf llama2_qat_results
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ # OPTIONAL_L2_QAT_Llama2_INT4:
+ # needs: [cicd-test-container-setup]
+ # runs-on: self-hosted-azure
+ # timeout-minutes: 10
+ # container:
+ # image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
+ # options:
+ # # --user 0:128
+ # --device=/dev/nvidia0
+ # --gpus all
+ # --shm-size=8g
+ # --env TRANSFORMERS_OFFLINE=0
+ # --env HYDRA_FULL_ERROR=1
+ # --volume /mnt/datadrive/TestData:/home/TestData
+ # steps:
+ # - name: Checkout repository
+ # uses: actions/checkout@v4
+ # - run: |
+ # python examples/nlp/language_modeling/tuning/megatron_gpt_qat.py \
+ # quantization.algorithm=int4 \
+ # quantization.num_calib_size=8 \
+ # trainer.devices=1 \
+ # trainer.num_nodes=1 \
+ # trainer.max_steps=4 \
+ # trainer.val_check_interval=4 \
+ # +trainer.limit_val_batches=2 \
+ # exp_manager.explicit_log_dir=llama2_qat_results \
+ # model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+ # model.tensor_model_parallel_size=1 \
+ # model.pipeline_model_parallel_size=1 \
+ # model.global_batch_size=2 \
+ # model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
+ # model.data.train_ds.concat_sampling_probabilities=[1.0] \
+ # model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl]
+
+ # rm -rf llama2_qat_results
# L2: ASR dev run
ASR_dev_run_Speech_to_Text:
@@ -810,7 +811,7 @@ jobs:
with:
RUNNER: self-hosted-azure
SCRIPT: |
- pytest tests/collections/asr/decoding/rnnt_alignments_check.py --durations=-1
+ pytest tests/collections/asr/decoding/rnnt_alignments_check.py --durations=-1 --with_downloads
# L2: Segmentation Tool
L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav:
@@ -1869,250 +1870,210 @@ jobs:
L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- NVTE_FUSED_ATTN=0 NVTE_FLASH_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=10 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=10 \
- trainer.precision=bf16 \
- model.megatron_amp_O2=True \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=2 \
- model.optim.sched.constant_steps=2 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method=block \
- model.activations_checkpoint_num_layers=1 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
-
- NVTE_FUSED_ATTN=0 NVTE_FLASH_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=10 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=20 \
- trainer.precision=bf16 \
- model.megatron_amp_O2=True \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
- exp_manager.resume_if_exists=True \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=2 \
- model.optim.sched.constant_steps=2 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method=block \
- model.activations_checkpoint_num_layers=1 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ NVTE_FUSED_ATTN=0 NVTE_FLASH_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=10 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=10 \
+ trainer.precision=bf16 \
+ model.megatron_amp_O2=True \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=2 \
+ model.optim.sched.constant_steps=2 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method=block \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
+
+ NVTE_FUSED_ATTN=0 NVTE_FLASH_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=10 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=20 \
+ trainer.precision=bf16 \
+ model.megatron_amp_O2=True \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
+ exp_manager.resume_if_exists=True \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=2 \
+ model.optim.sched.constant_steps=2 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method=block \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
L2_Megatron_Bert_Pretraining_and_Resume_Training:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- NVTE_FUSED_ATTN=0 NVTE_FLASH_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=10 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=10 \
- trainer.precision=bf16 \
- model.megatron_amp_O2=True \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.sequence_parallel=True \
- model.optim.sched.warmup_steps=2 \
- model.optim.sched.constant_steps=2 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method=block \
- model.activations_checkpoint_num_layers=1 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
-
- NVTE_FUSED_ATTN=0 NVTE_FLASH_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=10 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=20 \
- trainer.precision=bf16 \
- model.megatron_amp_O2=True \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
- exp_manager.resume_if_exists=True \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=2 \
- model.optim.sched.constant_steps=2 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method=block \
- model.activations_checkpoint_num_layers=1 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
-
- rm -rf examples/nlp/language_modeling/bert_pretrain_results
- rm -rf examples/nlp/language_modeling/bert_index_mappings
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ NVTE_FUSED_ATTN=0 NVTE_FLASH_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=10 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=10 \
+ trainer.precision=bf16 \
+ model.megatron_amp_O2=True \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.sequence_parallel=True \
+ model.optim.sched.warmup_steps=2 \
+ model.optim.sched.constant_steps=2 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method=block \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
+
+ NVTE_FUSED_ATTN=0 NVTE_FLASH_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=10 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=20 \
+ trainer.precision=bf16 \
+ model.megatron_amp_O2=True \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
+ exp_manager.resume_if_exists=True \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=2 \
+ model.optim.sched.constant_steps=2 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method=block \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
+ AFTER_SCRIPT: |
+ rm -rf examples/nlp/language_modeling/bert_pretrain_results
+ rm -rf examples/nlp/language_modeling/bert_index_mappings
L2_Megatron_Core_Bert_Pretraining_and_Resume_Training:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- NVTE_FLASH_ATTN=0 NVTE_FUSED_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=10 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=10 \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
- model.mcore_bert=True \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.sequence_parallel=True \
- model.optim.sched.warmup_steps=2 \
- model.optim.sched.constant_steps=2 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method='block' \
- model.activations_checkpoint_num_layers=1 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
-
- NVTE_FLASH_ATTN=0 NVTE_FUSED_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=10 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=20 \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
- exp_manager.resume_if_exists=True \
- model.mcore_bert=True \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=2 \
- model.optim.sched.constant_steps=2 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method='block' \
- model.activations_checkpoint_num_layers=1 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
-
- rm -rf examples/nlp/language_modeling/bert_pretrain_results
- rm -rf examples/nlp/language_modeling/bert_index_mappings
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ NVTE_FLASH_ATTN=0 NVTE_FUSED_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=10 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=10 \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
+ model.mcore_bert=True \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.sequence_parallel=True \
+ model.optim.sched.warmup_steps=2 \
+ model.optim.sched.constant_steps=2 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method='block' \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
+
+ NVTE_FLASH_ATTN=0 NVTE_FUSED_ATTN=0 python examples/nlp/language_modeling/megatron_bert_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=10 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=20 \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
+ exp_manager.resume_if_exists=True \
+ model.mcore_bert=True \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=2 \
+ model.optim.sched.constant_steps=2 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method='block' \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
+ AFTER_SCRIPT: |
+ rm -rf examples/nlp/language_modeling/bert_pretrain_results
+ rm -rf examples/nlp/language_modeling/bert_index_mappings
L2_Megatron_RETRO_Pretraining_and_Resume_Training:
needs: [cicd-test-container-setup]
@@ -2336,65 +2297,37 @@ jobs:
L2_RAG_Pipeline_Indexing:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- python examples/nlp/rag/rag_indexing.py \
- trainer.num_nodes=1 \
- trainer.devices=1 \
- trainer.precision='bf16-mixed' \
- indexing.embedder.model_path='/home/TestData/nlp/rag_pipeline/testing_models/embedders/sbert_nemo.nemo' \
- indexing.embedder.embed_batch_size=128 \
- indexing.data.data_path='/home/TestData/nlp/rag_pipeline/testing_data/corpus_data/sample_data' \
- indexing.data.chunk_size=256 \
- indexing.data.chunk_overlap=10 \
- indexing.index_path='/home/TestData/nlp/rag_pipeline/testing_data/saved_index/sample_index'
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ python examples/nlp/rag/rag_indexing.py \
+ trainer.num_nodes=1 \
+ trainer.devices=1 \
+ trainer.precision='bf16-mixed' \
+ indexing.embedder.model_path='/home/TestData/nlp/rag_pipeline/testing_models/embedders/sbert_nemo.nemo' \
+ indexing.embedder.embed_batch_size=128 \
+ indexing.data.data_path='/home/TestData/nlp/rag_pipeline/testing_data/corpus_data/sample_data' \
+ indexing.data.chunk_size=256 \
+ indexing.data.chunk_overlap=10 \
+ indexing.index_path='/home/TestData/nlp/rag_pipeline/testing_data/saved_index/sample_index'
L2_RAG_Pipeline_Generating:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- python examples/nlp/rag/rag_generating.py \
- trainer.devices=1 \
- trainer.precision='bf16-mixed' \
- indexing.embedder.model_path='/home/TestData/nlp/rag_pipeline/testing_models/embedders/sbert_nemo.nemo' \
- indexing.index_path='/home/TestData/nlp/rag_pipeline/testing_data/saved_index/sample_index' \
- generating.llm.model_path='/home/TestData/nlp/rag_pipeline/testing_models/llms/megatron_gpt_125m.nemo' \
- generating.inference.tokens_to_generate=50 \
- generating.inference.greedy=False \
- generating.inference.temperature=1.0 \
- generating.query='Which art schools did I applied to?'
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ python examples/nlp/rag/rag_generating.py \
+ trainer.devices=1 \
+ trainer.precision='bf16-mixed' \
+ indexing.embedder.model_path='/home/TestData/nlp/rag_pipeline/testing_models/embedders/sbert_nemo.nemo' \
+ indexing.index_path='/home/TestData/nlp/rag_pipeline/testing_data/saved_index/sample_index' \
+ generating.llm.model_path='/home/TestData/nlp/rag_pipeline/testing_models/llms/megatron_gpt_125m.nemo' \
+ generating.inference.tokens_to_generate=50 \
+ generating.inference.greedy=False \
+ generating.inference.temperature=1.0 \
+ generating.query='Which art schools did I applied to?'
L2_BioMegatron_Bert_NER_Task:
needs: [cicd-test-container-setup]
@@ -2413,191 +2346,163 @@ jobs:
L2_Megatron_GPT_Pretraining_and_Resume_Training_TP2:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=2 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=3 \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=1 \
- model.optim.sched.constant_steps=1 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.bias=False \
- model.bias_activation_fusion=False \
- model.bias_dropout_add_fusion=False \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method=block \
- model.activations_checkpoint_granularity=full \
- model.activations_checkpoint_num_layers=1 \
- model.data.validation_drop_last=False \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
-
- python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=2 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=6 \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
- exp_manager.resume_if_exists=True \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=2 \
- model.optim.sched.constant_steps=2 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.bias=False \
- model.bias_activation_fusion=False \
- model.bias_dropout_add_fusion=False \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method=block \
- model.activations_checkpoint_granularity=full \
- model.activations_checkpoint_num_layers=1 \
- model.data.validation_drop_last=False \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
-
- rm -rf examples/nlp/language_modeling/gpt_pretrain_results
- rm -rf examples/nlp/language_modeling/gpt_index_mappings
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=2 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=3 \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=1 \
+ model.optim.sched.constant_steps=1 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.bias=False \
+ model.bias_activation_fusion=False \
+ model.bias_dropout_add_fusion=False \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method=block \
+ model.activations_checkpoint_granularity=full \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.validation_drop_last=False \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
+
+ python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=2 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=6 \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
+ exp_manager.resume_if_exists=True \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=2 \
+ model.optim.sched.constant_steps=2 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.bias=False \
+ model.bias_activation_fusion=False \
+ model.bias_dropout_add_fusion=False \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method=block \
+ model.activations_checkpoint_granularity=full \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.validation_drop_last=False \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
+ AFTER_SCRIPT: |
+ rm -rf examples/nlp/language_modeling/gpt_pretrain_results
+ rm -rf examples/nlp/language_modeling/gpt_index_mappings
L2_Megatron_GPT_with_Rope_Pretraining_and_Resume_Training_TP2:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=2 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=3 \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=1 \
- model.optim.sched.constant_steps=1 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.position_embedding_type=rope \
- model.rotary_percentage=0.5 \
- model.bias=False \
- model.bias_activation_fusion=False \
- model.bias_dropout_add_fusion=False \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method=block \
- model.activations_checkpoint_granularity=full \
- model.activations_checkpoint_num_layers=1 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
-
- # commented out to save time on github ci @adithyare
- # python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
- # trainer.devices=2 \
- # trainer.accelerator=gpu \
- # trainer.log_every_n_steps=1 \
- # trainer.val_check_interval=2 \
- # trainer.limit_val_batches=1 \
- # trainer.accumulate_grad_batches=1 \
- # trainer.max_steps=6 \
- # trainer.gradient_clip_val=1.0 \
- # exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
- # exp_manager.resume_if_exists=True \
- # model.tensor_model_parallel_size=2 \
- # model.optim.name=fused_adam \
- # model.optim.lr=2e-4 \
- # model.optim.sched.warmup_steps=2 \
- # model.optim.sched.constant_steps=2 \
- # model.optim.sched.min_lr=8e-5 \
- # model.max_position_embeddings=128 \
- # model.encoder_seq_length=128 \
- # model.data.seq_length=128 \
- # model.position_embedding_type=rope \
- # model.rotary_percentage=0.5 \
- # model.normalization=rmsnorm \
- # model.bias=False \
- # model.bias_activation_fusion=False \
- # model.bias_dropout_add_fusion=False \
- # model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- # model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- # model.num_layers=8 \
- # model.hidden_size=256 \
- # model.num_attention_heads=8 \
- # model.activations_checkpoint_method=block \
- # model.activations_checkpoint_granularity=full \
- # model.activations_checkpoint_num_layers=1 \
- # model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
- # model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings"
-
- rm -rf examples/nlp/language_modeling/gpt_pretrain_results
- rm -rf examples/nlp/language_modeling/gpt_index_mappings
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=2 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=3 \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=1 \
+ model.optim.sched.constant_steps=1 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.position_embedding_type=rope \
+ model.rotary_percentage=0.5 \
+ model.bias=False \
+ model.bias_activation_fusion=False \
+ model.bias_dropout_add_fusion=False \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method=block \
+ model.activations_checkpoint_granularity=full \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
+
+ # commented out to save time on github ci @adithyare
+ # python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
+ # trainer.devices=2 \
+ # trainer.accelerator=gpu \
+ # trainer.log_every_n_steps=1 \
+ # trainer.val_check_interval=2 \
+ # trainer.limit_val_batches=1 \
+ # trainer.accumulate_grad_batches=1 \
+ # trainer.max_steps=6 \
+ # trainer.gradient_clip_val=1.0 \
+ # exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
+ # exp_manager.resume_if_exists=True \
+ # model.tensor_model_parallel_size=2 \
+ # model.optim.name=fused_adam \
+ # model.optim.lr=2e-4 \
+ # model.optim.sched.warmup_steps=2 \
+ # model.optim.sched.constant_steps=2 \
+ # model.optim.sched.min_lr=8e-5 \
+ # model.max_position_embeddings=128 \
+ # model.encoder_seq_length=128 \
+ # model.data.seq_length=128 \
+ # model.position_embedding_type=rope \
+ # model.rotary_percentage=0.5 \
+ # model.normalization=rmsnorm \
+ # model.bias=False \
+ # model.bias_activation_fusion=False \
+ # model.bias_dropout_add_fusion=False \
+ # model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ # model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ # model.num_layers=8 \
+ # model.hidden_size=256 \
+ # model.num_attention_heads=8 \
+ # model.activations_checkpoint_method=block \
+ # model.activations_checkpoint_granularity=full \
+ # model.activations_checkpoint_num_layers=1 \
+ # model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
+ # model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings"
+ AFTER_SCRIPT: |
+ rm -rf examples/nlp/language_modeling/gpt_pretrain_results
+ rm -rf examples/nlp/language_modeling/gpt_index_mappings
# This test requires Ampere but some of the test GPUs are Volta
# Need to add a check for compute capability before uncommenting this test
@@ -2693,275 +2598,233 @@ jobs:
L2_Megatron_GPT_with_ResetLR_Pretraining_and_Resume_Training_TP2:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=3 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=3 \
- trainer.precision=bf16 \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
- model.tensor_model_parallel_size=2 \
- model.megatron_amp_O2=True \
- model.optim.name=distributed_fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=2 \
- model.optim.sched.constant_steps=2 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
-
- python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=3 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=6 \
- trainer.precision=bf16 \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
- exp_manager.resume_if_exists=True \
- model.reset_lr=True \
- model.tensor_model_parallel_size=2 \
- model.megatron_amp_O2=True \
- model.optim.name=distributed_fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=2 \
- model.optim.sched.constant_steps=2 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
-
- rm -rf examples/nlp/language_modeling/gpt_pretrain_results
- rm -rf examples/nlp/language_modeling/gpt_index_mappings
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=3 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=3 \
+ trainer.precision=bf16 \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
+ model.tensor_model_parallel_size=2 \
+ model.megatron_amp_O2=True \
+ model.optim.name=distributed_fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=2 \
+ model.optim.sched.constant_steps=2 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
+
+ python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=3 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=6 \
+ trainer.precision=bf16 \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
+ exp_manager.resume_if_exists=True \
+ model.reset_lr=True \
+ model.tensor_model_parallel_size=2 \
+ model.megatron_amp_O2=True \
+ model.optim.name=distributed_fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=2 \
+ model.optim.sched.constant_steps=2 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
+ AFTER_SCRIPT: |
+ rm -rf examples/nlp/language_modeling/gpt_pretrain_results
+ rm -rf examples/nlp/language_modeling/gpt_index_mappings
L2_Megatron_GPT_with_ALiBi_Pretraining_and_Resume_Training_TP2:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=2 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=3 \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=1 \
- model.optim.sched.constant_steps=1 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.position_embedding_type=alibi \
- model.bias=False \
- model.bias_activation_fusion=False \
- model.bias_dropout_add_fusion=False \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method=block \
- model.activations_checkpoint_granularity=full \
- model.activations_checkpoint_num_layers=1 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
-
- # not testing resume functionality to save time on ci @adithyare
- #python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
- #trainer.devices=2 \
- #trainer.accelerator=gpu \
- #trainer.log_every_n_steps=1 \
- #trainer.val_check_interval=2 \
- #trainer.limit_val_batches=1 \
- #trainer.accumulate_grad_batches=1 \
- #trainer.max_steps=6 \
- #trainer.gradient_clip_val=1.0 \
- #exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
- #exp_manager.resume_if_exists=True \
- #model.tensor_model_parallel_size=2 \
- #model.optim.name=fused_adam \
- #model.optim.lr=2e-4 \
- #model.optim.sched.warmup_steps=2 \
- #model.optim.sched.constant_steps=2 \
- #model.optim.sched.min_lr=8e-5 \
- #model.max_position_embeddings=128 \
- #model.encoder_seq_length=128 \
- #model.data.seq_length=128 \
- #model.position_embedding_type=alibi \
- #model.normalization=rmsnorm \
- #model.bias=False \
- #model.bias_activation_fusion=False \
- #model.bias_dropout_add_fusion=False \
- #model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- #model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- #model.num_layers=8 \
- #model.hidden_size=256 \
- #model.num_attention_heads=8 \
- #model.activations_checkpoint_method=block \
- #model.activations_checkpoint_granularity=full \
- #model.activations_checkpoint_num_layers=1 \
- #model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
- #model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings"
-
- rm -rf examples/nlp/language_modeling/gpt_pretrain_results
- rm -rf examples/nlp/language_modeling/gpt_index_mappings
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=2 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=3 \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=1 \
+ model.optim.sched.constant_steps=1 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.position_embedding_type=alibi \
+ model.bias=False \
+ model.bias_activation_fusion=False \
+ model.bias_dropout_add_fusion=False \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method=block \
+ model.activations_checkpoint_granularity=full \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
+
+ # not testing resume functionality to save time on ci @adithyare
+ #python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
+ #trainer.devices=2 \
+ #trainer.accelerator=gpu \
+ #trainer.log_every_n_steps=1 \
+ #trainer.val_check_interval=2 \
+ #trainer.limit_val_batches=1 \
+ #trainer.accumulate_grad_batches=1 \
+ #trainer.max_steps=6 \
+ #trainer.gradient_clip_val=1.0 \
+ #exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
+ #exp_manager.resume_if_exists=True \
+ #model.tensor_model_parallel_size=2 \
+ #model.optim.name=fused_adam \
+ #model.optim.lr=2e-4 \
+ #model.optim.sched.warmup_steps=2 \
+ #model.optim.sched.constant_steps=2 \
+ #model.optim.sched.min_lr=8e-5 \
+ #model.max_position_embeddings=128 \
+ #model.encoder_seq_length=128 \
+ #model.data.seq_length=128 \
+ #model.position_embedding_type=alibi \
+ #model.normalization=rmsnorm \
+ #model.bias=False \
+ #model.bias_activation_fusion=False \
+ #model.bias_dropout_add_fusion=False \
+ #model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ #model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ #model.num_layers=8 \
+ #model.hidden_size=256 \
+ #model.num_attention_heads=8 \
+ #model.activations_checkpoint_method=block \
+ #model.activations_checkpoint_granularity=full \
+ #model.activations_checkpoint_num_layers=1 \
+ #model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
+ #model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings"
+ AFTER_SCRIPT: |
+ rm -rf examples/nlp/language_modeling/gpt_pretrain_results
+ rm -rf examples/nlp/language_modeling/gpt_index_mappings
L2_Megatron_GPT_with_KERPLE_Pretraining_and_Resume_Training_TP2:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
- trainer.devices=2 \
- trainer.accelerator=gpu \
- trainer.log_every_n_steps=1 \
- trainer.val_check_interval=2 \
- trainer.limit_val_batches=2 \
- trainer.accumulate_grad_batches=1 \
- trainer.max_steps=3 \
- trainer.gradient_clip_val=1.0 \
- exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
- model.tensor_model_parallel_size=2 \
- model.optim.name=fused_adam \
- model.optim.lr=2e-4 \
- model.optim.sched.warmup_steps=1 \
- model.optim.sched.constant_steps=1 \
- model.optim.sched.min_lr=8e-5 \
- model.max_position_embeddings=128 \
- model.encoder_seq_length=128 \
- model.data.seq_length=128 \
- model.position_embedding_type=kerple \
- model.bias=False \
- model.bias_activation_fusion=False \
- model.bias_dropout_add_fusion=False \
- model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- model.num_layers=8 \
- model.hidden_size=256 \
- model.num_attention_heads=8 \
- model.activations_checkpoint_method=block \
- model.activations_checkpoint_granularity=full \
- model.activations_checkpoint_num_layers=1 \
- model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
- model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
-
- # commented out to save time on github ci @adithyare
- #python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
- #trainer.devices=2 \
- #trainer.accelerator=gpu \
- #trainer.log_every_n_steps=1 \
- #trainer.val_check_interval=2 \
- #trainer.limit_val_batches=1 \
- #trainer.accumulate_grad_batches=1 \
- #trainer.max_steps=6 \
- #trainer.precision=16 \
- #trainer.gradient_clip_val=1.0 \
- #exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
- #exp_manager.resume_if_exists=True \
- #model.tensor_model_parallel_size=2 \
- #model.optim.name=fused_adam \
- #model.optim.lr=2e-4 \
- #model.optim.sched.warmup_steps=2 \
- #model.optim.sched.constant_steps=2 \
- #model.optim.sched.min_lr=8e-5 \
- #model.max_position_embeddings=128 \
- #model.encoder_seq_length=128 \
- #model.data.seq_length=128 \
- #model.position_embedding_type=kerple \
- #model.normalization=rmsnorm \
- #model.bias=False \
- #model.bias_activation_fusion=False \
- #model.bias_dropout_add_fusion=False \
- #model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- #model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- #model.num_layers=8 \
- #model.hidden_size=256 \
- #model.num_attention_heads=8 \
- #model.activations_checkpoint_method=block \
- #model.activations_checkpoint_granularity=full \
- #model.activations_checkpoint_num_layers=1 \
- #model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
- #model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings"
-
- rm -rf examples/nlp/language_modeling/gpt_pretrain_results
- rm -rf examples/nlp/language_modeling/gpt_index_mappings
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
+ trainer.devices=2 \
+ trainer.accelerator=gpu \
+ trainer.log_every_n_steps=1 \
+ trainer.val_check_interval=2 \
+ trainer.limit_val_batches=2 \
+ trainer.accumulate_grad_batches=1 \
+ trainer.max_steps=3 \
+ trainer.gradient_clip_val=1.0 \
+ exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
+ model.tensor_model_parallel_size=2 \
+ model.optim.name=fused_adam \
+ model.optim.lr=2e-4 \
+ model.optim.sched.warmup_steps=1 \
+ model.optim.sched.constant_steps=1 \
+ model.optim.sched.min_lr=8e-5 \
+ model.max_position_embeddings=128 \
+ model.encoder_seq_length=128 \
+ model.data.seq_length=128 \
+ model.position_embedding_type=kerple \
+ model.bias=False \
+ model.bias_activation_fusion=False \
+ model.bias_dropout_add_fusion=False \
+ model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ model.num_layers=8 \
+ model.hidden_size=256 \
+ model.num_attention_heads=8 \
+ model.activations_checkpoint_method=block \
+ model.activations_checkpoint_granularity=full \
+ model.activations_checkpoint_num_layers=1 \
+ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
+ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
+
+ # commented out to save time on github ci @adithyare
+ #python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
+ #trainer.devices=2 \
+ #trainer.accelerator=gpu \
+ #trainer.log_every_n_steps=1 \
+ #trainer.val_check_interval=2 \
+ #trainer.limit_val_batches=1 \
+ #trainer.accumulate_grad_batches=1 \
+ #trainer.max_steps=6 \
+ #trainer.precision=16 \
+ #trainer.gradient_clip_val=1.0 \
+ #exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
+ #exp_manager.resume_if_exists=True \
+ #model.tensor_model_parallel_size=2 \
+ #model.optim.name=fused_adam \
+ #model.optim.lr=2e-4 \
+ #model.optim.sched.warmup_steps=2 \
+ #model.optim.sched.constant_steps=2 \
+ #model.optim.sched.min_lr=8e-5 \
+ #model.max_position_embeddings=128 \
+ #model.encoder_seq_length=128 \
+ #model.data.seq_length=128 \
+ #model.position_embedding_type=kerple \
+ #model.normalization=rmsnorm \
+ #model.bias=False \
+ #model.bias_activation_fusion=False \
+ #model.bias_dropout_add_fusion=False \
+ #model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ #model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ #model.num_layers=8 \
+ #model.hidden_size=256 \
+ #model.num_attention_heads=8 \
+ #model.activations_checkpoint_method=block \
+ #model.activations_checkpoint_granularity=full \
+ #model.activations_checkpoint_num_layers=1 \
+ #model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
+ #model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings"
+ AFTER_SCRIPT: |
+ rm -rf examples/nlp/language_modeling/gpt_pretrain_results
+ rm -rf examples/nlp/language_modeling/gpt_index_mappings
L2_Megatron_GPT_Pretraining_and_Resume_Training_PP2:
needs: [cicd-test-container-setup]
@@ -3119,49 +2982,35 @@ jobs:
L2_Megatron_GPT_Finetuning_StarCoder_PP1:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure-gpus-1
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- # --user 0:128
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --env HYDRA_FULL_ERROR=1
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- python examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
- trainer.devices=1 \
- trainer.num_nodes=1 \
- trainer.precision=bf16 \
- trainer.max_steps=4 \
- trainer.val_check_interval=4 \
- trainer.enable_checkpointing=False \
- +trainer.limit_val_batches=2 \
- +trainer.limit_test_batches=2 \
- exp_manager.checkpoint_callback_params.save_best_model=False \
- exp_manager.exp_dir=examples/nlp/language_modeling/gpt_sft_results \
- model.peft.peft_scheme=none \
- model.optim.name=distributed_fused_adam \
- model.restore_from_path=/home/TestData/nlp/megatron_gpt/starcoder-ci-nemo/megatron_starcoder_tp1_pp1.nemo \
- model.tensor_model_parallel_size=1 \
- model.pipeline_model_parallel_size=1 \
- model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
- model.data.train_ds.num_workers=0 \
- model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
- model.data.validation_ds.num_workers=0 \
- model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
- model.data.test_ds.num_workers=0 \
- model.data.train_ds.concat_sampling_probabilities=[1.0]
-
- rm -rf examples/nlp/language_modeling/gpt_sft_results
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure-gpus-1
+ SCRIPT: |
+ python examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
+ trainer.devices=1 \
+ trainer.num_nodes=1 \
+ trainer.precision=bf16 \
+ trainer.max_steps=4 \
+ trainer.val_check_interval=4 \
+ trainer.enable_checkpointing=False \
+ +trainer.limit_val_batches=2 \
+ +trainer.limit_test_batches=2 \
+ exp_manager.checkpoint_callback_params.save_best_model=False \
+ exp_manager.exp_dir=examples/nlp/language_modeling/gpt_sft_results \
+ model.peft.peft_scheme=none \
+ model.optim.name=distributed_fused_adam \
+ model.restore_from_path=/home/TestData/nlp/megatron_gpt/starcoder-ci-nemo/megatron_starcoder_tp1_pp1.nemo \
+ model.tensor_model_parallel_size=1 \
+ model.pipeline_model_parallel_size=1 \
+ model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
+ model.data.train_ds.num_workers=0 \
+ model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
+ model.data.validation_ds.num_workers=0 \
+ model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
+ model.data.test_ds.num_workers=0 \
+ model.data.train_ds.concat_sampling_probabilities=[1.0]
+ AFTER_SCRIPT: |
+ rm -rf examples/nlp/language_modeling/gpt_sft_results
L2_Megatron_GPT_Embedding:
needs: [cicd-test-container-setup]
@@ -3305,6 +3154,62 @@ jobs:
AFTER_SCRIPT: |
rm -rf /home/TestData/nlp/lora_tuning_tp2
+ L2_Megatron_GPT_PEFT_Lora_TP2SP1:
+ needs: [cicd-test-container-setup]
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure-gpus-2-h100
+ SCRIPT: |
+ rm -rf /home/TestData/nlp/lora_tuning_tp2_sp1
+
+ CUDA_DEVICE_MAX_CONNECTIONS=1 NVTE_FLASH_ATTN=0 NVTE_FUSED_ATTN=1 python examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
+ trainer.devices=2 \
+ trainer.log_every_n_steps=1 \
+ trainer.max_epochs=9999 \
+ trainer.max_steps=3 \
+ trainer.val_check_interval=3 \
+ ++trainer.limit_val_batches=2 \
+ trainer.precision=bf16 \
+ exp_manager.exp_dir=/home/TestData/nlp/lora_tuning_tp2_sp1 \
+ +model.mcore_gpt=True \
+ model.pipeline_model_parallel_size=1 \
+ model.tensor_model_parallel_size=2 \
+ model.sequence_parallel=True \
+ model.megatron_amp_O2=True \
+ model.restore_from_path=/home/TestData/nlp/megatron_gpt/mcore_45M/megatron_llama.nemo \
+ +model.fp8=True \
+ +model.fp8_params=True \
+ +model.fp8_hybrid=True \
+ +model.fp8_e4m3=False \
+ +model.fp8_interval=1 \
+ +model.fp8_margin=0 \
+ +model.fp8_amax_history_len=32 \
+ +model.fp8_amax_compute_algo=max \
+ +model.reduce_amax=False \
+ +model.ub_tp_comm_overlap=False \
+ +model.tp_comm_overlap_ag=False \
+ +model.tp_comm_overlap_rs=False \
+ +model.tp_comm_overlap_disable_qkv=True \
+ model.peft.peft_scheme='lora' \
+ model.peft.lora_tuning.adapter_dim=16 \
+ model.peft.lora_tuning.alpha=32 \
+ model.peft.lora_tuning.column_init_method="kaiming" \
+ +model.peft.lora_tuning.dropout_position='pre' \
+ model.peft.lora_tuning.target_modules=['attention'] \
+ model.peft.lora_tuning.adapter_dropout=0.1 \
+ +model.peft.lora_tuning.a2a_experimental=1 \
+ model.answer_only_loss=True \
+ model.micro_batch_size=1 \
+ model.global_batch_size=1 \
+ model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
+ model.data.train_ds.concat_sampling_probabilities=[1.0] \
+ model.data.train_ds.num_workers=0 \
+ model.data.validation_ds.num_workers=0 \
+ model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
+ model.data.validation_ds.names=[quarel]
+ AFTER_SCRIPT: |
+ rm -rf /home/TestData/nlp/lora_tuning_tp2_sp1
+
L2_Megatron_GPT_Eval:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
@@ -3859,7 +3764,7 @@ jobs:
trainer.precision=16 \
trainer.gradient_clip_val=1.0 \
exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
- model.pipeline_model_parallel_split_rank=1 \
+ model.pipeline_model_parallel_split_rank=0 \
model.seq_length=256 \
model.encoder.num_layers=4 \
model.decoder.num_layers=1 \
@@ -4518,48 +4423,38 @@ jobs:
L2_NeMo_2_GPT_Pretraining_no_transformer_engine:
needs: [cicd-test-container-setup]
- runs-on: self-hosted-azure
- timeout-minutes: 10
- container:
- image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
- options:
- --device=/dev/nvidia0
- --gpus all
- --shm-size=8g
- --env TRANSFORMERS_OFFLINE=0
- --volume /mnt/datadrive/TestData:/home/TestData
- steps:
- - name: Checkout repository
- uses: actions/checkout@v4
- - run: |
- pip uninstall -y apex ## TODO: remove when apex is no longer a dependency
- pip uninstall -y transformer_engine
-
- python examples/llm/megatron_gpt_pretraining.py \
- --devices=2 \
- --max-steps=3 \
- --experiment-dir=examples/llm/gpt_pretrain_results \
- --vocab-path=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- --merges-path=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- --data-path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \
- --index-mapping-dir=examples/llm/gpt_index_mappings
-
- python examples/llm/megatron_gpt_pretraining.py \
- --devices=2 \
- --max-steps=6 \
- --experiment-dir=examples/llm/gpt_pretrain_results \
- --vocab-path=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
- --merges-path=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
- --data-path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \
- --index-mapping-dir=examples/llm/gpt_index_mappings
-
- rm -rf examples/llm/gpt_pretrain_results
- rm -rf examples/llm/gpt_index_mappings
- - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
- if: "failure()"
+ uses: ./.github/workflows/_test_template.yml
+ with:
+ RUNNER: self-hosted-azure
+ SCRIPT: |
+ pip uninstall -y apex ## TODO: remove when apex is no longer a dependency
+ pip uninstall -y transformer_engine
+
+ python examples/llm/megatron_gpt_pretraining.py \
+ --devices=2 \
+ --max-steps=3 \
+ --experiment-dir=examples/llm/gpt_pretrain_results \
+ --vocab-path=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ --merges-path=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ --data-path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \
+ --index-mapping-dir=examples/llm/gpt_index_mappings
+
+ python examples/llm/megatron_gpt_pretraining.py \
+ --devices=2 \
+ --max-steps=6 \
+ --experiment-dir=examples/llm/gpt_pretrain_results \
+ --vocab-path=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
+ --merges-path=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
+ --data-path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \
+ --index-mapping-dir=examples/llm/gpt_index_mappings
+ AFTER_SCRIPT: |
+ rm -rf examples/llm/gpt_pretrain_results
+ rm -rf examples/llm/gpt_index_mappings
Nemo_CICD_Test:
needs:
+ - gpu-test
+ - cicd-test-container-setup
- L0_Unit_Tests_GPU
- L0_Unit_Tests_CPU
- L2_Community_LLM_Checkpoints_tests_Llama
@@ -4630,6 +4525,7 @@ jobs:
- L2_Megatron_GPT_Embedding
- L2_Megatron_GPT_PEFT_Lora_PP2_O2
- L2_Megatron_GPT_PEFT_Lora_TP2_O1
+ - L2_Megatron_GPT_PEFT_Lora_TP2SP1
- L2_Megatron_GPT_Eval
- L2_Megatron_GPT_Eval_PP2
- L2_Megatron_GPT_SFT_Eval_inference_seq_len_greaterThan_training_seq_len
diff --git a/Dockerfile.ci b/Dockerfile.ci
index 15cd016073ca..964fd419ccf5 100644
--- a/Dockerfile.ci
+++ b/Dockerfile.ci
@@ -34,7 +34,7 @@ WORKDIR /workspace
# Install NeMo requirements
ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
ARG MODELOPT_VERSION=0.13.0
-ARG MCORE_TAG=c7a1f82d761577e6ca0338d3521eac82f2aa0904
+ARG MCORE_TAG=2bbe55be32e2d478c4b2ce575af1cccb8fc3d9b9
ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
RUN \
--mount=type=bind,source=requirements,target=requirements \
@@ -90,4 +90,3 @@ chmod 777 -R /workspace
EOF
ENV PYTHONPATH="${PYTHONPATH}:/workspace/Megatron-LM"
-
diff --git a/Dockerfile.speech b/Dockerfile.speech
index ec0b89e3afe2..e7cc670a132d 100644
--- a/Dockerfile.speech
+++ b/Dockerfile.speech
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.01-py3
+ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3
# build an image that includes only the nemo dependencies, ensures that dependencies
# are included first for optimal caching, and useful for building a development
@@ -62,23 +62,28 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*
WORKDIR /workspace/
+
+ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
+ARG MCORE_TAG=338af51452a53982d202e8386db6233adad1ce86
+ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
# Install megatron core, this can be removed once 0.3 pip package is released
# We leave it here in case we need to work off of a specific commit in main
RUN git clone https://github.com/NVIDIA/Megatron-LM.git && \
cd Megatron-LM && \
- git checkout c7a1f82d761577e6ca0338d3521eac82f2aa0904 && \
+ git checkout ${MCORE_TAG} && \
pip install .
# Performance optimizations for distributed optimizer: https://github.com/NVIDIA/apex/pull/1771
RUN git clone https://github.com/NVIDIA/apex.git && \
cd apex && \
- git checkout f058162b215791b15507bb542f22ccfde49c872d && \
- pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./
+ git checkout ${APEX_TAG} && \
+ pip install -v --no-build-isolation --disable-pip-version-check --no-cache-dir \
+ --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam" ./
# Transformer Engine 1.2.0
RUN git clone https://github.com/NVIDIA/TransformerEngine.git && \
cd TransformerEngine && \
- git fetch origin da30634a6c9ccdbb6c587b6c93b1860e4b038204 && \
+ git fetch origin ${TE_TAG} && \
git checkout FETCH_HEAD && \
git submodule init && git submodule update && \
NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install .
@@ -126,7 +131,9 @@ RUN INSTALL_MSG=$(/bin/bash /tmp/nemo/scripts/installers/install_k2.sh); INSTALL
WORKDIR /tmp/nemo
ENV LHOTSE_REQUIRE_TORCHAUDIO=0
COPY requirements .
-RUN for f in $(ls requirements*.txt); do pip3 install --disable-pip-version-check --no-cache-dir -r $f; done
+# exclude requirements_vllm.txt, since `vllm==0.5.x` breaks the container due to hardcoded requirements `torch==2.3.0`
+RUN for f in $(ls requirements*.txt | grep -v 'requirements_vllm.txt'); do \
+ pip3 install --disable-pip-version-check --no-cache-dir -r $f; done
# install flash attention
RUN pip install flash-attn
@@ -151,7 +158,12 @@ RUN /usr/bin/test -n "$NEMO_VERSION" && \
RUN --mount=from=nemo-src,target=/tmp/nemo,rw cd /tmp/nemo && pip install ".[all]"
# Check install
-RUN python -c "import nemo.collections.nlp as nemo_nlp" && \
+# NB: adjusting LD_LIBRARY_PATH (only here, should not be persistent!) is a temporary hack
+# to avoid failure if CUDA is unavailable (`docker build` does not expose GPUs)
+# The error is raised in NeMo Core, and the main reason is reinstalled Transformer-Engine;
+RUN export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_HOME}/compat/lib.real && \
+ python -c "import nemo.collections.asr as nemo_asr" && \
+ python -c "import nemo.collections.nlp as nemo_nlp" && \
python -c "import nemo.collections.tts as nemo_tts" && \
python -c "import nemo_text_processing.text_normalization as text_normalization"
diff --git a/README.md b/README.md
index cb2a357fd7ed..9b019d3ac175 100644
--- a/README.md
+++ b/README.md
@@ -10,10 +10,38 @@
# **NVIDIA NeMo Framework**
## Latest News
+
- Large Language Models and Multimodal
+ Large Language Models and Multimodal Models
+
+
+
+ New Llama 3.1 Support
+ (2024-07-23)
+
+ The NeMo Framework now supports training and customizing the Llama 3.1 collection of LLMs from Meta.
+
+
+
+
+ Accelerate your Generative AI Distributed Training Workloads with the NVIDIA NeMo Framework on Amazon EKS
+ (2024-07-16)
+
+ NVIDIA NeMo Framework now runs distributed training workloads on an Amazon Elastic Kubernetes Service (Amazon EKS) cluster. For step-by-step instructions on creating an EKS cluster and running distributed training workloads with NeMo, see the GitHub repository here.
+
+
+
+
+
+ NVIDIA NeMo Accelerates LLM Innovation with Hybrid State Space Model Support
+ (2024/06/17)
+
+ NVIDIA NeMo and Megatron Core now support pre-training and fine-tuning of state space models (SSMs). NeMo also supports training models based on the Griffin architecture as described by Google DeepMind.
+
+
+
NVIDIA releases 340B base, instruct, and reward models pretrained on a total of 9T tokens.
@@ -46,45 +74,6 @@
The walkthrough includes detailed instructions on how to set up a Google Cloud Project and pre-train a GPT model using the NeMo Framework.
-
-
-
- Bria Builds Responsible Generative AI for Enterprises Using NVIDIA NeMo, Picasso
- (2024/03/06)
-
- Bria, a Tel Aviv startup at the forefront of visual generative AI for enterprises now leverages the NVIDIA NeMo Framework.
- The Bria.ai platform uses reference implementations from the NeMo Multimodal collection, trained on NVIDIA Tensor Core GPUs, to enable high-throughput and low-latency image generation.
- Bria has also adopted NVIDIA Picasso, a foundry for visual generative AI models, to run inference.
-
-
-
-
-
- New NVIDIA NeMo Framework Features and NVIDIA H200
- (2023/12/06)
-
- NVIDIA NeMo Framework now includes several optimizations and enhancements,
- including:
- 1) Fully Sharded Data Parallelism (FSDP) to improve the efficiency of training large-scale AI models,
- 2) Mix of Experts (MoE)-based LLM architectures with expert parallelism for efficient LLM training at scale,
- 3) Reinforcement Learning from Human Feedback (RLHF) with TensorRT-LLM for inference stage acceleration, and
- 4) up to 4.2x speedups for Llama 2 pre-training on NVIDIA H200 Tensor Core GPUs.
-
-
-
-
-
-
-
-
- NVIDIA now powers training for Amazon Titan Foundation models
- (2023/11/28)
-
- NVIDIA NeMo Framework now empowers the Amazon Titan foundation models (FM) with efficient training of large language models (LLMs).
- The Titan FMs form the basis of Amazon’s generative AI service, Amazon Bedrock.
- The NeMo Framework provides a versatile framework for building, customizing, and running LLMs.
-
-
@@ -604,6 +593,53 @@ to the `gh-pages-src` branch of this repository. For detailed
information, please consult the README located at the [gh-pages-src
branch](https://github.com/NVIDIA/NeMo/tree/gh-pages-src#readme).
+## Blogs
+
+
+
+ Large Language Models and Multimodal Models
+
+
+
+ Bria Builds Responsible Generative AI for Enterprises Using NVIDIA NeMo, Picasso
+ (2024/03/06)
+
+ Bria, a Tel Aviv startup at the forefront of visual generative AI for enterprises now leverages the NVIDIA NeMo Framework.
+ The Bria.ai platform uses reference implementations from the NeMo Multimodal collection, trained on NVIDIA Tensor Core GPUs, to enable high-throughput and low-latency image generation.
+ Bria has also adopted NVIDIA Picasso, a foundry for visual generative AI models, to run inference.
+
+
+
+
+
+ New NVIDIA NeMo Framework Features and NVIDIA H200
+ (2023/12/06)
+
+ NVIDIA NeMo Framework now includes several optimizations and enhancements,
+ including:
+ 1) Fully Sharded Data Parallelism (FSDP) to improve the efficiency of training large-scale AI models,
+ 2) Mix of Experts (MoE)-based LLM architectures with expert parallelism for efficient LLM training at scale,
+ 3) Reinforcement Learning from Human Feedback (RLHF) with TensorRT-LLM for inference stage acceleration, and
+ 4) up to 4.2x speedups for Llama 2 pre-training on NVIDIA H200 Tensor Core GPUs.
+
+
+
+
+
+
+
+
+ NVIDIA now powers training for Amazon Titan Foundation models
+ (2023/11/28)
+
+ NVIDIA NeMo Framework now empowers the Amazon Titan foundation models (FM) with efficient training of large language models (LLMs).
+ The Titan FMs form the basis of Amazon’s generative AI service, Amazon Bedrock.
+ The NeMo Framework provides a versatile framework for building, customizing, and running LLMs.
+
+
+
+
+
## Licenses
- [NeMo GitHub Apache 2.0
diff --git a/docs/source/collections.rst b/docs/source/collections.rst
index d4bea503513b..0198ef250ce3 100644
--- a/docs/source/collections.rst
+++ b/docs/source/collections.rst
@@ -25,6 +25,7 @@ Documentation for the individual collections
multimodal/vlm/intro
multimodal/text2img/intro
multimodal/nerf/intro
+ mumtimoda/speech_llm/intro
.. toctree::
:maxdepth: 1
diff --git a/docs/source/core/exp_manager.rst b/docs/source/core/exp_manager.rst
index ce5f7a9cb087..1664fe59d52f 100644
--- a/docs/source/core/exp_manager.rst
+++ b/docs/source/core/exp_manager.rst
@@ -248,48 +248,6 @@ You might also want to adjust the callback parameters:
Straggler detection might involve inter-rank synchronization, and should be invoked with reasonable frequency (e.g. every few minutes).
-.. _exp_manager_straggler_det_support-label:
-
-.. note::
- Stragglers Detection feature is included in the optional NeMo resiliency package.
-
-Distributed training can be affected by stragglers, which are slow workers that slow down the overall training process.
-NeMo provides a straggler detection feature that can identify slower GPUs.
-
-This feature is implemented in the ``StragglerDetectionCallback``, which is disabled by default.
-
-The callback computes normalized GPU performance scores, which are scalar values ranging from 0.0 (worst) to 1.0 (best).
-A performance score can be interpreted as the ratio of current performance to reference performance.
-
-There are two types of performance scores provided by the callback:
- - Relative GPU performance score: The best-performing GPU in the workload is used as a reference.
- - Individual GPU performance score: The best historical performance of the GPU is used as a reference.
-
-Examples:
- - If the relative performance score is 0.5, it means that a GPU is twice slower than the fastest GPU.
- - If the individual performance score is 0.5, it means that a GPU is twice slower than its best observed performance.
-
-If a GPU performance score drops below the specified threshold, it is identified as a straggler.
-
-To enable straggler detection, add ``create_straggler_detection_callback: True`` under exp_manager in the config YAML file.
-You might also want to adjust the callback parameters:
-
-.. code-block:: yaml
-
- exp_manager:
- ...
- create_straggler_detection_callback: True
- straggler_detection_callback_params:
- report_time_interval: 300 # Interval [seconds] of the straggler check
- calc_relative_gpu_perf: True # Calculate relative GPU performance
- calc_individual_gpu_perf: True # Calculate individual GPU performance
- num_gpu_perf_scores_to_log: 5 # Log 5 best and 5 worst GPU performance scores, even if no stragglers are detected
- gpu_relative_perf_threshold: 0.7 # Threshold for relative GPU performance scores
- gpu_individual_perf_threshold: 0.7 # Threshold for individual GPU performance scores
- stop_if_detected: True # Terminate the workload if stragglers are detected
-
-Straggler detection might involve inter-rank synchronization, and should be invoked with reasonable frequency (e.g. every few minutes).
-
Fault Tolerance
---------------
@@ -334,9 +292,10 @@ Timeouts for fault detection need to be adjusted for a given workload:
checkpointing related operations should be taken into account.
If ``calculate_timeouts: True`` timeouts will be automatically estimated based on observed intervals.
-Estimated timeouts take precedence over timeouts defined in the config file. **Timeouts are estimated after
-checkpoint loading and saving was observed**. For example, in multi-part training started from scratch,
-estimated timeouts won't be available during the first run. Estimated timeouts are stored in the checkpoint.
+Estimated timeouts take precedence over timeouts defined in the config file. **Timeouts are estimated
+at the end of a training run, when checkpoint loading and saving were observed**. Hence, in a multi-part
+training started from scratch, estimated timeouts won't be available during initial two runs.
+Estimated timeouts are stored in a separate JSON file.
``max_subsequent_job_failures`` allows for the automatic continuation of training on a SLURM cluster.
This feature requires SLURM job to be scheduled with ``NeMo-Framework-Launcher``. If ``max_subsequent_job_failures``
@@ -346,10 +305,12 @@ subsequent jobs failed (SLURM job exit code is `!= 0`) or the training is comple
All FT configuration items summary:
* ``workload_check_interval`` (float, default=5.0) Periodic workload check interval [seconds] in the workload monitor.
- * ``initial_rank_heartbeat_timeout`` (Optional[float], default=60.0 * 60.0) Timeout for the first heartbeat from a rank.
- * ``rank_heartbeat_timeout`` (Optional[float], default=45.0 * 60.0) Timeout for subsequent heartbeats from a rank.
+ * ``initial_rank_heartbeat_timeout`` (Optional[float], default=60.0 * 60.0) Timeout [seconds] for the first heartbeat from a rank.
+ * ``rank_heartbeat_timeout`` (Optional[float], default=45.0 * 60.0) Timeout [seconds] for subsequent heartbeats from a rank.
* ``calculate_timeouts`` (bool, default=True) Try to calculate ``rank_heartbeat_timeout`` and ``initial_rank_heartbeat_timeout``
based on the observed heartbeat intervals.
+ * ``safety_factor``: (float, default=5.0) When calculating the timeouts, multiply the maximum observed heartbeat interval
+ by this factor to obtain the timeout estimate. Can be made smaller for stable environments and larger for unstable ones.
* ``rank_termination_signal`` (signal.Signals, default=signal.SIGKILL) Signal used to terminate the rank when failure is detected.
* ``log_level`` (str, default='INFO') Log level for the FT client and server(rank monitor).
* ``max_rank_restarts`` (int, default=0) Used by FT launcher. Max number of restarts for a rank.
diff --git a/docs/source/features/memory_optimizations.rst b/docs/source/features/memory_optimizations.rst
index 1fe8215864a9..bedc8e775f09 100644
--- a/docs/source/features/memory_optimizations.rst
+++ b/docs/source/features/memory_optimizations.rst
@@ -5,6 +5,80 @@ Parallelism
-----------
Refer to :doc:`Parallelism <./parallelisms>`.
+
+Mixture of Experts
+------------------
+
+Overview
+^^^^^^^^
+
+NeMo supports Mixture of Experts (MoE) in the transformer layer for NLP models.
+
+MoE is a machine learning technique where multiple specialized models (experts,
+usually multi-layer perceptrons) are combined to solve a complex task. Each expert
+focuses on a specific subtask or domain, while a gating network dynamically activates
+the most appropriate expert based on the current input.
+
+
+To use MoE in the NeMo Framework, adjust the ``num_moe_experts`` parameter in the model configuration:
+
+1. Set ``num_moe_experts`` to `8` to leverage 8 experts in the MoE module.
+
+ .. code-block:: yaml
+
+ num_moe_experts: 8 # Set MoE to use 8 experts
+
+2. Set ``moe_router_topk`` to the number of experts you want activated. For example, if you want to process each input with two experts:
+
+ .. code-block:: yaml
+
+ moe_router_topk: 2 # Processes each token using 2 experts.
+
+In addition, NeMo provides options to configure MoE-specific loss function.
+To balance token distribution across experts:
+
+1. Set ``moe_router_load_balancing_type`` to specify the load balancing method:
+
+ .. code-block:: yaml
+
+ moe_router_load_balancing_type: aux_loss # to use the auxilary loss, other options include "sinkhorn".
+
+2. Set ``moe_aux_loss_coeff`` to specify the weight of the auxilary loss. Values in the 1e-2 range are a good start, as follows:
+
+ .. code-block:: yaml
+
+ moe_aux_loss_coeff: 1e-2 # set the aux-loss weight to 1e-2
+
+3. Set ``moe_z_loss_coeff`` to specify the weight of the z-loss. A starting value of 1e-3 is recommended, as follows:
+
+ .. code-block:: yaml
+
+ moe_z_loss_coeff: 1e-3
+
+Other options include:
+
+1. ``moe_input_jitter_eps`` adds noise to the input tensor by applying jitter with a specified epsilon value.
+
+2. ``moe_token_dropping`` enables selectively dropping and padding tokens for each expert to achieve
+ a specified capacity.
+
+3. ``moe_token_dropping`` specifies the token dispatcher type, options include 'allgather' and 'alltoall'.
+
+4. ``moe_per_layer_logging`` enables per-layer logging for MoE, currently support aux-loss and z-loss.
+
+5. ``moe_expert_capacity_factor`` the capacity factor for each expert, None means no token will be dropped. The default is None.
+
+6. ``moe_pad_expert_input_to_capacity`` if True, pads the input for each expert to match the expert capacity length, effective only after the moe_expert_capacity_factor is set. The default setting is False.
+
+7. ``moe_token_drop_policy`` the policy to drop tokens. Can be either "probs" or "position". If "probs", the tokens with the lowest probabilities will be dropped. If "position", tokens at the end of each batch will be dropped. Default value is "probs".
+
+8. ``moe_layer_recompute`` if True, checkpointing moe_layer to save activation memory, default is False.
+
+
+
+
+
+
Flash Attention
---------------
@@ -104,7 +178,7 @@ Implement MQA or GQA
NeMo's support for GQA and MQA is enabled through the integration of Megatron Core's Attention mechanism. The underlying implementation details can be explored within the Attention class of Megatron Core, which provides the functional backbone for these advanced attention methods. To understand the specific modifications and implementations of MQA and GQA, refer to the source code in the Attention class:
-Check implementation details from Attention Class in Megatron Core Repo: https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/transformer/attention.py#L49
+Check implementation details from Attention Class in Megatron Core Repo: https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/transformer/attention.py#L49.
CPU Offloading
@@ -117,12 +191,12 @@ CPU Offloading in NeMo is a feature that reduces the peak memory usage of the GP
Features
^^^^^^^^
-> Supports training models with long sequence lengths by managing activation memory efficiently.
-> Enables high batch sizes per GPU by offloading activation memory.
-> Overlaps computation with data transfers (Host2Device and Device2Host) during offloading and reloading.
+. Supports training models with long sequence lengths by managing activation memory efficiently.
+. Enables high batch sizes per GPU by offloading activation memory.
+. Overlaps computation with data transfers (Host2Device and Device2Host) during offloading and reloading.
Usage
^^^^^
-> Set cpu_offloading to True to enable CPU offloading.
-> Set cpu_offloading_num_layers to a value between 0 and the total number of layers in the model minus one.
-> Set cpu_offloading_activations and cpu_offloading_weights based on your needs to offload activations only, weights only, or both.
+. Set cpu_offloading to True to enable CPU offloading.
+. Set cpu_offloading_num_layers to a value between 0 and the total number of layers in the model minus one.
+. Set cpu_offloading_activations and cpu_offloading_weights based on your needs to offload activations only, weights only, or both.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index bcfbae89dbf5..9a1086cae5ae 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -41,6 +41,7 @@ For quick guides and tutorials, see the "Getting started" section below.
:titlesonly:
starthere/intro
+ starthere/fundamentals
starthere/tutorials
For more information, browse the developer docs for your area of interest in the contents section below or on the left sidebar.
diff --git a/docs/source/multimodal/mllm/checkpoint.rst b/docs/source/multimodal/mllm/checkpoint.rst
deleted file mode 100644
index d1fe7b651e66..000000000000
--- a/docs/source/multimodal/mllm/checkpoint.rst
+++ /dev/null
@@ -1,114 +0,0 @@
-Checkpoints
-===========
-
-In this section, we present four key functionalities of NVIDIA NeMo related to checkpoint management:
-
-1. **Checkpoint Loading**: Load local ``.nemo`` checkpoint files with the :code:`restore_from()` method.
-2. **Partial Checkpoint Conversion**: Convert partially-trained ``.ckpt`` checkpoints to the ``.nemo`` format.
-3. **Community Checkpoint Conversion**: Transition checkpoints from community sources, like HuggingFace, into the ``.nemo`` format.
-4. **Model Parallelism Adjustment**: Modify model parallelism to efficiently train models that exceed the memory of a single GPU. NeMo employs both tensor (intra-layer) and pipeline (inter-layer) model parallelisms. Dive deeper with "Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM" (`link `_). This tool aids in adjusting model parallelism, accommodating users who need to deploy on larger GPU arrays due to memory constraints.
-
-Understanding Checkpoint Formats
---------------------------------
-
-A ``.nemo`` checkpoint is fundamentally a tar file that bundles the model configurations (given as a YAML file), model weights, and other pertinent artifacts like tokenizer models or vocabulary files. This consolidated design streamlines sharing, loading, tuning, evaluating, and inference.
-
-On the other hand, the ``.ckpt`` file is a product of PyTorch Lightning training. It stores model weights and optimizer states, and it's generally used for resuming training.
-
-Subsequent sections delve into each of the previously listed functionalities, emphasizing the loading of fully trained checkpoints for evaluation or additional fine-tuning.
-
-
-Loading Local Checkpoints
--------------------------
-
-NeMo inherently saves any model's checkpoints in the ``.nemo`` format. To manually save a model at any stage:
-
-.. code-block:: python
-
- model.save_to(.nemo)
-
-To load a local ``.nemo`` checkpoint:
-
-.. code-block:: python
-
- import nemo.collections.multimodal as nemo_multimodal
- model = nemo_multimodal.models..restore_from(restore_path="")
-
-Replace `` with the appropriate MM model class.
-
-Converting Local Checkpoints
-----------------------------
-
-The training script only auto-converts the final checkpoint into the ``.nemo`` format. To evaluate intermediate training checkpoints, conversion to ``.nemo`` might be needed. For this:
-
-.. code-block:: bash
-
- python -m torch.distributed.launch --nproc_per_node= * \
- examples/multimodal/convert_ckpt_to_nemo.py \
- --checkpoint_folder \
- --checkpoint_name \
- --nemo_file_path \
- --tensor_model_parallel_size \
- --pipeline_model_parallel_size
-
-Converting Community Checkpoints
---------------------------------
-
-NeVA Checkpoints
-^^^^^^^^^^^^^^^^
-
-Currently, the conversion mainly supports LLaVA checkpoints based on "llama-2 chat" checkpoints. As a reference, we'll consider the checkpoint `llava-llama-2-13b-chat-lightning-preview `_.
-
-After downloading this checkpoint and saving it at ``/path/to/llava-llama-2-13b-chat-lightning-preview``, undertake the following procedures:
-
-Modifying the Tokenizer
-"""""""""""""""""""""""
-
-NeMo mandates adding specific tokens to the tokenizer model for peak performance. To modify an existing tokenizer located in ``/path/to/llava-llama-2-13b-chat-lightning-preview/tokenizer``, execute the following in the NeMo container:
-
-.. code-block:: bash
-
- cd /opt/sentencepiece/src/
- protoc --python_out=/opt/NeMo/scripts/tokenizers/ sentencepiece_model.proto
- python /opt/NeMo/scripts/tokenizers/add_special_tokens_to_sentencepiece.py \
- --input_file /path/to/llava-llama-2-13b-chat-lightning-preview/tokenizer.model \
- --output_file /path/to/llava-llama-2-13b-chat-lightning-preview/tokenizer_neva.model \
- --is_userdefined \
- --tokens "" "" "" "" \
- "" "" "" ""
-
-Checkpoint Conversion
-"""""""""""""""""""""
-
-For conversion:
-
-.. code-block:: bash
-
- python examples/multimodal/mllm/neva/convert_hf_llava_to_neva.py \
- --in-file /path/to/llava-llama-2-13b-chat-lightning-preview \
- --out-file /path/to/neva-llava-llama-2-13b-chat-lightning-preview.nemo \
- --tokenizer-model /path/to/llava-llama-2-13b-chat-lightning-preview/tokenizer_add_special.model
- --conv-template llama_2
-
-
-Model Parallelism Adjustment
-----------------------------
-
-NeVA Checkpoints
-^^^^^^^^^^^^^^^^
-
-Adjust model parallelism with:
-
-.. code-block:: bash
-
- python examples/nlp/language_modeling/megatron_change_num_partitions.py \
- --model_file=/path/to/source.nemo \
- --target_file=/path/to/target.nemo \
- --tensor_model_parallel_size=??? \
- --target_tensor_model_parallel_size=??? \
- --pipeline_model_parallel_size=??? \
- --target_pipeline_model_parallel_size=??? \
- --model_class="nemo.collections.multimodal.models.multimodal_llm.neva.neva_model.MegatronNevaModel" \
- --precision=32 \
- --tokenizer_model_path=/path/to/tokenizer.model \
- --tp_conversion_only
diff --git a/docs/source/multimodal/mllm/intro.rst b/docs/source/multimodal/mllm/intro.rst
index 0e76a9737a0f..c67e47e34537 100644
--- a/docs/source/multimodal/mllm/intro.rst
+++ b/docs/source/multimodal/mllm/intro.rst
@@ -8,7 +8,21 @@ The endeavor to extend Language Models (LLMs) into multimodal domains by integra
datasets
configs
- checkpoint
neva
video_neva
sequence_packing
+
+
+Speech-agumented Large Language Models (SpeechLLM)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The endeavor to extend Language Models (LLMs) with the ability to understand speech and audio inputs, detailed examples can be found in the `SpeechLLM example `_..
+
+.. toctree::
+ :maxdepth: 1
+
+ ../speech_llm/intro
+ ../speech_llm/datasets
+ ../speech_llm/configs
+ ../speech_llm/api
+
diff --git a/docs/source/multimodal/speech_llm/api.rst b/docs/source/multimodal/speech_llm/api.rst
new file mode 100644
index 000000000000..142190fd411d
--- /dev/null
+++ b/docs/source/multimodal/speech_llm/api.rst
@@ -0,0 +1,88 @@
+SpeechLLM API
+=============
+
+Model Classes
+-------------
+
+.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_base_model.MegatronBaseModel
+ :show-inheritance:
+ :no-members:
+ :members: __init__, configure_optimizers
+ :no-index:
+
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.models.modular_models.ModularAudioGPTModel
+ :show-inheritance:
+ :no-members:
+ :members: __init__, training_step, validation_step, setup, build_train_valid_test_datasets
+
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.models.modular_models.CrossAttendModularAudioGPTModel
+ :show-inheritance:
+ :no-members:
+ :members: __init__, training_step, validation_step, setup, build_train_valid_test_datasets
+
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.models.modular_t5_models.ModularizedAudioT5Model
+ :show-inheritance:
+ :no-members:
+ :members: __init__, training_step, validation_step, setup, build_train_valid_test_datasets
+
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.models.modular_t5_models.DecoderTextPromptModularizedAudioT5Model
+ :show-inheritance:
+ :no-members:
+ :members: __init__, training_step, validation_step, setup, build_train_valid_test_datasets
+
+
+
+Modules
+-------
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.modules.perception_modules.AudioPerceptionModule
+ :show-inheritance:
+ :no-members:
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.modules.perception_modules.MultiAudioPerceptionModule
+ :show-inheritance:
+ :no-members:
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.modules.TransformerCrossAttention
+ :show-inheritance:
+ :no-members:
+
+
+Dataset Classes
+---------------
+.. autoclass:: nemo.collections.multimodal.speech_llm.data.audio_text_dataset.AudioTextDataset
+ :show-inheritance:
+ :no-members:
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.data.audio_text_dataset.TarredAudioTextDataset
+ :show-inheritance:
+ :no-members:
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.data.audio_text_dataset.get_tarred_audio_text_dataset_from_config
+ :show-inheritance:
+ :no-members:
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.data.audio_text_dataset.get_audio_text_dataset_from_config
+ :show-inheritance:
+ :no-members:
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.data.lhotse_dataset.LhotseAudioQuestionAnswerDataset
+ :show-inheritance:
+ :no-members:
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.data.build_dataset.build_speechllm_dataset
+ :show-inheritance:
+ :no-members:
+
+.. autoclass:: nemo.collections.multimodal.speech_llm.data.build_dataset.build_speechllm_dataloader
+ :show-inheritance:
+ :no-members:
+
+
+
+
+
diff --git a/docs/source/multimodal/speech_llm/configs.rst b/docs/source/multimodal/speech_llm/configs.rst
new file mode 100644
index 000000000000..5edd169eed25
--- /dev/null
+++ b/docs/source/multimodal/speech_llm/configs.rst
@@ -0,0 +1,197 @@
+Common Configuration Files
+==========================
+
+This section provides a detailed overview of the NeMo configuration file setup specific to models within the NeMo SpeechLLM collection. For foundational knowledge about setting up and executing experiments common to all NeMo models, such as the Experiment Manager and PyTorch Lightning trainer parameters, refer to the :doc:`core <../../core/core>` documentation.
+
+Within the configuration files of the NeMo SpeechLLMs, details concerning dataset(s), augmentation, optimization parameters, and model architectural specifications are central. This page explores each of these aspects.
+
+Discover exemplary configuration files for all SpeechLLMs in the `config directory of the examples `_.
+
+
+Dataset Configuration
+---------------------
+
+The dataset configuration is based on the NeMo ASR data configuration and the NLP data configuration
+
+The configuration file allows setting any initialization parameter accepted by the Dataset class used in the experiment. For a comprehensive list of Datasets and their parameters, visit the `Datasets <./api.html#Datasets>`__ section of the API.
+
+A typical training configuration is as follows:
+
+.. code-block:: yaml
+
+ train_ds:
+ manifest_filepath: ??? # Path to a list of JSONL files corresponding to the source data.
+ global_batch_size: 4
+ micro_batch_size: 2
+ shuffle: True
+ num_workers: 0
+ pin_memory: True
+ max_seq_length: 2048
+ min_seq_length: 1
+ drop_last: True
+ concat_sampling_probabilities: null # When providing a list of datasets, this arg defines the sampling probabilities from each dataset when strategy='random'
+ context_key: 'context'
+ answer_key: 'answer'
+ add_eos: True
+ add_eos: False
+ end_string: null
+ add_sep: False
+ add_bos: False
+ separate_prompt_and_response_with_newline: False
+ truncation_field: "context" # Options: ['context', 'answer']
+ prompt_template: "Q: {context}\nA: {answer}" # fstring to use for assistant prompt. Example: "Q: {input}\nA: {output}"
+ # ASR configs
+ sample_rate: 16000 #${model.audio_encoder.preprocessor.sample_rate}
+ max_duration: 24 # it is set for LibriSpeech, you may need to update it for your dataset
+ min_duration: 0.1
+ # tarred datasets
+ is_tarred: false
+ tarred_audio_filepaths: null
+ shuffle_n: 2048
+ # bucketing params
+ bucketing_strategy: "fully_randomized"
+ bucketing_batch_size: null
+ # multi-audio configs
+ audio_locator: null
+
+
+Key parameters include:
+
+- ``manifest_filepath``: The path to the dataset in JSON lines format, where each line in the file is a python dictionary. This can either be a single file or a list of files.
+- ``global_batch_size``: The global batch size that takes consideration of gradient accumulation, data parallelism.
+- ``micro_batch_size``: The micro batch size that fits on each GPU.
+- ``shuffle``: Whether to shuffle the dataset.
+- ``num_workers``: The number of workers to use for data loading.
+- ``pin_memory``: Whether to pin memory for faster data transfer.
+- ``max_seq_length``: The maximum sequence length for LLM.
+- ``min_seq_length``: The minimum sequence length for LLM.
+- ``drop_last``: Whether to drop the last batch if it is smaller than the batch size.
+- ``context_key``: The key in the JSON line that corresponds to the context used for LLM input.
+- ``answer_key``: The key in the JSON line that corresponds to the answer used for groundtruth.
+- ``add_eos``: Whether to add an end-of-sequence token.
+- ``add_bos``: Whether to add a beginning-of-sequence token.
+- ``add_sep``: Whether to add a separator token.
+- ``end_string``: The string to used to trigger end of generation, default to null to use EOS token.
+- ``separate_prompt_and_response_with_newline``: Whether to separate the prompt and response with a newline.
+- ``truncation_field``: The field to truncate if the sequence length exceeds the maximum sequence length.
+- ``prompt_template``: The fstring to use for the LLM prompt, where the context and answer will be formatted.
+- ``sample_rate``: The sample rate of the audio data.
+- ``max_duration``: The maximum duration of the audio data to be included.
+- ``min_duration``: The minimum duration of the audio data to be included.
+- ``is_tarred``: Whether the dataset is tarred.
+- ``tarred_audio_filepaths``: The path to the tarred audio files.
+- ``shuffle_n``: The number of samples to shuffle in tarred datasets, not used for non-tarred datasets.
+- ``bucketing_strategy``: The strategy to use for bucketing, options include 'fully_randomized', 'synced_randomized'.
+- ``bucketing_batch_size``: The batch size to use for each bucket, if not provided, the micro batch size is used.
+- ``audio_locator``: The special string to locate the position of each audio to be put in the text prompt.
+
+
+Trainer Configuration
+---------------------
+
+This section outlines arguments for the Pytorch Lightning Trainer Object.
+
+.. code-block:: yaml
+
+ trainer:
+ devices: 1 # number of GPUs (0 for CPU), or list of the GPUs to use e.g. [0, 1]
+ num_nodes: 1
+ max_epochs: -1
+ max_steps: 2500000 # precedence over max_epochs
+ logger: False # Provided by exp_manager
+ precision: bf16 # Should be set to 16 for O1 and O2 to enable the AMP.
+ accelerator: gpu
+ log_every_n_steps: 5 # Interval of logging.
+ resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc.
+ num_sanity_val_steps: 10 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it
+ enable_checkpointing: False # Provided by exp_manager
+ accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models
+ gradient_clip_val: 1.0
+ benchmark: False
+ enable_model_summary: True
+
+For a detailed list of arguments, refer to the `Pytorch Lightning Trainer `__ API section.
+
+Experiment Manager Configurations
+---------------------------------
+
+The NeMo Experiment Manager provides a streamlined approach to manage various tasks such as logging, saving, and resuming.
+
+.. code-block:: yaml
+
+ exp_manager:
+ exp_dir: null # exp_dir for your experiment, if None, defaults to "./nemo_experiments"
+ name: ${name}
+ create_wandb_logger: True
+ wandb_logger_kwargs: # Whether you want exp_manger to create a Wandb logger
+ name: training-session
+ project: text2img
+ group: nemo
+ resume: True
+ create_tensorboard_logger: True # Whether you want exp_manger to create a tb logger
+ create_checkpoint_callback: True # Whether you want exp_manager to create a modelcheckpoint callback
+ checkpoint_callback_params:
+ monitor: reduced_train_loss
+ save_top_k: 5
+ every_n_epochs: 0 # Save checkpoint frequency.
+ every_n_train_steps: 1000 # Mutually exclusive with every_n_epochs. It is recommended to set this if training on large-scale dataset.
+ filename: '${name}--{reduced_train_loss:.2f}-{step}-{consumed_samples}'
+ resume_if_exists: True
+ resume_ignore_no_checkpoint: True
+ resume_from_checkpoint: ${model.resume_from_checkpoint}
+ ema:
+ enable: True
+ decay: 0.9999
+ validate_original_weights: False
+ every_n_steps: 1
+ cpu_offload: False
+
+Optimizer Configurations
+-------------------------
+
+.. code-block:: yaml
+
+ optim:
+ name: fused_adam
+ lr: 0.0001
+ eps: 1e-8
+ betas: [ 0.9, 0.999 ]
+ weight_decay: 0.01
+ sched:
+ name: WarmupPolicy
+ warmup_steps: 10000
+ warmup_ratio: null
+
+The default optimizer used is ``fused_adam``. For details on all supported optimizers, refer to the NeMo user guide. The learning rate scheduler can be specified in the ``optim.sched`` section.
+
+Model Configurations
+--------------------
+
+Each configuration file should detail the model architecture used for the experiment.
+
+The parameters commonly shared across most multimodal language models include:
+
++------------------------------------------+--------------+---------------------------------------------------------------------------------------+
+| **Parameter** | **Datatype** | **Description** |
++===========================+==============+==============+=======================================================================================+
+| :code:`micro_batch_size` | int | micro batch size that fits on each GPU |
++------------------------------------------+--------------+---------------------------------------------------------------------------------------+
+| :code:`global_batch_size` | int | global batch size that takes consideration of gradient accumulation, data parallelism |
++------------------------------------------+--------------+---------------------------------------------------------------------------------------+
+| :code:`tensor_model_parallel_size` | int | intra-layer model parallelism |
++------------------------------------------+--------------+---------------------------------------------------------------------------------------+
+| :code:`pipeline_model_parallel_size` | int | inter-layer model parallelism |
++------------------------------------------+--------------+---------------------------------------------------------------------------------------+
+| :code:`seed` | int | seed used in training |
++------------------------------------------+--------------+---------------------------------------------------------------------------------------+
+
+SALM
+~~~~
+
+For model-specific configurations, refer to `the examples `_.
+
+
+BESTOW
+~~~~~~
+
+For model-specific configurations, refer to `the examples `_.
diff --git a/docs/source/multimodal/speech_llm/datasets.rst b/docs/source/multimodal/speech_llm/datasets.rst
new file mode 100644
index 000000000000..c251213eb3d6
--- /dev/null
+++ b/docs/source/multimodal/speech_llm/datasets.rst
@@ -0,0 +1,109 @@
+SpechLLM Dataset
+================
+
+The dataset classes can be found on `NeMo GitHub `_.
+
+
+Input Manifest Format
+---------------------
+
+You'll need to prepare data in the NeMo manifest format, where each line is a python dictionary with some keys, for example:
+
+.. code-block:: yaml
+
+ {
+ "audio_filepath": "path/to/audio.wav",
+ "offset": 0.0, # offset of the audio in seconds, this is an optional field
+ "duration": 10.0 , # duration of the audio in seconds, can set to `None` to load the whole audio
+ "context": "what is the transcription of the audio?", # text prompt for the audio, see below for more details
+ "answer": "the transcription of the audio", # optional for inference, default to "na" in dataloader
+ }
+
+
+The `context` field in the manifest is optional, and you can put a list of context in a context file (one context for each line) then set `++model.data.train_ds.context_file=` to ask the dataloader to randomly pick a context from the file for each audio sample. This is useful for training with multiple prompts for the same task. If neither `context` field nor `context_file` is provided, the dataloader will use a default context `what does the audio mean?` for all audios. During inference, it is recommended to have the `context` field in the manifest.
+
+Customizing the fields to use
+-----------------------------
+
+You can also use other fields in the manifest to replace the `context` and `answer`fields, but you'll also need to change the `prompt_template` to use the new field names. For example, if you desire to use the new fields `input_text` and `output_text`, you need to set:
+
+.. code-block:: bash
+
+ ++model.data.train_ds.context_key=input_text \
+ ++model.data.train_ds.answer_key=output_text \
+ ++model.data.train_ds.prompt_template="'Q: {input_text}\nA: {output_text}'"
+
+Note that there're single quotes around the prompt template (to avoid hydra errors), and the field names are wrapped in curly braces.
+
+
+Customizing the input format
+----------------------------
+
+If you would like to use multiple audios, you can set the `audio_filepath` to be a list of audio file paths, and specify the location of each audio by using a special `audio_locator` string in the context. The choice of `audio_locator` should also be passed into the config. For example, if you have a manifest item like this:
+
+.. code-block:: yaml
+
+ {
+ "audio_filepath": ["path/to/audio1.wav", "path/to/audio2.wav"],
+ "context": "what is the transcription of the [audio] and [audio]?", # text prompt for the audio, see below for more details
+ "answer": "the transcription of the audio1 and audio2", # optional for inference, default to "na" in dataloader
+ }
+
+
+You can set the `audio_locator` to be `[audio]` in the config:
+
+.. code-block:: bash
+
+ ++model.data.train_ds.audio_locator='[audio]'
+
+
+By using `audio_locator`, the dataloader will replace the `audio_locator` in the context with the corresponding audio features extracted for each audio. You need to make sure that the number of audio locators in the context matches the number of audio files in the `audio_filepath` field.
+
+
+
+Multi-task Training
+-------------------
+
+
+In order to use a context file, you can set `++model.data.train_ds.context_file=` in the command line or use multiple context files with `++model.data.train_ds.context_file=[,,...]`. If the number of context files is equal to the number of provided datasets, the dataloader will assigne each context file to a dataset. Otherwise, the dataloader will randomly pick a context file from all provided context files for each audio sample. Using multiple context files is useful for training with multiple tasks, where each task has its own set of prompts. Meanwhile, you can control the weights for different tasks/datasets by using concatentated tarred datasets, where you can assign weights to datasets by:
+
+.. code-block:: bash
+
+ ++model.data.train_ds.is_tarred=True \
+ ++model.data.train_ds.is_concat=True \
+ ++model.data.train_ds.manifest_filepath=[/path/to/data1/tarred_audio_manifest.json,/path/to/data2/tarred_audio_manifest.json] \
+ ++model.data.train_ds.tarred_audio_filepaths=[/path/to/data1/audio__OP_0..1023_CL_.tar,/path/to/data2/audio__OP_0..1023_CL_.tar] \
+ ++model.data.train_ds.concat_sampling_technique='random' \
+ ++model.data.train_ds.concat_sampling_probabilities=[0.4,0.6] \
+
+
+
+Use Lhotse Dataloader
+---------------------
+
+Speech-LLM supports NeMo dataloader and Lhotse dataloader. Most of the Lhotse specific flags can be referred to `Lhotse Dataloader `.
+Example config can be referred to `Lhotse Speech-LLM examples `_.
+
+Lhotse Dataloader also supports using a standalone YAML file to set up the manifest info:
+
+.. code-block:: bash
+
+ ++model.data.train_ds.input_cfg=$INPUT_CFG_FILE \
+
+which points to a $INPUT_CFG_FILE file like the following:
+
+.. code-block:: yaml
+
+ - input_cfg:
+ - manifest_filepath: manifest1.json
+ type: nemo
+ weight: 2.0
+ tags:
+ default_context: "please transcribe the audio"
+ - manifest_filepath: manifest2.json
+ type: nemo
+ weight: 1.0
+ tags:
+ default_context: "please translate English audio to German"
+ type: group
+ weight: 0.4
diff --git a/docs/source/multimodal/speech_llm/intro.rst b/docs/source/multimodal/speech_llm/intro.rst
new file mode 100644
index 000000000000..55ea13d7d411
--- /dev/null
+++ b/docs/source/multimodal/speech_llm/intro.rst
@@ -0,0 +1,41 @@
+Speech-agumented Large Language Models (SpeechLLM)
+==================================================
+
+The endeavor to extend Language Models (LLMs) with the ability to understand speech and audio inputs, detailed examples can be found in the `SpeechLLM example `_..
+
+.. toctree::
+ :maxdepth: 1
+ datasets
+ configs
+ api
+
+
+In general, there're three main components of a modular SpeechLLM:
+- An audio encoder that processes the input audio and produces a sequence of audio embeddings.
+- A modality adapter that processes the audio embeddings and produces a sequence of embeddings in the same latent space as the token embeddings of a pretrained large language model (LLM).
+- A pretrained large language model (LLM) that processes embeddings from the modality adapter as well as token embeddings of input prompt, and produces the text output. The audio embeddings and text token embeddings are concatenated in time dimension before going into the LLM.
+- The LLM produces text outputs based on the concatenated input audio and text embedding.
+
+
+Model Architecture
+^^^^^^^^^^^^^^^^^^
+
+One way to incorporate speech into LLM is to concatenate speech features with the token embeddings of the input text prompt before being fed into the LLM. In this way, the LLM can have direct access to the speech information when generating the output text.
+ .. image:: https://github.com/NVIDIA/NeMo/releases/download/v1.23.0/salm.png
+ :align: center
+ :alt: SALM model
+ :scale: 50%
+
+
+
+Another way is to use cross-attention mechanism, by using text embeddings to attend to speech embeddings to extract task-specific information from the speech embeddings. In order to minimize the computational cost of cross-attention, we add a cross-attention module only before the LLM.
+
+ .. image:: https://github.com/NVIDIA/NeMo/releases/download/v1.23.0/bestow.png
+ :align: center
+ :alt: BESTOW model
+ :scale: 50%
+
+
+
+
+
diff --git a/docs/source/multimodal/vlm/checkpoint.rst b/docs/source/multimodal/vlm/checkpoint.rst
index 996d9828f5aa..d984f1453510 100644
--- a/docs/source/multimodal/vlm/checkpoint.rst
+++ b/docs/source/multimodal/vlm/checkpoint.rst
@@ -35,58 +35,36 @@ To load a local ``.nemo`` checkpoint:
Replace `` with the appropriate MM model class.
-Converting Local Checkpoints
-----------------------------
-
-Only the last checkpoint is automatically saved in the ``.nemo`` format. If intermediate training checkpoints evaluation is required, a ``.nemo`` conversion might be necessary. For this, refer to the script at `script `_:
-
-.. code-block:: python
-
- python -m torch.distributed.launch --nproc_per_node= * \
- examples/multimodal/convert_ckpt_to_nemo.py \
- --checkpoint_folder \
- --checkpoint_name \
- --nemo_file_path \
- --tensor_model_parallel_size \
- --pipeline_model_parallel_size
-
Converting Community Checkpoints
--------------------------------
CLIP Checkpoints
^^^^^^^^^^^^^^^^
-To migrate community checkpoints:
-.. code-block:: python
+To migrate community checkpoints, use the following command:
+
+.. code-block:: bash
- python examples/multimodal/foundation/clip/convert_external_clip_to_nemo.py \
- --arch=ViT-H-14 \
- --version=laion2b_s32b_b79k \
- --hparams_file=path/to/saved.yaml \
- --nemo_file_path=open_clip.nemo
+ torchrun --nproc-per-node=1 /opt/NeMo/scripts/checkpoint_converters/convert_clip_hf_to_nemo.py \
+ --input_name_or_path=openai/clip-vit-large-patch14 \
+ --output_path=openai_clip.nemo \
+ --hparams_file=/opt/NeMo/examples/multimodal/vision_language_foundation/clip/conf/megatron_clip_VIT-L-14.yaml
Ensure the NeMo hparams file has the correct model architectural parameters, placed at `path/to/saved.yaml`. An example can be found in `examples/multimodal/foundation/clip/conf/megatron_clip_config.yaml`.
-For OpenCLIP migrations, provide the architecture (`arch`) and version (`version`) according to the OpenCLIP `model list `_. For Hugging Face conversions, set the version to `huggingface` and the architecture (`arch`) to the specific Hugging Face model identifier, e.g., `yuvalkirstain/PickScore_v1`.
+After conversion, you can verify the model with the following command:
-Model Parallelism Adjustment
-----------------------------
+.. code-block:: bash
-CLIP Checkpoints
-^^^^^^^^^^^^^^^^
+ wget https://upload.wikimedia.org/wikipedia/commons/0/0f/1665_Girl_with_a_Pearl_Earring.jpg
+ torchrun --nproc-per-node=1 /opt/NeMo/examples/multimodal/vision_language_foundation/clip/megatron_clip_infer.py \
+ model.restore_from_path=./openai_clip.nemo \
+ image_path=./1665_Girl_with_a_Pearl_Earring.jpg \
+ texts='["a dog", "a boy", "a girl"]'
-To adjust model parallelism from original model parallelism size to a new model parallelism size (Note: NeMo CLIP currently only supports `pipeline_model_parallel_size=1`):
+It should generate a high probability for the "a girl" tag. For example:
-.. code-block:: python
+.. code-block:: text
- python examples/nlp/language_modeling/megatron_change_num_partitions.py \
- --model_file=/path/to/source.nemo \
- --target_file=/path/to/target.nemo \
- --tensor_model_parallel_size=??? \
- --target_tensor_model_parallel_size=??? \
- --pipeline_model_parallel_size=-1 \
- --target_pipeline_model_parallel_size=1 \
- --precision=32 \
- --model_class="nemo.collections.multimodal.models.clip.megatron_clip_models.MegatronCLIPModel" \
- --tp_conversion_only
+ Given image's CLIP text probability: [('a dog', 0.0049710185), ('a boy', 0.002258187), ('a girl', 0.99277073)]
diff --git a/docs/source/starthere/fundamentals.rst b/docs/source/starthere/fundamentals.rst
new file mode 100644
index 000000000000..6413cb9d376a
--- /dev/null
+++ b/docs/source/starthere/fundamentals.rst
@@ -0,0 +1,242 @@
+NeMo Fundamentals
+=================
+
+On this page, we’ll look into how NeMo works, providing you with a solid foundation to effectively use NeMo for you :ref:`specific use case `.
+
+NeMo Models
+-----------
+
+NVIDIA NeMo is a powerful framework for building and deploying neural network models, including those used in generative AI, speech recognition, and natural language processing. NeMo stands for “Neural Modules,” which are the building blocks of the models created using this platform. NeMo includes all of the following components wrapped into a singular, cohesive unit:
+
+* neural network architecture
+
+* dataset and data loaders
+
+* preprocessing of input data and postprocessing of model outputs
+
+* loss function, optimizer, and schedulers
+
+* any other supporting infrastructure, such as tokenizers, language model configuration, and data augmentation
+
+NeMo models are built on PyTorch, with many of their components being subclasses of ``torch.nn.Module``. Additionally, NeMo models utilize PyTorch Lightning (PTL) for training, which helps reduce the boilerplate code required.
+
+NeMo models are also designed to be easily configurable; often this is done with YAML files. Below we show simplified examples of a NeMo model defined in pseudocode and a config defined in YAML. We highlight the lines where the Python config parameter is read from the YAML file.
+
+.. list-table:: Simplified examples of a model and config.
+ :widths: 1 1
+ :header-rows: 0
+
+ * - .. code-block:: python
+ :caption: NeMo model definition (Python pseudocode)
+ :linenos:
+ :emphasize-lines: 4, 7, 10, 13, 16, 20
+
+ class ExampleEncDecModel:
+ # cfg is passed so it only contains "model" section
+ def __init__(self, cfg, trainer):
+ self.tokenizer = init_from_cfg(cfg.tokenizer)
+
+
+ self.encoder = init_from_cfg(cfg.encoder)
+
+
+ self.decoder = init_from_cfg(cfg.decoder)
+
+
+ self.loss = init_from_cfg(cfg.loss)
+
+
+ # optimizer configured via parent class
+
+
+ def setup_training_data(self, cfg):
+ self.train_dl = init_dl_from_cfg(cfg.train_ds)
+
+ def forward(self, batch):
+ # forward pass defined,
+ # as is standard for PyTorch models
+ ...
+
+ def training_step(self, batch):
+ log_probs = self.forward(batch)
+ loss = self.loss(log_probs, labels)
+ return loss
+
+
+ - .. code-block:: yaml
+ :caption: Experiment config (YAML)
+ :linenos:
+ :emphasize-lines: 4, 7, 10, 13, 16, 20
+
+ #
+ # configuration of the NeMo model
+ model:
+ tokenizer:
+ ...
+
+ encoder:
+ ...
+
+ decoder:
+ ...
+
+ loss:
+ ...
+
+ optim:
+ ...
+
+
+ train_ds:
+ ...
+
+ # configuration of the
+ # PyTorch Lightning trainer object
+ trainer:
+ ...
+
+
+Configuring and Training NeMo Models
+------------------------------------
+
+During initialization of the model, the "model" section of the config is passed into the model's constructor (as the variable ``cfg``, see line 3 of the left panel above). The model class will read key parameters from the ``cfg`` variable to configure the model (see highlighted lines in the left panel above).
+
+The other object passed into the model's constructor is a PyTorch Lightning ``trainer`` object, which manages the training process. The trainer handles the standard training `boilerplate `__. For non-standard tasks, PyTorch Lightning (PTL) relies on specific methods defined in our NeMo model. For example, PTL mandates that every model must have a specified ``training_step`` method (left panel above, line 27).
+
+The trainer’s configuration is also specified in the config (right panel above, line 25 onwards). This includes parameters such as ``accelerator``, (number of) ``devices``, ``max_steps``, (numerical) ``precision`` and `more `__.
+
+
+Example Training Script
+-----------------------
+
+Below is an example training script for our ``ExampleEncDecModel`` model. We highlight the three most important lines that combine everything we discussed in the previous section:
+
+.. code-block:: python
+ :caption: run_example_training.py
+ :linenos:
+ :emphasize-lines: 10, 11, 12
+
+ import pytorch_lightning as pl
+ from nemo.collections.path_to_model_class import ExampleEncDecModel
+ from nemo.core.config import hydra_runner
+
+ @hydra_runner(
+ config_path="config_file_dir_path",
+ config_name="config_file_name"
+ )
+ def main(cfg):
+ trainer = pl.Trainer(**cfg.trainer)
+ model = ExampleEncDecModel(cfg.model, trainer)
+ trainer.fit(model)
+
+ if __name__ == '__main__':
+ main(cfg)
+
+
+Let's go through the code:
+
+* *Lines 1-3*: import statements (second one is made up for the example).
+* *Lines 5-8*: the decorator will look for a config file at ``{config_path}/{config_name}.yaml`` and load its contents into the ``cfg`` object that is passed into the ``main`` function on line 9. This functionality is provided by `Hydra `__. Instead of a YAML file, we could also have specified the default config as a dataclass and passed that into the ``@hydra_runner`` decorator.
+* *Line 10*: initialize a PTL trainer object using the parameters specified in the ``trainer`` section of the config.
+* *Line 11*: initialize a NeMo model, passing in both the parameters in the ``model`` section of the config, and a PTL ``trainer`` object.
+* *Line 12*: call ``trainer.fit`` on the model. This one unassuming line will carry out our entire training process. PTL will make sure we iterate over our data and call the ``training_step`` we define for each batch (as well as any other PTL `callbacks `__ that may have been defined).
+
+
+
+Overriding Configs
+------------------
+
+The ``cfg`` object in the script above is a dictionary-like object that contains our configuration parameters. Specifically, it is an `OmegaConf `__ ``DictConfig`` object. These objects have special features such as dot-notation `access `__, `variable interpolation `__, and the ability to set `mandatory values `__.
+
+You can run the script above by running the following:
+
+.. code-block:: bash
+
+ python run_example_training.py
+
+The script will use the default config file specified inside the ``@hydra_runner`` decorator.
+
+To specify a different config file, you can call the script like this:
+
+.. code-block:: diff
+
+ python run_example_training.py \
+ + --config_path="different_config_file_dir_path" \
+ + --config_name="different_config_file_name"
+
+You can also override, delete, or add elements to the config by calling a script like this:
+
+
+.. code-block:: diff
+
+ python run_example_training.py \
+ --config_path="different_config_file_dir_path" \
+ --config_name="different_config_file_name" \
+ + model.optim.lr=0.001 \ # overwriting
+ + model.train_ds.manifest_filepath="your_train_data.json" \ # overwriting
+ + ~trainer.max_epochs \ # deleting
+ + +trainer.max_steps=1000 # adding
+
+Running NeMo Scripts
+--------------------
+
+NeMo scripts typically take on the form shown above, where the Python script relies on a config object which has some specified default values that you can choose to override.
+
+The NeMo `examples `__ directory provides numerous scripts for training and inference of various existing NeMo models. It’s important to note that these scripts include default configurations for model, optimize, and training parameters, which have been fine-tuned by the NeMo team over extensive GPU-hours of experimentation. As a result, we recommend using these default configurations as a starting point for your own experiments
+
+
+NeMo Inference Scripts
+######################
+
+The examples scripts directory also contains many inference scripts such as `transcribe_speech.py `_. These inference scripts typically differ in structure from training scripts, as they include additional utilities for file I/O (reading and saving files). While inference scripts still use configurations (configs), they don’t require the ``trainer`` and ``model`` sections. Additionally, the default configs for inference scripts are usually specified as dataclasses rather than separate files. You can also modify elements via the command line.
+
+Specifying training data
+------------------------
+
+NeMo will handle creation of data loaders for you, as long as you put your data into the expected input format. You may also need to train a tokenizer before starting training. To learn more about data formats, see :doc:`LLM <../nlp/nemo_megatron/gpt/gpt_training>`, :doc:`Multimodal <../multimodal/mllm/datasets>`, :ref:`Speech AI `, and :doc:`Vision models <../vision/datasets>`.
+
+
+Model Checkpoints
+-----------------
+
+Throughout training, the model :doc:`checkpoints <../checkpoints/intro>` will be saved inside ``.nemo`` files. These are archive files containing all the necessary components to restore a usable model. For example:
+
+* model weights (``.ckpt`` files)
+* model configuration (``.yaml`` files)
+* tokenizer files
+
+The NeMo team also releases pretrained models which you can browse on `NGC `_ and `HuggingFace Hub `_.
+
+
+Fine-Tuning
+----------
+
+NeMo allows you to fine-tune models as well as train them from scratch.
+
+You can achieve this by initializing a model with random weights, then replacing some or all of those weights with the pretrained model’s weights. Afterward, continue training as usual, possibly making minor adjustments like reducing the learning rate or freezing specific model parameters.
+
+
+.. _where_next:
+
+Where To Go Next?
+-----------
+
+Here are some options:
+
+* Explore Examples or Tutorials: dive into NeMo by exploring our `examples `_ or :doc:`tutorials <./tutorials>`
+
+* Domain-Specific Documentation:
+
+ * For Large Language Models (LLMs), checkout out the :doc:`LLM <../nlp/nemo_megatron/intro>` documentation.
+ * For Multimodal tasks, refer to the :doc:`Multimodal <../multimodal/mllm/intro>` documentation.
+
+ * If you’re interested in Automatic Speech Recognition (ASR), explore the :doc:`ASR <../asr/intro>` documentation.
+ * For Text-to-Speech (TTS), find details in the :doc:`TTS <../tts/intro>` documentation.
+ * Lastly, for Vision Models, consult the :doc:`Vision Models <../vision/intro>` documentation.
+
+* `NeMo Primer `__: This tutorial provides a hands-on introduction to NeMo, PyTorch Lightning, and OmegaConf. It covers how to use, modify, save, and restore NeMo models.
+
+* `NeMo Models `__: In this tutorial, you'll learn the fundamentals of creating NeMo models.
+
+* NeMo Core Documentation: Explore the :doc:`NeMo Core <../core/core>` documentation for NeMo, which explains the inner workings of the framework.
+
diff --git a/examples/audio/audio_to_audio_eval.py b/examples/audio/audio_to_audio_eval.py
index 4e60b2ec2b52..c7b9db6efb80 100644
--- a/examples/audio/audio_to_audio_eval.py
+++ b/examples/audio/audio_to_audio_eval.py
@@ -75,7 +75,7 @@
from nemo.collections.audio.data import audio_to_audio_dataset
from nemo.collections.audio.data.audio_to_audio_lhotse import LhotseAudioToTargetDataset
-from nemo.collections.audio.metrics.audio import AudioMetricWrapper
+from nemo.collections.audio.metrics import AudioMetricWrapper, SquimMOSMetric, SquimObjectiveMetric
from nemo.collections.common.data.lhotse import get_lhotse_dataloader_from_config
from nemo.collections.common.parts.preprocessing import manifest
from nemo.core.config import hydra_runner
@@ -128,7 +128,17 @@ def get_evaluation_dataloader(config):
def get_metrics(cfg: AudioEvaluationConfig):
"""Prepare a dictionary with metrics."""
- available_metrics = ['sdr', 'sisdr', 'stoi', 'estoi', 'pesq']
+ available_metrics = [
+ 'sdr',
+ 'sisdr',
+ 'stoi',
+ 'estoi',
+ 'pesq',
+ 'squim_mos',
+ 'squim_stoi',
+ 'squim_pesq',
+ 'squim_si_sdr',
+ ]
metrics = dict()
for name in sorted(set(cfg.metrics)):
@@ -143,6 +153,14 @@ def get_metrics(cfg: AudioEvaluationConfig):
metric = AudioMetricWrapper(metric=ShortTimeObjectiveIntelligibility(fs=cfg.sample_rate, extended=True))
elif name == 'pesq':
metric = AudioMetricWrapper(metric=PerceptualEvaluationSpeechQuality(fs=cfg.sample_rate, mode='wb'))
+ elif name == 'squim_mos':
+ metric = AudioMetricWrapper(metric=SquimMOSMetric(fs=cfg.sample_rate))
+ elif name == 'squim_stoi':
+ metric = AudioMetricWrapper(metric=SquimObjectiveMetric(metric='stoi', fs=cfg.sample_rate))
+ elif name == 'squim_pesq':
+ metric = AudioMetricWrapper(metric=SquimObjectiveMetric(metric='pesq', fs=cfg.sample_rate))
+ elif name == 'squim_si_sdr':
+ metric = AudioMetricWrapper(metric=SquimObjectiveMetric(metric='si_sdr', fs=cfg.sample_rate))
else:
raise ValueError(f'Unexpected metric: {name}. Currently available metrics: {available_metrics}')
diff --git a/examples/llm/megatron_gpt_pretraining.py b/examples/llm/megatron_gpt_pretraining.py
index a88e01ba5dda..d3d049e4296e 100644
--- a/examples/llm/megatron_gpt_pretraining.py
+++ b/examples/llm/megatron_gpt_pretraining.py
@@ -65,7 +65,6 @@ def get_args():
checkpoint_callback = ModelCheckpoint(
every_n_train_steps=5000,
enable_nemo_ckpt_io=False,
- async_save=False,
)
callbacks = [checkpoint_callback]
@@ -92,6 +91,7 @@ def get_args():
logger=loggers,
callbacks=callbacks,
log_every_n_steps=1,
+ limit_val_batches=2,
plugins=nl.MegatronMixedPrecision(precision="bf16-mixed", amp_O2=False),
)
diff --git a/examples/multimodal/multimodal_llm/neva/conf/llava_config.yaml b/examples/multimodal/multimodal_llm/neva/conf/llava_config.yaml
index 3ec90b2d1b53..d8a31fa19ca9 100644
--- a/examples/multimodal/multimodal_llm/neva/conf/llava_config.yaml
+++ b/examples/multimodal/multimodal_llm/neva/conf/llava_config.yaml
@@ -71,10 +71,10 @@ model:
freeze: False
model_type: llama_2 # Only support nvgpt or llama_2
vision_encoder:
- from_pretrained: "openai/clip-vit-large-patch14" # path or name
+ from_pretrained: "openai/clip-vit-large-patch14-336" # path or name
from_hf: True
patch_dim: 14
- crop_size: [224, 224]
+ crop_size: [336, 336]
hidden_size: 1024 # could be found from model but tricky in code
vision_select_layer: -2 # default to the last layer
class_token_length: 1
diff --git a/examples/multimodal/vision_language_foundation/clip/conf/megatron_siglip_config.yaml b/examples/multimodal/vision_language_foundation/clip/conf/megatron_siglip_config.yaml
new file mode 100644
index 000000000000..59f21813ce01
--- /dev/null
+++ b/examples/multimodal/vision_language_foundation/clip/conf/megatron_siglip_config.yaml
@@ -0,0 +1,253 @@
+name: megatron_siglip
+restore_from_path: null # used when starting from a .nemo file
+
+trainer:
+ devices: 1
+ num_nodes: 1
+ accelerator: gpu
+ precision: bf16
+ logger: False # logger provided by exp_manager
+ enable_checkpointing: False
+ use_distributed_sampler: False
+ max_epochs: -1 # PTL default. In practice, max_steps will be reached first.
+ max_steps: 375000 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches
+ log_every_n_steps: 10
+ val_check_interval: 100
+ check_val_every_n_epoch: null
+ limit_val_batches: 50
+ limit_test_batches: 500
+ accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models
+ gradient_clip_val: 1.0
+ benchmark: False
+ enable_model_summary: False # default PTL callback for this does not support model parallelism, instead we log manually
+
+exp_manager:
+ explicit_log_dir: null
+ exp_dir: null
+ name: megatron_siglip
+ create_wandb_logger: False
+ wandb_logger_kwargs:
+ project: null
+ name: null
+ resume_if_exists: True
+ resume_ignore_no_checkpoint: True
+ resume_from_checkpoint: ${model.resume_from_checkpoint}
+ create_checkpoint_callback: True
+ checkpoint_callback_params:
+ monitor: val_loss
+ save_top_k: 10
+ mode: min
+ always_save_nemo: False # saves nemo file during validation, not implemented for model parallel
+ save_nemo_on_train_end: False # not recommended when training large models on clusters with short time limits
+ filename: 'megatron_siglip--{val_loss:.2f}-{step}-{consumed_samples}'
+ model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}}
+ ema:
+ enable: False
+ decay: 0.9999
+ validate_original_weights: False
+ every_n_steps: 1
+ cpu_offload: False
+
+model:
+ precision: ${trainer.precision}
+ # specify micro_batch_size, global_batch_size, and model parallelism
+ # gradient accumulation will be done automatically based on data_parallel_size
+ micro_batch_size: 32 # limited by GPU memory
+ global_batch_size: 32 # will use more micro batches to reach global batch size
+ tensor_model_parallel_size: 1 # intra-layer model parallelism
+ pipeline_model_parallel_size: 1 # inter-layer model parallelism
+ virtual_pipeline_model_parallel_size: null # interleaved pipeline
+
+ restore_from_path: null # used in fine-tuning
+ # multimodal configs
+ output_dim: 1152
+ # As the number of devices used to train increases, so does the space complexity of
+ # the logit matrix. Using a naïve all-gather scheme, space complexity will be
+ # `O(n^2)`. Instead, complexity may become effectively linear if the flags
+ # `--gather-with-grad` and `--local-loss` are used. This alteration results in one-to-one
+ # numerical results as the naïve method.
+
+ use_siglip: True
+ mcore_gpt: True
+ transformer_engine: True
+
+ vision:
+ precision: ${trainer.precision}
+ # vision configs
+ patch_dim: 14
+ img_h: 378
+ img_w: 378
+ image_mean: null
+ image_std: null
+ num_channels: 3
+ drop_patch_rate: 0.0
+ drop_path_rate: 0.0
+ global_average_pool: False
+ output_dim: ${model.output_dim}
+ class_token_length: 0
+ preprocess_layernorm: True # apply layer norm to embedded tokens
+
+ # model architecture
+ encoder_seq_length: 196
+ max_position_embeddings: ${.encoder_seq_length}
+ position_embedding_type: learned_absolute
+ num_layers: 27
+ hidden_size: 1152
+ ffn_hidden_size: 4304 # Transformer FFN hidden size. Usually 4 * hidden_size.
+ num_attention_heads: 16
+ init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.')
+ use_scaled_init_method: True # use scaled residuals initialization
+ hidden_dropout: 0. # Dropout probability for hidden state transformer.
+ attention_dropout: 0.
+ kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null
+ apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number.
+ normalization: layernorm # Type of normalization layers
+ layernorm_epsilon: 1e-5
+ do_layer_norm_weight_decay: False # True means weight decay on all params
+ pre_process: True # add embedding
+ post_process: True # add pooler
+ persist_layer_norm: True # Use of persistent fused layer norm kernel.
+
+ ## Activation Checkpointing
+ activations_checkpoint_granularity: null # 'selective' or 'full'
+ activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective'
+ activations_checkpoint_num_layers: null # not used with 'selective'
+ sequence_parallel: False
+
+ # precision
+ native_amp_init_scale: 4294967296 # 2 ** 32
+ native_amp_growth_interval: 1000
+ hysteresis: 2 # Gradient scale hysteresis
+ fp32_residual_connection: False # Move residual connections to fp32
+ fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16
+
+ # model fusions
+ masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask.
+ bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition.
+
+ use_cpu_initialization: False # Init weights on the CPU (slow for large models)
+ onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter.
+ gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism.
+ openai_gelu: True
+ bias_activation_fusion: False
+ activation: approx-gelu
+ megatron_legacy: False
+
+
+ text:
+ precision: ${trainer.precision}
+ # text configs
+ output_dim: ${model.output_dim}
+
+ # model architecture
+ encoder_seq_length: 64
+ max_position_embeddings: ${.encoder_seq_length}
+ position_embedding_type: learned_absolute
+ num_layers: 27
+ hidden_size: 1152
+ ffn_hidden_size: 4304 # Transformer FFN hidden size. Usually 4 * hidden_size.
+ num_attention_heads: 16
+ init_method_std: 0.02 # Standard deviation of the zero mean normal distribution used for weight initialization.')
+ use_scaled_init_method: True # use scaled residuals initialization
+ hidden_dropout: 0. # Dropout probability for hidden state transformer.
+ attention_dropout: 0.
+ kv_channels: null # Projection weights dimension in multi-head attention. Set to hidden_size // num_attention_heads if null
+ apply_query_key_layer_scaling: True # scale Q * K^T by 1 / layer-number.
+ normalization: layernorm # Type of normalization layers
+ layernorm_epsilon: 1e-5
+ do_layer_norm_weight_decay: False # True means weight decay on all params
+ pre_process: True # add embedding
+ post_process: True # add pooler
+ persist_layer_norm: True # Use of persistent fused layer norm kernel.
+
+ ## Activation Checkpointing
+ activations_checkpoint_granularity: null # 'selective' or 'full'
+ activations_checkpoint_method: null # 'uniform', 'block', not used with 'selective'
+ activations_checkpoint_num_layers: null # not used with 'selective'
+ num_micro_batches_with_partial_activation_checkpoints: null
+ activations_checkpoint_layers_per_pipeline: null
+ sequence_parallel: False
+
+ # precision
+ native_amp_init_scale: 4294967296 # 2 ** 32
+ native_amp_growth_interval: 1000
+ hysteresis: 2 # Gradient scale hysteresis
+ fp32_residual_connection: False # Move residual connections to fp32
+ fp16_lm_cross_entropy: False # Move the cross entropy unreduced loss calculation for lm head to fp16
+
+ # model fusions
+ masked_softmax_fusion: True # Use a kernel that fuses the attention softmax with it's mask.
+ bias_dropout_add_fusion: True # Use a kernel that fuses the bias addition, dropout and residual connection addition.
+
+ use_cpu_initialization: False # Init weights on the CPU (slow for large models)
+ onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter.
+ gradient_accumulation_fusion: False # Fuse weight gradient accumulation to GEMMs. Only used with pipeline parallelism.
+ openai_gelu: True
+ bias_activation_fusion: False
+ megatron_legacy: False
+
+ fp8: False # enables fp8 in TransformerLayer forward
+ fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3
+ fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID
+ fp8_margin: 0 # scaling margin
+ fp8_interval: 1 # scaling update interval
+ fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor
+ fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history
+ use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False.
+ activation: approx-gelu
+
+ # Megatron O2-style half-precision
+ megatron_amp_O2: True # Enable O2-level automatic mixed precision using main parameters
+ grad_allreduce_chunk_size_mb: 125
+ grad_div_ar_fusion: True # Fuse grad division into torch.distributed.all_reduce
+
+ # miscellaneous
+ seed: 1234
+ resume_from_checkpoint: null # manually set the checkpoint file to load from
+ apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
+ gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
+
+ tokenizer:
+ library: 'huggingface'
+ type: 'google/siglip-so400m-patch14-384'
+ model: null
+ vocab_file: null
+ merge_file: null
+ delimiter: null # only used for tabular tokenizer
+ sentencepiece_legacy: False # Legacy=True allows you to add special tokens to sentencepiece tokenizers.
+ make_vocab_size_divisible_by: 128 # Pad the vocab size to be divisible by this value for computation efficiency.
+
+ data:
+ num_workers: 8
+ train:
+ dataset_path: # List of paths to pkl files or tar files
+ - /datasets/coyo/test.pkl
+ validation: # List of paths to pkl files or tar files
+ dataset_path:
+ - /datasets/coyo/test.pkl
+ webdataset:
+ infinite_sampler: False
+ local_root_path: /datasets/coyo
+
+ imagenet_val: null # Path to imagenet val set for conducting zero shot evaluation.
+
+ # Nsys profiling options
+ nsys_profile:
+ enabled: False
+ start_step: 10 # Global batch to start profiling
+ end_step: 10 # Global batch to end profiling
+ ranks: [ 0 ] # Global rank IDs to profile
+ gen_shape: False # Generate model and kernel details including input shapes
+
+ optim:
+ name: fused_adam
+ lr: 1e-3
+ weight_decay: 0.2
+ betas:
+ - 0.9
+ - 0.98
+ sched:
+ name: CosineAnnealing
+ warmup_steps: 2000
+ constant_steps: 0
+ min_lr: 1e-5
\ No newline at end of file
diff --git a/examples/multimodal/vision_language_foundation/clip/convert_external_clip_to_nemo.py b/examples/multimodal/vision_language_foundation/clip/convert_external_clip_to_nemo.py
index 9af25181d07e..178140aac828 100644
--- a/examples/multimodal/vision_language_foundation/clip/convert_external_clip_to_nemo.py
+++ b/examples/multimodal/vision_language_foundation/clip/convert_external_clip_to_nemo.py
@@ -283,7 +283,10 @@ def convert(local_rank, rank, world_size, args):
if __name__ == '__main__':
- logging.warning("This script is going to be deprecated soon. Please use ")
+ logging.warning(
+ "This script is going to be deprecated soon. Please use "
+ "`scripts/checkpoint_converters/convert_clip_hf_to_nemo.py`"
+ )
args = get_args()
local_rank, rank, world_size = initialize_distributed(args)
convert(local_rank, rank, world_size, args)
diff --git a/examples/nlp/information_retrieval/conf/megatron_bert_embedding_config.yaml b/examples/nlp/information_retrieval/conf/megatron_bert_embedding_config.yaml
index 0b57313fb0a0..7e4ecf09f5a0 100644
--- a/examples/nlp/information_retrieval/conf/megatron_bert_embedding_config.yaml
+++ b/examples/nlp/information_retrieval/conf/megatron_bert_embedding_config.yaml
@@ -77,6 +77,11 @@ model:
vocab_file: null
merge_file: null
+ # embedding-specific arguemnts
+ softmax_temp: 0.02 # softmax temp for contrastive loss
+ global_inbatch_negatives: True # whether to use in-batch negatives from other ranks during training
+ backprop_type: 'global' # whether to use `global` or `local` backpropagation during training. Refer to Flava paper for details.
+
# precision
native_amp_init_scale: 4294967296 # 2 ** 32
native_amp_growth_interval: 1000
@@ -93,7 +98,7 @@ model:
use_cpu_initialization: False # Init weights on the CPU (slow for large models)
onnx_safe: False # Use work-arounds for known problems with Torch ONNX exporter.
gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
-
+
## Activation Checkpointing
# NeMo Megatron supports 'selective' activation checkpointing where only the memory intensive part of attention is checkpointed.
# These memory intensive activations are also less compute intensive which makes activation checkpointing more efficient for LLMs (20B+).
@@ -127,7 +132,7 @@ model:
# Path to data must be specified by the user.
data_train: null
data_validation: null
- hard_negatives_to_train: 4
+ hard_negatives_to_train: 4 # number of hard negatives to use per example for training
index_mapping_dir: null # path to save index mapping .npy files, by default will save in the same location as data_prefix
data_impl: mmap
splits_string: 900,50,50
diff --git a/examples/nlp/information_retrieval/megatron_bert_embedding_finetuning.py b/examples/nlp/information_retrieval/megatron_bert_embedding_finetuning.py
index 04d12fed9eca..7486b470425a 100644
--- a/examples/nlp/information_retrieval/megatron_bert_embedding_finetuning.py
+++ b/examples/nlp/information_retrieval/megatron_bert_embedding_finetuning.py
@@ -37,7 +37,7 @@ def main(cfg) -> None:
model_cfg = MegatronBertEmbeddingModel.merge_cfg_with(cfg.restore_from_path, cfg)
assert (
- model_cfg.micro_batch_size * cfg.trainer.devices == model_cfg.global_batch_size
+ model_cfg.micro_batch_size * cfg.trainer.devices * cfg.trainer.num_nodes == model_cfg.global_batch_size
), "Gradiant accumulation is not supported for contrastive learning yet"
OmegaConf.set_struct(model_cfg, True)
diff --git a/examples/nlp/information_retrieval/megatron_bert_embedding_generate.py b/examples/nlp/information_retrieval/megatron_bert_embedding_generate.py
new file mode 100644
index 000000000000..9814129b837d
--- /dev/null
+++ b/examples/nlp/information_retrieval/megatron_bert_embedding_generate.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch.multiprocessing as mp
+from omegaconf.omegaconf import OmegaConf, open_dict
+
+from nemo.collections.nlp.models.information_retrieval.megatron_bert_embedding_model import MegatronBertEmbeddingModel
+from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronBertTrainerBuilder
+from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+from nemo.utils.exp_manager import exp_manager
+
+
+@hydra_runner(config_path="conf", config_name="megatron_bert_embedding_config")
+def main(cfg) -> None:
+ if cfg.model.data.dataloader_type != "LDDL":
+ mp.set_start_method("spawn", force=True)
+
+ logging.info("\n\n************** Experiment configuration ***********")
+ logging.info(f'\n{OmegaConf.to_yaml(cfg)}')
+
+ trainer = MegatronBertTrainerBuilder(cfg).create_trainer()
+ exp_manager(trainer, cfg.exp_manager)
+
+ model_cfg = MegatronBertEmbeddingModel.merge_cfg_with(cfg.restore_from_path, cfg)
+
+ OmegaConf.set_struct(model_cfg, True)
+ with open_dict(model_cfg):
+ model_cfg.precision = trainer.precision
+
+ logging.info(f"Loading model from {cfg.restore_from_path}")
+ model = MegatronBertEmbeddingModel.restore_from(
+ restore_path=cfg.restore_from_path,
+ trainer=trainer,
+ save_restore_connector=NLPSaveRestoreConnector(),
+ override_config_path=model_cfg,
+ strict=True,
+ )
+
+ trainer.test(model)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
index 809ca30ca5ed..85609c2dd9b0 100755
--- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
+++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
@@ -276,6 +276,7 @@ model:
seq_length: ${model.encoder_seq_length}
skip_warmup: True
num_workers: 2
+ num_dataset_builder_threads: 1
dataloader_type: single # cyclic
reset_position_ids: False # Reset position ids after end-of-document token
reset_attention_mask: False # Reset attention mask after end-of-document token
@@ -284,7 +285,8 @@ model:
no_seqlen_plus_one_input_tokens: False # Set to True to disable fetching (sequence length + 1) input tokens, instead get (sequence length) input tokens and mask the last token
pad_samples_to_global_batch_size: False # Set to True if you want to pad the last partial batch with -1's to equal global batch size
shuffle_documents: True # Set to False to disable documents shuffling. Sample index will still be shuffled
- exchange_indices_distributed: False # Set to True to exchange indices via torch.distributed instead of filesystem
+ exchange_indices_distributed: False # Set to True to exchange indices via torch.distributed instead of filesystem
+ data_cache_generation_only: False # Set to True to generate only the data cache and stop the training script
# Nsys profiling options
nsys_profile:
diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_ptq.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_ptq.yaml
index c70719f51210..f603ebb58eb7 100644
--- a/examples/nlp/language_modeling/conf/megatron_gpt_ptq.yaml
+++ b/examples/nlp/language_modeling/conf/megatron_gpt_ptq.yaml
@@ -44,4 +44,5 @@ export:
inference_pipeline_parallel: 1 # Default using 1 PP for inference
dtype: ${trainer.precision} # Default precision data type
save_path: llama2-7b-${quantization.algorithm}.qnemo # Path where the quantized model will be saved
- compress: false # Wheter save_path should be a tarball or a directory
+ compress: false # Whether save_path should be a tarball or a directory
+ sample_output: true # Whether to run a sample prompt before saving
diff --git a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_finetuning_config.yaml b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_finetuning_config.yaml
index 6517b62010b4..06551f46486c 100644
--- a/examples/nlp/language_modeling/tuning/conf/megatron_gpt_finetuning_config.yaml
+++ b/examples/nlp/language_modeling/tuning/conf/megatron_gpt_finetuning_config.yaml
@@ -158,6 +158,7 @@ model:
index_mapping_dir: null # Path to a directory to write index mapping files.
prompt_template: "{input} {output}" # fstring to use for assistant prompt. Example: "Q: {input}\nA: {output}"
truncation_method: 'right' # Truncation from which position, Options: ['left', 'right']
+ global_sample_mapping: False # Whether to shuffle the replicated data all together, or shuffle the dataset within each epoch
validation_ds:
file_names: ??? # Path to a list of JSONL files corresponding to the source data. Data format is identical to train_ds.
names: null # Names of the corresponding datasets used to log metrics.
@@ -181,6 +182,7 @@ model:
prompt_template: ${model.data.train_ds.prompt_template} # fstring to use for assistant prompt. Example: "Q: {input}\nA: {output}"
tokens_to_generate: 32 # decide how many tokens we want to generate to evaluate performance with string metrics
truncation_method: 'right' # Truncation from which position, Options: ['left', 'right']
+ global_sample_mapping: False # Whether to shuffle the replicated data all together, or shuffle the dataset within each epoch
metric:
name: "loss" # Name of the evaluation metric to use. Options: ['exact_string_match', 'loss']
average: null # Average the metric over the dataset. Options: ['macro', 'micro']. Works only for 'F1', 'accuracy' etc. Refer to torchmetrics for metrics where this is supported.
@@ -208,6 +210,7 @@ model:
prompt_template: ${model.data.train_ds.prompt_template}
tokens_to_generate: 32 # decide how many tokens we want to generate to evaluate performance with string metrics
truncation_method: 'right' # Truncation from which position, Options: ['left', 'right']
+ global_sample_mapping: False # Whether to shuffle the replicated data all together, or shuffle the dataset within each epoch
metric:
name: "loss" # Name of the evaluation metric to use. Options: ['exact_string_match', 'loss']
average: null # Average the metric over the dataset. Options: ['macro', 'micro']. Works only for 'F1', 'accuracy' etc. Refer to torchmetrics for metrics where this is supported.
diff --git a/nemo/collections/asr/data/audio_to_text_lhotse_prompted.py b/nemo/collections/asr/data/audio_to_text_lhotse_prompted.py
index e9e97d3d32d7..4779e3677b05 100644
--- a/nemo/collections/asr/data/audio_to_text_lhotse_prompted.py
+++ b/nemo/collections/asr/data/audio_to_text_lhotse_prompted.py
@@ -173,7 +173,7 @@ def canary(
role="assistant",
slots={
"text": ' '.join(s.text for s in cut.supervisions),
- formatter.PROMPT_LANGUAGE_SLOT: cut.custom["target_lang"],
+ formatter.PROMPT_LANGUAGE_SLOT: cut.supervisions[0].language,
},
),
]
diff --git a/nemo/collections/asr/metrics/bleu.py b/nemo/collections/asr/metrics/bleu.py
index 011e3efe0c6a..32bd25d952d4 100644
--- a/nemo/collections/asr/metrics/bleu.py
+++ b/nemo/collections/asr/metrics/bleu.py
@@ -34,12 +34,12 @@ def move_dimension_to_the_front(tensor, dim_index):
# TODO: Add documentation
class BLEU(SacreBLEUScore):
"""
- This metric computes numerator, denominator, hypotheses lengths, and target lengths for Overall Bilingual Evaluation Understudy (BLEU)
- between prediction and reference texts. When doing distributed training/evaluation the result of
+ This metric computes numerator, denominator, hypotheses lengths, and target lengths for Overall Bilingual Evaluation Understudy (BLEU)
+ between prediction and reference texts. When doing distributed training/evaluation the result of
``res=BLEU.(predictions, predictions_lengths, targets, target_lengths)``
calls will be all-reduced between all workers using SUM operations.
- If used with PytorchLightning LightningModule, include bleu_num bleur_den, bleu_pred_len, and bleu_target_len values inside
+ If used with PytorchLightning LightningModule, include bleu_num bleur_den, bleu_pred_len, and bleu_target_len values inside
validation_step results. Then aggregate (sum) then at the end of validation epoch to correctly compute validation BLEUR.
Example:
@@ -99,7 +99,6 @@ def __init__(
smooth=smooth,
dist_sync_on_step=dist_sync_on_step,
)
- self.has_spl_tokens = False
self.decoding = decoding
self.decode = None
if isinstance(self.decoding, AbstractRNNTDecoding):
@@ -113,7 +112,6 @@ def __init__(
fold_consecutive=self.fold_consecutive,
)
elif isinstance(self.decoding, AbstractMultiTaskDecoding):
- self.has_spl_tokens = True
self.decode = lambda predictions, prediction_lengths, predictions_mask, input_ids, targets: self.decoding.decode_predictions_tensor(
encoder_hidden_states=predictions,
encoder_input_mask=predictions_mask,
@@ -165,10 +163,6 @@ def update(
references.append(reference)
hypotheses, _ = self.decode(predictions, predictions_lengths, predictions_mask, input_ids, targets)
- if self.has_spl_tokens:
- hypotheses = [self.decoding.strip_special_tokens(hyp) for hyp in hypotheses]
- references = [self.decoding.strip_special_tokens(ref) for ref in references]
-
if self.log_prediction:
logging.info(f"\n")
logging.info(f"reference:{references[0]}")
@@ -185,7 +179,7 @@ def compute(self, return_all_metrics=True, prefix="", suffix=""):
only BLEU. Default: True.
prefix: str to prepend to metric value keys.
suffix: str to append to metric value keys.
-
+
Returns:
Dict: key-value pairs of BLEU metrics and values. Keys are prepended and appended with prefix
and suffix flags, respectively.
@@ -205,7 +199,11 @@ def compute(self, return_all_metrics=True, prefix="", suffix=""):
# Adding wrapper to avoid imports and extra variables over the namespace
def _compute_bleu(
- self, predictions_lengths, targets_lengths, numerator, denominator,
+ self,
+ predictions_lengths,
+ targets_lengths,
+ numerator,
+ denominator,
):
return _bleu_score_compute(
predictions_lengths, targets_lengths, numerator, denominator, self.n_gram, self.weights, self.smooth
diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py
index 1cb4cf06eaca..a135e5c51e84 100644
--- a/nemo/collections/asr/metrics/wer.py
+++ b/nemo/collections/asr/metrics/wer.py
@@ -148,8 +148,8 @@ def word_error_rate_detail(
def word_error_rate_per_utt(hypotheses: List[str], references: List[str], use_cer=False) -> Tuple[List[float], float]:
"""
Computes Word Error Rate per utterance and the average WER
- between two texts represented as corresponding lists of string.
-
+ between two texts represented as corresponding lists of string.
+
Hypotheses and references must have same length.
Args:
@@ -263,7 +263,6 @@ def __init__(
self.fold_consecutive = fold_consecutive
self.batch_dim_index = batch_dim_index
- self.has_spl_tokens = False
self.decode = None
if isinstance(self.decoding, AbstractRNNTDecoding):
self.decode = lambda predictions, predictions_lengths, predictions_mask, input_ids, targets: self.decoding.rnnt_decoder_predictions_tensor(
@@ -276,7 +275,6 @@ def __init__(
fold_consecutive=self.fold_consecutive,
)
elif isinstance(self.decoding, AbstractMultiTaskDecoding):
- self.has_spl_tokens = True
self.decode = lambda predictions, prediction_lengths, predictions_mask, input_ids, targets: self.decoding.decode_predictions_tensor(
encoder_hidden_states=predictions,
encoder_input_mask=predictions_mask,
@@ -326,10 +324,6 @@ def update(
references.append(reference)
hypotheses, _ = self.decode(predictions, predictions_lengths, predictions_mask, input_ids, targets)
- if self.has_spl_tokens:
- hypotheses = [self.decoding.strip_special_tokens(hyp) for hyp in hypotheses]
- references = [self.decoding.strip_special_tokens(ref) for ref in references]
-
if self.log_prediction:
logging.info(f"\n")
logging.info(f"reference:{references[0]}")
diff --git a/nemo/collections/asr/models/aed_multitask_models.py b/nemo/collections/asr/models/aed_multitask_models.py
index dbf8013af331..5ad91e75a867 100644
--- a/nemo/collections/asr/models/aed_multitask_models.py
+++ b/nemo/collections/asr/models/aed_multitask_models.py
@@ -918,19 +918,6 @@ def _transcribe_output_processing(self, outputs, trcfg: MultiTaskTranscriptionCo
return_hypotheses=trcfg.return_hypotheses,
)
- if trcfg.return_hypotheses:
- for hyp in best_hypotheses:
- hyp.text = self.decoding.strip_special_tokens(hyp.text)
- if all_hypotheses is not None:
- for i in range(len(all_hypotheses)):
- for j in range(len(all_hypotheses[i])):
- all_hypotheses[i][j].text = self.decoding.strip_special_tokens(all_hypotheses[i][j].text)
- else:
- best_hypotheses = [self.decoding.strip_special_tokens(text) for text in best_hypotheses]
- if all_hypotheses is not None:
- for i in range(len(all_hypotheses)):
- all_hypotheses[i] = [self.decoding.strip_special_tokens(text) for text in all_hypotheses[i]]
-
del enc_states, enc_mask, decoder_input_ids
if all_hypotheses is None:
return best_hypotheses
diff --git a/nemo/collections/asr/parts/submodules/multitask_beam_decoding.py b/nemo/collections/asr/parts/submodules/multitask_beam_decoding.py
index c6dc28a47480..ab3938eebe35 100644
--- a/nemo/collections/asr/parts/submodules/multitask_beam_decoding.py
+++ b/nemo/collections/asr/parts/submodules/multitask_beam_decoding.py
@@ -102,8 +102,7 @@ class TransformerAEDBeamInfer(AEDBeamInfer, Typing):
@property
def input_types(self):
- """Returns definitions of module input ports.
- """
+ """Returns definitions of module input ports."""
# Input can be of dimention -
# ('B', 'T', 'D') [Log probs] or ('B', 'T') [Labels]
@@ -116,8 +115,7 @@ def input_types(self):
@property
def output_types(self):
- """Returns definitions of module output ports.
- """
+ """Returns definitions of module output ports."""
return {"predictions": [NeuralType(elements_type=HypothesisType())]}
def __init__(
@@ -141,15 +139,18 @@ def __init__(
preserve_alignments=preserve_alignments,
)
self.beam_size = beam_size
+ self.bos = tokenizer.bos
+ self.pad = tokenizer.pad
+ self.eos = tokenizer.eos
self.beam_search = BeamSearchSequenceGenerator(
embedding=transformer_decoder.embedding,
decoder=transformer_decoder.decoder,
log_softmax=log_softmax_module,
max_sequence_length=transformer_decoder.max_sequence_length,
beam_size=beam_size,
- bos=tokenizer.bos_id,
- pad=tokenizer.pad_id,
- eos=tokenizer.eos_id,
+ bos=self.bos,
+ pad=self.pad,
+ eos=self.eos,
len_pen=length_penalty,
max_delta_length=max_generation_delta,
)
@@ -196,9 +197,9 @@ def forward(
for i in range(len(topk_hypotheses)):
hypotheses = [Hypothesis(score=0.0, y_sequence=[], timestep=[]) for _ in range(self.beam_size)]
# Pack results into Hypotheses
- packed_result.append(
- NBestHypotheses(pack_hypotheses(hypotheses, topk_hypotheses[i], beam_scores[i]))
- )
+ hypotheses = pack_hypotheses(hypotheses, topk_hypotheses[i], beam_scores[i])
+ self.format_hypotheses(hypotheses, decoder_input_ids)
+ packed_result.append(NBestHypotheses(hypotheses))
else:
beam_scores = [None for _ in range(len(best_hypo))]
best_hypo = best_hypo.detach().cpu()
@@ -207,9 +208,35 @@ def forward(
]
# Pack results into Hypotheses
packed_result = pack_hypotheses(hypotheses, best_hypo, beam_scores)
+ self.format_hypotheses(packed_result, decoder_input_ids)
return (packed_result,)
+ def format_hypotheses(self, packed_result: List[Hypothesis], decoder_input_ids: torch.Tensor | None) -> None:
+ """
+ For each hypothesis in the mini-batch:
+ * Remove the decoder input ids (prompt) from the predictions
+ * Remove BOS, EOS, and PAD ids from the predictions.
+ Modifies results in-place.
+ """
+ if decoder_input_ids is not None:
+ assert (
+ len(packed_result) == decoder_input_ids.shape[0]
+ ), f"Mismatching number of examples {len(packed_result)=} {decoder_input_ids.shape[0]=}"
+ decoder_input_ids = decoder_input_ids.detach().cpu()
+ for hyp, prefix in zip(packed_result, decoder_input_ids):
+ assert (
+ hyp.y_sequence[: prefix.shape[0]] == prefix
+ ).all(), f"The decoder input IDs were not found at the beginning of prediction: {hyp.y_sequence=} {prefix=})"
+ hyp.y_sequence = hyp.y_sequence[prefix.shape[0] :]
+ for hyp in packed_result:
+ ids = hyp.y_sequence
+ pos = -1
+ while ids[pos] == self.pad or ids[pos] == self.eos:
+ pos -= 1
+ if pos < -1:
+ hyp.y_sequence = ids[: pos + 1]
+
@dataclass
class AEDBeamInferConfig:
diff --git a/nemo/collections/asr/parts/submodules/multitask_decoding.py b/nemo/collections/asr/parts/submodules/multitask_decoding.py
index c336ae7d4170..e2ed2ca5c4bf 100644
--- a/nemo/collections/asr/parts/submodules/multitask_decoding.py
+++ b/nemo/collections/asr/parts/submodules/multitask_decoding.py
@@ -295,17 +295,6 @@ def decode_ids_to_langs(self, tokens: List[int]) -> List[str]:
"""
raise NotImplementedError()
- def strip_special_tokens(self, text: str):
- """
- assuming all special tokens are of format
- Note that if any label/pred is of format , it will be stripped
- """
- assert isinstance(text, str), f"Expected str, got {type(text)}"
- text = re.sub(r'<[^>]+>', '', text)
- # strip spaces at the beginning and end;
- # this is training data artifact, will be fixed in future (@kpuvvada)
- return text.strip()
-
class MultiTaskDecoding(AbstractMultiTaskDecoding):
"""
diff --git a/nemo/collections/asr/parts/utils/transcribe_utils.py b/nemo/collections/asr/parts/utils/transcribe_utils.py
index c270e5c3a0f7..c26fa6f4984d 100644
--- a/nemo/collections/asr/parts/utils/transcribe_utils.py
+++ b/nemo/collections/asr/parts/utils/transcribe_utils.py
@@ -289,7 +289,7 @@ def prepare_audio_data(cfg: DictConfig) -> Tuple[List[str], bool]:
with open(cfg.dataset_manifest, "rt") as fh:
for line in fh:
item = json.loads(line)
- item["audio_filepath"] = get_full_path(item["audio_filepath"], cfg.dataset_manifest)
+ item[audio_key] = get_full_path(item[audio_key], cfg.dataset_manifest)
if item.get("duration") is None and cfg.presort_manifest:
raise ValueError(
f"Requested presort_manifest=True, but line {line} in manifest {cfg.dataset_manifest} lacks a 'duration' field."
diff --git a/nemo/collections/audio/metrics/__init__.py b/nemo/collections/audio/metrics/__init__.py
index d9155f923f18..20c8fd2fa4e2 100644
--- a/nemo/collections/audio/metrics/__init__.py
+++ b/nemo/collections/audio/metrics/__init__.py
@@ -11,3 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+from nemo.collections.audio.metrics.audio import AudioMetricWrapper
+from nemo.collections.audio.metrics.squim import SquimMOSMetric, SquimObjectiveMetric
diff --git a/nemo/collections/audio/metrics/audio.py b/nemo/collections/audio/metrics/audio.py
index 096700eff24a..0f8b5bee0fd2 100644
--- a/nemo/collections/audio/metrics/audio.py
+++ b/nemo/collections/audio/metrics/audio.py
@@ -21,6 +21,7 @@
from torchmetrics.audio.sdr import ScaleInvariantSignalDistortionRatio, SignalDistortionRatio
from torchmetrics.audio.snr import ScaleInvariantSignalNoiseRatio, SignalNoiseRatio
from torchmetrics.audio.stoi import ShortTimeObjectiveIntelligibility
+from nemo.collections.audio.metrics.squim import SquimMOSMetric, SquimObjectiveMetric
from nemo.utils import logging
@@ -34,6 +35,8 @@
SignalNoiseRatio,
PerceptualEvaluationSpeechQuality,
ShortTimeObjectiveIntelligibility,
+ SquimMOSMetric,
+ SquimObjectiveMetric,
]
diff --git a/nemo/collections/audio/metrics/squim.py b/nemo/collections/audio/metrics/squim.py
new file mode 100644
index 000000000000..c20be43f79f8
--- /dev/null
+++ b/nemo/collections/audio/metrics/squim.py
@@ -0,0 +1,197 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+import torch
+from torchmetrics import Metric
+from nemo.utils import logging
+
+try:
+ import torchaudio
+
+ HAVE_TORCHAUDIO = True
+except ModuleNotFoundError:
+ HAVE_TORCHAUDIO = False
+
+
+class SquimMOSMetric(Metric):
+ """A metric calculating the average Torchaudio Squim MOS.
+
+ Args:
+ fs: sampling rate of the input signals
+ """
+
+ sample_rate: int = 16000 # sample rate of the model
+ mos_sum: torch.Tensor
+ num_examples: torch.Tensor
+ higher_is_better: bool = True
+
+ def __init__(self, fs: int, **kwargs: Any):
+ super().__init__(**kwargs)
+
+ if not HAVE_TORCHAUDIO:
+ raise ModuleNotFoundError(f"{self.__class__.__name__} metric needs `torchaudio`.")
+
+ if fs != self.sample_rate:
+ # Resampler: kaiser_best
+ self._squim_mos_metric_resampler = torchaudio.transforms.Resample(
+ orig_freq=fs,
+ new_freq=self.sample_rate,
+ lowpass_filter_width=64,
+ rolloff=0.9475937167399596,
+ resampling_method='sinc_interp_kaiser',
+ beta=14.769656459379492,
+ )
+ logging.warning('Input signals will be resampled from fs=%d to %d Hz', fs, self.sample_rate)
+ self.fs = fs
+
+ # MOS model
+ self._squim_mos_metric_model = torchaudio.pipelines.SQUIM_SUBJECTIVE.get_model()
+
+ self.add_state('mos_sum', default=torch.tensor(0.0), dist_reduce_fx='sum')
+ self.add_state('num_examples', default=torch.tensor(0), dist_reduce_fx='sum')
+ logging.debug('Setup metric %s with input fs=%s', self.__class__.__name__, self.fs)
+
+ def update(self, preds: torch.Tensor, target: torch.Tensor) -> None:
+ """Update the metric by calculating the MOS score for the current batch.
+
+ Args:
+ preds: tensor with predictions, shape (B, T)
+ target: tensor with target signals, shape (B, T). Target can be a non-matching reference.
+ """
+ if self.fs != self.sample_rate:
+ preds = self._squim_mos_metric_resampler(preds)
+ target = self._squim_mos_metric_resampler(target)
+
+ if preds.ndim == 1:
+ # Unsqueeze batch dimension
+ preds = preds.unsqueeze(0)
+ target = target.unsqueeze(0)
+ elif preds.ndim > 2:
+ raise ValueError(f'Expected 1D or 2D signals, got {preds.ndim}D signals')
+
+ mos_batch = self._squim_mos_metric_model(preds, target)
+
+ self.mos_sum += mos_batch.sum()
+ self.num_examples += mos_batch.numel()
+
+ def compute(self) -> torch.Tensor:
+ """Compute the underlying metric."""
+ return self.mos_sum / self.num_examples
+
+ def state_dict(self, *args, **kwargs):
+ """Do not save the MOS model and resampler in the state dict."""
+ state_dict = super().state_dict(*args, **kwargs)
+ # Do not include resampler or mos_model in the state dict
+ remove_keys = [
+ key
+ for key in state_dict.keys()
+ if '_squim_mos_metric_resampler' in key or '_squim_mos_metric_model' in key
+ ]
+ for key in remove_keys:
+ del state_dict[key]
+ return state_dict
+
+
+class SquimObjectiveMetric(Metric):
+ """A metric calculating the average Torchaudio Squim objective metric.
+
+ Args:
+ fs: sampling rate of the input signals
+ metric: the objective metric to calculate. One of 'stoi', 'pesq', 'si_sdr'
+ """
+
+ sample_rate: int = 16000 # sample rate of the model
+ metric_sum: torch.Tensor
+ num_examples: torch.Tensor
+ higher_is_better: bool = True
+
+ def __init__(self, fs: int, metric: str, **kwargs: Any):
+ super().__init__(**kwargs)
+
+ if not HAVE_TORCHAUDIO:
+ raise ModuleNotFoundError(f"{self.__class__.__name__} needs `torchaudio`.")
+
+ if fs != self.sample_rate:
+ # Resampler: kaiser_best
+ self._squim_objective_metric_resampler = torchaudio.transforms.Resample(
+ orig_freq=fs,
+ new_freq=self.sample_rate,
+ lowpass_filter_width=64,
+ rolloff=0.9475937167399596,
+ resampling_method='sinc_interp_kaiser',
+ beta=14.769656459379492,
+ )
+ logging.warning('Input signals will be resampled from fs=%d to %d Hz', fs, self.sample_rate)
+ self.fs = fs
+
+ if metric not in ['stoi', 'pesq', 'si_sdr']:
+ raise ValueError(f'Unsupported metric {metric}. Supported metrics are "stoi", "pesq", "si_sdr".')
+
+ self.metric = metric
+
+ # Objective model
+ self._squim_objective_metric_model = torchaudio.pipelines.SQUIM_OBJECTIVE.get_model()
+
+ self.add_state('metric_sum', default=torch.tensor(0.0), dist_reduce_fx='sum')
+ self.add_state('num_examples', default=torch.tensor(0), dist_reduce_fx='sum')
+ logging.debug('Setup %s with metric=%s, input fs=%s', self.__class__.__name__, self.metric, self.fs)
+
+ def update(self, preds: torch.Tensor, target: Any = None) -> None:
+ """Update the metric by calculating the selected metric score for the current batch.
+
+ Args:
+ preds: tensor with predictions, shape (B, T)
+ target: None, not used. Keeping for interfacfe compatibility with other metrics.
+ """
+ if self.fs != self.sample_rate:
+ preds = self._squim_objective_metric_resampler(preds)
+
+ if preds.ndim == 1:
+ # Unsqueeze batch dimension
+ preds = preds.unsqueeze(0)
+ elif preds.ndim > 2:
+ raise ValueError(f'Expected 1D or 2D signals, got {preds.ndim}D signals')
+
+ stoi_batch, pesq_batch, si_sdr_batch = self._squim_objective_metric_model(preds)
+
+ if self.metric == 'stoi':
+ metric_batch = stoi_batch
+ elif self.metric == 'pesq':
+ metric_batch = pesq_batch
+ elif self.metric == 'si_sdr':
+ metric_batch = si_sdr_batch
+ else:
+ raise ValueError(f'Unknown metric {self.metric}')
+
+ self.metric_sum += metric_batch.sum()
+ self.num_examples += metric_batch.numel()
+
+ def compute(self) -> torch.Tensor:
+ """Compute the underlying metric."""
+ return self.metric_sum / self.num_examples
+
+ def state_dict(self, *args, **kwargs):
+ """Do not save the MOS model and resampler in the state dict."""
+ state_dict = super().state_dict(*args, **kwargs)
+ # Do not include resampler or mos_model in the state dict
+ remove_keys = [
+ key
+ for key in state_dict.keys()
+ if '_squim_objective_metric_resampler' in key or '_squim_objective_metric_model' in key
+ ]
+ for key in remove_keys:
+ del state_dict[key]
+ return state_dict
diff --git a/nemo/collections/audio/modules/transforms.py b/nemo/collections/audio/modules/transforms.py
index ecbdca88e22b..6839ae0f7598 100644
--- a/nemo/collections/audio/modules/transforms.py
+++ b/nemo/collections/audio/modules/transforms.py
@@ -14,6 +14,7 @@
from typing import Dict, Optional, Tuple
import torch
+from einops import rearrange
from nemo.collections.asr.parts.preprocessing.features import make_seq_mask_like
from nemo.core.classes import NeuralModule, typecheck
@@ -43,6 +44,157 @@ class AudioToSpectrogram(NeuralModule):
scale: Positive scaling of the spectrogram.
"""
+ def __init__(self, fft_length: int, hop_length: int, magnitude_power: float = 1.0, scale: float = 1.0):
+ super().__init__()
+
+ # For now, assume FFT length is divisible by two
+ if fft_length % 2 != 0:
+ raise ValueError(f'fft_length = {fft_length} must be divisible by 2')
+
+ self.fft_length = fft_length
+ self.hop_length = hop_length
+ self.pad_mode = 'constant'
+ window = torch.hann_window(self.win_length)
+ self.register_buffer('window', window)
+
+ self.num_subbands = fft_length // 2 + 1
+
+ if magnitude_power <= 0:
+ raise ValueError(f'Magnitude power needs to be positive: current value {magnitude_power}')
+ self.magnitude_power = magnitude_power
+
+ if scale <= 0:
+ raise ValueError(f'Scale needs to be positive: current value {scale}')
+ self.scale = scale
+
+ logging.debug('Initialized %s with:', self.__class__.__name__)
+ logging.debug('\tfft_length: %s', fft_length)
+ logging.debug('\thop_length: %s', hop_length)
+ logging.debug('\tmagnitude_power: %s', magnitude_power)
+ logging.debug('\tscale: %s', scale)
+
+ @property
+ def win_length(self) -> int:
+ return self.fft_length
+
+ def stft(self, x: torch.Tensor):
+ """Apply STFT as in torchaudio.transforms.Spectrogram(power=None)
+
+ Args:
+ x_spec: Input time-domain signal, shape (..., T)
+
+ Returns:
+ Time-domain signal ``x_spec = STFT(x)``, shape (..., F, N).
+ """
+ # pack batch
+ B, C, T = x.size()
+ x = rearrange(x, 'B C T -> (B C) T')
+
+ x_spec = torch.stft(
+ input=x,
+ n_fft=self.fft_length,
+ hop_length=self.hop_length,
+ win_length=self.win_length,
+ window=self.window,
+ center=True,
+ pad_mode=self.pad_mode,
+ normalized=False,
+ onesided=True,
+ return_complex=True,
+ )
+
+ # unpack batch
+ x_spec = rearrange(x_spec, '(B C) F N -> B C F N', B=B, C=C)
+
+ return x_spec
+
+ @property
+ def input_types(self) -> Dict[str, NeuralType]:
+ """Returns definitions of module output ports."""
+ return {
+ "input": NeuralType(('B', 'C', 'T'), AudioSignal()),
+ "input_length": NeuralType(('B',), LengthsType(), optional=True),
+ }
+
+ @property
+ def output_types(self) -> Dict[str, NeuralType]:
+ """Returns definitions of module output ports."""
+ return {
+ "output": NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()),
+ "output_length": NeuralType(('B',), LengthsType()),
+ }
+
+ @typecheck()
+ def forward(
+ self, input: torch.Tensor, input_length: Optional[torch.Tensor] = None
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
+ """Convert a batch of C-channel input signals
+ into a batch of complex-valued spectrograms.
+
+ Args:
+ input: Time-domain input signal with C channels, shape (B, C, T)
+ input_length: Length of valid entries along the time dimension, shape (B,)
+
+ Returns:
+ Output spectrogram with F subbands and N time frames, shape (B, C, F, N)
+ and output length with shape (B,).
+ """
+ B, T = input.size(0), input.size(-1)
+ input = input.view(B, -1, T)
+
+ # STFT output (B, C, F, N)
+ with torch.cuda.amp.autocast(enabled=False):
+ output = self.stft(input.float())
+
+ if self.magnitude_power != 1:
+ # apply power on the magnitude
+ output = torch.pow(output.abs(), self.magnitude_power) * torch.exp(1j * output.angle())
+
+ if self.scale != 1:
+ # apply scaling of the coefficients
+ output = self.scale * output
+
+ if input_length is not None:
+ # Mask padded frames
+ output_length = self.get_output_length(input_length=input_length)
+
+ length_mask: torch.Tensor = make_seq_mask_like(
+ lengths=output_length, like=output, time_dim=-1, valid_ones=False
+ )
+ output = output.masked_fill(length_mask, 0.0)
+ else:
+ # Assume all frames are valid for all examples in the batch
+ output_length = output.size(-1) * torch.ones(B, device=output.device).long()
+
+ return output, output_length
+
+ def get_output_length(self, input_length: torch.Tensor) -> torch.Tensor:
+ """Get length of valid frames for the output.
+
+ Args:
+ input_length: number of valid samples, shape (B,)
+
+ Returns:
+ Number of valid frames, shape (B,)
+ """
+ # centered STFT results in (T // hop_length + 1) frames for T samples (cf. torch.stft)
+ output_length = input_length.div(self.hop_length, rounding_mode='floor').add(1).long()
+ return output_length
+
+
+class AudioToSpectrogramTA(NeuralModule):
+ """Transform a batch of input multi-channel signals into a batch of
+ STFT-based spectrograms. Using torchaudio.
+
+ Args:
+ fft_length: length of FFT
+ hop_length: length of hops/shifts of the sliding window
+ power: exponent for magnitude spectrogram. Default `None` will
+ return a complex-valued spectrogram
+ magnitude_power: Transform magnitude of the spectrogram as x^magnitude_power.
+ scale: Positive scaling of the spectrogram.
+ """
+
def __init__(self, fft_length: int, hop_length: int, magnitude_power: float = 1.0, scale: float = 1.0):
if not HAVE_TORCHAUDIO:
logging.error('Could not import torchaudio. Some features might not work.')
@@ -62,7 +214,7 @@ def __init__(self, fft_length: int, hop_length: int, magnitude_power: float = 1.
)
# number of subbands
- self.F = fft_length // 2 + 1
+ self.num_subbands = fft_length // 2 + 1
if magnitude_power <= 0:
raise ValueError(f'Magnitude power needs to be positive: current value {magnitude_power}')
@@ -78,10 +230,6 @@ def __init__(self, fft_length: int, hop_length: int, magnitude_power: float = 1.
logging.debug('\tmagnitude_power: %s', magnitude_power)
logging.debug('\tscale: %s', scale)
- @property
- def num_subbands(self) -> int:
- return self.F
-
@property
def input_types(self) -> Dict[str, NeuralType]:
"""Returns definitions of module output ports."""
@@ -166,6 +314,157 @@ class SpectrogramToAudio(NeuralModule):
scale: Spectrogram will be scaled with 1/scale before the inverse transform.
"""
+ def __init__(self, fft_length: int, hop_length: int, magnitude_power: float = 1.0, scale: float = 1.0):
+ super().__init__()
+
+ # For now, assume FFT length is divisible by two
+ if fft_length % 2 != 0:
+ raise ValueError(f'fft_length = {fft_length} must be divisible by 2')
+
+ self.fft_length = fft_length
+ self.hop_length = hop_length
+ window = torch.hann_window(self.win_length)
+ self.register_buffer('window', window)
+
+ self.num_subbands = fft_length // 2 + 1
+
+ if magnitude_power <= 0:
+ raise ValueError(f'Magnitude power needs to be positive: current value {magnitude_power}')
+ self.magnitude_power = magnitude_power
+
+ if scale <= 0:
+ raise ValueError(f'Scale needs to be positive: current value {scale}')
+ self.scale = scale
+
+ logging.debug('Initialized %s with:', self.__class__.__name__)
+ logging.debug('\tfft_length: %s', fft_length)
+ logging.debug('\thop_length: %s', hop_length)
+ logging.debug('\tmagnitude_power: %s', magnitude_power)
+ logging.debug('\tscale: %s', scale)
+
+ @property
+ def win_length(self) -> int:
+ return self.fft_length
+
+ def istft(self, x_spec: torch.Tensor):
+ """Apply iSTFT as in torchaudio.transforms.InverseSpectrogram
+
+ Args:
+ x_spec: Input complex-valued spectrogram, shape (..., F, N)
+
+ Returns:
+ Time-domain signal ``x = iSTFT(x_spec)``, shape (..., T).
+ """
+ if not x_spec.is_complex():
+ raise ValueError("Expected `x_spec` to be complex dtype.")
+
+ # pack batch
+ B, C, F, N = x_spec.size()
+ x_spec = rearrange(x_spec, 'B C F N -> (B C) F N')
+
+ x = torch.istft(
+ input=x_spec,
+ n_fft=self.fft_length,
+ hop_length=self.hop_length,
+ win_length=self.win_length,
+ window=self.window,
+ center=True,
+ normalized=False,
+ onesided=True,
+ length=None,
+ return_complex=False,
+ )
+
+ # unpack batch
+ x = rearrange(x, '(B C) T -> B C T', B=B, C=C)
+
+ return x
+
+ @property
+ def input_types(self) -> Dict[str, NeuralType]:
+ """Returns definitions of module output ports."""
+ return {
+ "input": NeuralType(('B', 'C', 'D', 'T'), SpectrogramType()),
+ "input_length": NeuralType(('B',), LengthsType(), optional=True),
+ }
+
+ @property
+ def output_types(self) -> Dict[str, NeuralType]:
+ """Returns definitions of module output ports."""
+ return {
+ "output": NeuralType(('B', 'C', 'T'), AudioSignal()),
+ "output_length": NeuralType(('B',), LengthsType()),
+ }
+
+ @typecheck()
+ def forward(self, input: torch.Tensor, input_length: Optional[torch.Tensor] = None) -> torch.Tensor:
+ """Convert input complex-valued spectrogram to a time-domain
+ signal. Multi-channel IO is supported.
+
+ Args:
+ input: Input spectrogram for C channels, shape (B, C, F, N)
+ input_length: Length of valid entries along the time dimension, shape (B,)
+
+ Returns:
+ Time-domain signal with T time-domain samples and C channels, (B, C, T)
+ and output length with shape (B,).
+ """
+ B, F, N = input.size(0), input.size(-2), input.size(-1)
+ assert F == self.num_subbands, f'Number of subbands F={F} not matching self.num_subbands={self.num_subbands}'
+ input = input.view(B, -1, F, N)
+
+ # iSTFT output (B, C, T)
+ with torch.cuda.amp.autocast(enabled=False):
+ output = input.cfloat()
+
+ if self.scale != 1:
+ # apply 1/scale on the coefficients
+ output = output / self.scale
+
+ if self.magnitude_power != 1:
+ # apply 1/power on the magnitude
+ output = torch.pow(output.abs(), 1 / self.magnitude_power) * torch.exp(1j * output.angle())
+ output = self.istft(output)
+
+ if input_length is not None:
+ # Mask padded samples
+ output_length = self.get_output_length(input_length=input_length)
+
+ length_mask: torch.Tensor = make_seq_mask_like(
+ lengths=output_length, like=output, time_dim=-1, valid_ones=False
+ )
+ output = output.masked_fill(length_mask, 0.0)
+ else:
+ # Assume all frames are valid for all examples in the batch
+ output_length = output.size(-1) * torch.ones(B, device=output.device).long()
+
+ return output, output_length
+
+ def get_output_length(self, input_length: torch.Tensor) -> torch.Tensor:
+ """Get length of valid samples for the output.
+
+ Args:
+ input_length: number of valid frames, shape (B,)
+
+ Returns:
+ Number of valid samples, shape (B,)
+ """
+ # centered STFT results in ((N-1) * hop_length) time samples for N frames (cf. torch.istft)
+ output_length = input_length.sub(1).mul(self.hop_length).long()
+ return output_length
+
+
+class SpectrogramToAudioTA(NeuralModule):
+ """Transform a batch of input multi-channel spectrograms into a batch of
+ time-domain multi-channel signals. Using torchaudio.
+
+ Args:
+ fft_length: length of FFT
+ hop_length: length of hops/shifts of the sliding window
+ magnitude_power: Transform magnitude of the spectrogram as x^(1/magnitude_power).
+ scale: Spectrogram will be scaled with 1/scale before the inverse transform.
+ """
+
def __init__(self, fft_length: int, hop_length: int, magnitude_power: float = 1.0, scale: float = 1.0):
if not HAVE_TORCHAUDIO:
logging.error('Could not import torchaudio. Some features might not work.')
@@ -184,7 +483,7 @@ def __init__(self, fft_length: int, hop_length: int, magnitude_power: float = 1.
n_fft=fft_length, hop_length=hop_length, pad_mode='constant'
)
- self.F = fft_length // 2 + 1
+ self.num_subbands = fft_length // 2 + 1
if magnitude_power <= 0:
raise ValueError(f'Magnitude power needs to be positive: current value {magnitude_power}')
@@ -200,10 +499,6 @@ def __init__(self, fft_length: int, hop_length: int, magnitude_power: float = 1.
logging.debug('\tmagnitude_power: %s', magnitude_power)
logging.debug('\tscale: %s', scale)
- @property
- def num_subbands(self) -> int:
- return self.F
-
@property
def input_types(self) -> Dict[str, NeuralType]:
"""Returns definitions of module output ports."""
@@ -234,7 +529,7 @@ def forward(self, input: torch.Tensor, input_length: Optional[torch.Tensor] = No
and output length with shape (B,).
"""
B, F, N = input.size(0), input.size(-2), input.size(-1)
- assert F == self.F, f'Number of subbands F={F} not matching self.F={self.F}'
+ assert F == self.num_subbands, f'Number of subbands F={F} not matching self.num_subbands={self.num_subbands}'
input = input.view(B, -1, F, N)
# iSTFT output (B, C, T)
diff --git a/nemo/collections/common/prompts/canary.py b/nemo/collections/common/prompts/canary.py
index e511368a1edf..f2b1e58c3bb2 100644
--- a/nemo/collections/common/prompts/canary.py
+++ b/nemo/collections/common/prompts/canary.py
@@ -16,9 +16,13 @@ class CanaryPromptFormatter(PromptFormatter):
"template": f"{CANARY_BOS}|source_lang||task||target_lang||pnc|",
"slots": {
"source_lang": Modality.Text,
- "task": Modality.TextLiteral("asr", "ast", "s2t_translation", "<|transcribe|>", "<|translate|>"),
+ "task": Modality.TextLiteral(
+ "asr", "ast", "translate", "transcribe", "s2t_translation", "<|transcribe|>", "<|translate|>"
+ ),
"target_lang": Modality.Text,
- "pnc": Modality.TextLiteral("yes", "no", "<|pnc|>", "<|nopnc|>"),
+ "pnc": Modality.TextLiteral(
+ "yes", "no", "true", "True", "false", "False", "1", "0", "pnc", "nopnc", "<|pnc|>", "<|nopnc|>"
+ ),
},
},
OUTPUT_ROLE: {
@@ -54,13 +58,18 @@ def map_manifest_values_to_special_tokens(slot_values: dict[str, str]) -> dict[s
k = "pnc"
if k in slot_values and slot_values[k] not in (CANARY_PNC, CANARY_NOPNC):
- slot_values[k] = CANARY_PNC if slot_values[k] in ("yes", "1", "True", "true") else CANARY_NOPNC
+ slot_values[k] = CANARY_PNC if slot_values[k] in ("yes", "1", "True", "true", "pnc") else CANARY_NOPNC
any_special_token_present = True
# Note: we re-map 'taskname' to 'task' for compatibility with earlier versions of Canary training.
for k in ("task", "taskname"):
if k in slot_values and slot_values[k] not in ("<|transcribe|>", "<|translate|>"):
- slot_values["task"] = "<|transcribe|>" if slot_values[k] == "asr" else "<|translate|>"
+ if slot_values[k] in {"translate", "ast", "s2t_translation"}:
+ slot_values["task"] = "<|translate|>"
+ elif slot_values[k] in {"transcribe", "asr"}:
+ slot_values["task"] = "<|transcribe|>"
+ else:
+ assert False, f"Task {slot_values[k]} invalid task for slot {k}"
any_special_token_present = True
# Auto-inject which tokenizer to look up in CanaryTokenizer if not provided,
diff --git a/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py b/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py
index f4081735eb71..907c308e1ddc 100644
--- a/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py
+++ b/nemo/collections/common/tokenizers/text_to_speech/ipa_lexicon.py
@@ -15,7 +15,7 @@
# fmt: off
-SUPPORTED_LOCALES = ["en-US", "de-DE", "es-ES", "it-IT", "fr-FR"]
+SUPPORTED_LOCALES = ["en-US", "de-DE", "es-ES", "it-IT", "fr-FR", "vi-VN", "ja-JP"]
DEFAULT_PUNCTUATION = (
',', '.', '!', '?', '-',
@@ -48,6 +48,19 @@
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
'U', 'V', 'W', 'X', 'Y', 'Z', 'Ä', 'Ö', 'Ü', 'ẞ',
),
+ # ref: https://en.wikipedia.org/wiki/Vietnamese_alphabet
+ "vi-VN": (
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+ 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+ 'U', 'V', 'W', 'X', 'Y', 'Z', 'Đ', 'Á', 'À', 'Ã',
+ 'Ả', 'Ạ', 'Ă', 'Ắ', 'Ằ', 'Ẵ', 'Ẳ', 'Ặ', 'Â', 'Ấ',
+ 'Ầ', 'Ẫ', 'Ẩ', 'Ậ', 'Ó', 'Ò', 'Õ', 'Ỏ', 'Ọ', 'Ô',
+ 'Ố', 'Ồ', 'Ỗ', 'Ổ', 'Ộ', 'Ơ', 'Ớ', 'Ờ', 'Ỡ', 'Ở',
+ 'Ợ', 'É', 'È', 'Ẽ', 'Ẻ', 'Ẹ', 'Ê', 'Ế', 'Ề', 'Ễ',
+ 'Ể', 'Ệ', 'Ú', 'Ù', 'Ũ', 'Ủ', 'Ụ', 'Ư', 'Ứ', 'Ừ',
+ 'Ữ', 'Ử', 'Ự', 'Í', 'Ì', 'Ĩ', 'Ỉ', 'Ị', 'Ý', 'Ỳ',
+ 'Ỹ', 'Ỷ', 'Ỵ',
+ ),
"fr-FR": (
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
@@ -104,6 +117,29 @@
'ɽ','ʂ','ʈ','ʧ','ʉ','ʋ','ⱱ','ɤ','ʍ','χ','ʏ','ʑ','ʐ',
'ʔ','ʡ','ʕ','ʢ','ǀ','ǁ','ǂ','ᵻ', 'ʃ','ː',
),
+ "vi-VN": (
+ 'a', 'ə', 'ɛ', 'e', 'i', 'o', 'ɔ', 'u', 'ɨ',
+ 'b', 'c', 'z', 'j', 'd', 'g', 'h', 'x', 'l',
+ 'm', 'n', 'ŋ', 'ɲ', 'p', 'f', 'w', 'r', 's',
+ 'ʃ', 't', 'ʈ', 'ʂ', 'v', 'ʔ', 'ɓ', 'ɗ', 'ɣ',
+ 'k', 'ʰ', 'ʷ', 'ɕ', 'ʑ', 'ʝ', '̚', '̟', 't͡',
+ '˧', 'ː', 'ɯ', '̀', '̄', '̌', '̂', 'ˀ', '͡', '˥',
+ '˩', '̤', '˨', 'ɹ', 'ʲ', '̯', 'ă', 'ə̆', 'ǐ',
+ '˦', 'æ', 'ɐ',
+ 'ɜ', 'ɡ', 'ɪ', 'ɬ' 'ɾ', 'ʊ', 'ʌ', 'ʒ', '̃',
+ '̩', 'θ', 'ᵻ',
+ ),
+ "ja-JP": (
+ 'a', 'i', 'u', 'e', 'o', 'ɯ', 'I', 'ɑ' , 'ɨ ', 'ɒ',
+ 'ɔ', 'iᵑ', 'eᵑ', 'a', 'ʊ', 'ə', 'eᵝ', 'ɐ', 'ɛ',
+ 'w', 'k', 'ɾ', 's', 't', 'ʃ', 'r', 'h', 'n', 'nʲ',
+ 'ɲ', 'ç', 'b', 'm', 'j', 'ɸ', 'z', 'p', 'd', 'N',
+ 'ʒ', 'ŋ', 'g', 'f', 'ʔ', 'y', 'ɟ', 'v', 'ɥ', 'ɰ',
+ 'ɰᵝ', 'ɣ', 'ʄ', 'ʑ', 'c', 'ɕ', 'ɠ', 'x', 'l', 'β',
+ 'ð', 'ø', 'ʁ', 'ts', 'tʃ', 'dʒ', 'y', 'dʑ', 't͡s',
+ 'ɑ̃', 'ĩ', 'ũ', 'ẽ', 'õ', 'ɑ̃', 'ĩ', 'ũ', 'w̃',
+ 'ẽ', 'õ', 'hʲ', 'ɪ', 'ː', 'o̞', 'e̞',
+ ),
}
GRAPHEME_CHARACTER_CASES = ["upper", "lower", "mixed"]
@@ -157,7 +193,7 @@ def get_ipa_punctuation_list(locale):
punct_set = set(DEFAULT_PUNCTUATION)
# TODO @xueyang: verify potential mismatches with locale-specific punctuation sets used
# in nemo_text_processing.text_normalization.en.taggers.punctuation.py
- if locale in ["de-DE", "es-ES", "it-IT", "fr-FR"]:
+ if locale in ["de-DE", "es-ES", "it-IT", "fr-FR", "ja-JP"]:
# ref: https://en.wikipedia.org/wiki/Guillemet#Uses
punct_set.update(['«', '»', '‹', '›'])
if locale == "de-DE":
@@ -218,6 +254,48 @@ def get_ipa_punctuation_list(locale):
'̧', # combining cedilla, U+0327, decimal 807
]
)
-
+ elif locale == "ja-JP":
+ # ref: https://en.wikipedia.org/wiki/List_of_Japanese_typographic_symbols
+ punct_set.update(
+ [
+ '【',
+ '】',
+ '…',
+ '‥',
+ '「',
+ '」',
+ '『',
+ '』',
+ '〜',
+ '。',
+ '、',
+ 'ー',
+ '・・・',
+ '〃',
+ '〔',
+ '〕',
+ '⦅',
+ '⦆',
+ '〈',
+ '〉',
+ '《',
+ '》',
+ '〖',
+ '〗',
+ '〘',
+ '〙',
+ '〚',
+ '〛',
+ '•',
+ '◦',
+ '﹅',
+ '﹆',
+ '※',
+ '*',
+ '〽',
+ '〓',
+ '〒',
+ ]
+ )
punct_list = sorted(list(punct_set))
return punct_list
diff --git a/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py b/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py
index 542b18186846..b92210b20288 100644
--- a/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py
+++ b/nemo/collections/common/tokenizers/text_to_speech/tokenizer_utils.py
@@ -24,11 +24,13 @@
"english_text_preprocessing",
"any_locale_text_preprocessing",
"spanish_text_preprocessing",
+ "vietnamese_text_preprocessing",
"italian_text_preprocessing",
"any_locale_word_tokenize",
"english_word_tokenize",
"LATIN_CHARS_ALL",
"normalize_unicode_text",
+ "japanese_text_preprocessing",
]
# Derived from LJSpeech
@@ -201,3 +203,11 @@ def chinese_text_preprocessing(text: str) -> str:
def french_text_preprocessing(text: str) -> str:
return text.lower()
+
+
+def vietnamese_text_preprocessing(text: str) -> str:
+ return text.lower()
+
+
+def japanese_text_preprocessing(text: str) -> str:
+ return text.lower()
diff --git a/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py b/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py
index 1aefc6f1b4bb..943ad78a342a 100644
--- a/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py
+++ b/nemo/collections/common/tokenizers/text_to_speech/tts_tokenizers.py
@@ -30,7 +30,9 @@
english_text_preprocessing,
french_text_preprocessing,
italian_text_preprocessing,
+ japanese_text_preprocessing,
spanish_text_preprocessing,
+ vietnamese_text_preprocessing,
)
from nemo.utils import logging
from nemo.utils.decorators import experimental
@@ -202,6 +204,43 @@ def __init__(
)
+class VietnameseCharsTokenizer(BaseCharsTokenizer):
+
+ _LOCALE = "vi-VN"
+ _CHARSET_STR = get_grapheme_character_set(locale=_LOCALE, case="mixed")
+
+ def __init__(
+ self,
+ chars=_CHARSET_STR,
+ punct=True,
+ apostrophe=True,
+ add_blank_at=None,
+ pad_with_space=False,
+ non_default_punct_list=None,
+ text_preprocessing_func=vietnamese_text_preprocessing,
+ ):
+ """Vietnamese grapheme tokenizer.
+ Args:
+ punct: Whether to reserve grapheme for basic punctuation or not.
+ apostrophe: Whether to use apostrophe or not.
+ add_blank_at: Add blank to labels in the specified order ("last") or after tokens (any non None),
+ if None then no blank in labels.
+ pad_with_space: Whether to pad text with spaces at the beginning and at the end or not.
+ non_default_punct_list: List of punctuation marks which will be used instead default.
+ text_preprocessing_func: Text preprocessing function for correct execution of the tokenizer. By default, it
+ would keep any word lowercase.
+ """
+ super().__init__(
+ chars=chars,
+ punct=punct,
+ apostrophe=apostrophe,
+ add_blank_at=add_blank_at,
+ pad_with_space=pad_with_space,
+ non_default_punct_list=non_default_punct_list,
+ text_preprocessing_func=vietnamese_text_preprocessing,
+ )
+
+
class GermanCharsTokenizer(BaseCharsTokenizer):
_LOCALE = "de-DE"
@@ -245,7 +284,12 @@ class SpanishCharsTokenizer(BaseCharsTokenizer):
PUNCT_LIST = get_ipa_punctuation_list("es-ES")
def __init__(
- self, punct=True, apostrophe=True, add_blank_at=None, pad_with_space=False, non_default_punct_list=None,
+ self,
+ punct=True,
+ apostrophe=True,
+ add_blank_at=None,
+ pad_with_space=False,
+ non_default_punct_list=None,
):
"""Spanish grapheme tokenizer.
Args:
@@ -274,7 +318,12 @@ class FrenchCharsTokenizer(BaseCharsTokenizer):
PUNCT_LIST = get_ipa_punctuation_list("fr-FR")
def __init__(
- self, punct=True, apostrophe=True, add_blank_at=None, pad_with_space=False, non_default_punct_list=None,
+ self,
+ punct=True,
+ apostrophe=True,
+ add_blank_at=None,
+ pad_with_space=False,
+ non_default_punct_list=None,
):
"""French grapheme tokenizer.
Args:
@@ -916,3 +965,112 @@ def encode_from_g2p(self, g2p_text: List[str], raw_text: Optional[str] = None):
ps = [space] + ps + [space]
return [self._token2id[p] for p in ps]
+
+
+class JapanesePhonemeTokenizer(BaseTokenizer):
+
+ JA_PUNCT_LIST = get_ipa_punctuation_list("ja-JP")
+
+ def __init__(
+ self,
+ g2p,
+ punct=True,
+ non_default_punct_list=None,
+ *,
+ space=' ',
+ silence=None,
+ apostrophe=True,
+ sep='|', # To be able to distinguish between 2/3 letters codes.
+ add_blank_at=None,
+ pad_with_space=False,
+ text_preprocessing_func=japanese_text_preprocessing,
+ ):
+ """Japanese phoneme-based tokenizer.
+ Note: This tokenizer for now covers Japanese phonemes
+ Args:
+ g2p: Grapheme to phoneme module.
+ punct: Whether to reserve grapheme for basic punctuation or not.
+ non_default_punct_list: List of punctuation marks which will be used instead default.
+ space: Space token as string.
+ silence: Silence token as string (will be disabled if it is None).
+ apostrophe: Whether to use apostrophe or not.
+ sep: Separation token as string.
+ add_blank_at: Add blank to labels in the specified order ("last") or after tokens (any non None),
+ if None then no blank in labels.
+ pad_with_space: Whether to pad text with spaces at the beginning and at the end or not.
+ text_preprocessing_func: Text preprocessing function for correct execution of the tokenizer.
+ Basically, it replaces all non-unicode characters with unicode ones.
+ Note that lower() function shouldn't be applied here, in case the text contains phonemes (it will be handled by g2p).
+ """
+ tokens = []
+ self.space, tokens = len(tokens), tokens + [space] # Space
+
+ if silence is not None:
+ self.silence, tokens = len(tokens), tokens + [silence] # Silence
+
+ self.phoneme_list = g2p.phoneme_list
+ self.ascii_letter_list = g2p.ascii_letter_list
+
+ tokens.extend(self.phoneme_list)
+ tokens.extend(self.ascii_letter_list)
+
+ self.text_preprocessing_func = text_preprocessing_func
+
+ if apostrophe:
+ tokens.append("'") # Apostrophe
+
+ if punct:
+ if non_default_punct_list is not None:
+ self.PUNCT_LIST = non_default_punct_list
+ else:
+ self.PUNCT_LIST = list(self.JA_PUNCT_LIST)
+ tokens.extend(self.PUNCT_LIST)
+
+ super().__init__(tokens, sep=sep, add_blank_at=add_blank_at)
+
+ self.punct = punct
+ self.pad_with_space = pad_with_space
+ self.g2p = g2p
+
+ def encode(self, text: str) -> List[int]:
+ """See base class for more information."""
+ text = self.text_preprocessing_func(text)
+ g2p_text = self.g2p(text)
+ return self.encode_from_g2p(g2p_text, text)
+
+ def encode_from_g2p(self, g2p_text: List[str], raw_text: Optional[str] = None):
+ """
+ Encodes text that has already been run through G2P.
+ Called for encoding to tokens after text preprocessing and G2P.
+
+ Args:
+ g2p_text: G2P's output, could be a mixture of Chinese phonemes and English letters.
+ raw_text: original raw input
+ """
+ ps, space, tokens = [], self.tokens[self.space], set(self.tokens)
+ for p in g2p_text: # noqa
+ # Add space if last one isn't one
+ if p == space and len(ps) > 0 and ps[-1] != space:
+ ps.append(p)
+ # Add next phoneme or tone or ascii letter or apostrophe.
+ elif (p.isalnum() or p == "'" or p in self.phoneme_list + self.ascii_letter_list) and p in tokens:
+ ps.append(p)
+ # Add punctuation
+ elif (p in self.PUNCT_LIST) and self.punct:
+ ps.append(p)
+ # Warn about unknown char/phoneme
+ elif p != space:
+ message = f"Text: [{' '.join(g2p_text)}] contains unknown char/phoneme: [{p}]."
+ if raw_text is not None:
+ message += f"Original text: [{raw_text}]. Symbol will be skipped."
+ logging.warning(message)
+
+ # Remove trailing spaces
+ if ps:
+ while ps[-1] == space:
+ ps.pop()
+
+ if self.pad_with_space:
+ ps = [space] + ps + [space]
+
+ return [self._token2id[p] for p in ps]
diff --git a/nemo/collections/llm/__init__.py b/nemo/collections/llm/__init__.py
index 83c0a3af48c0..b5283ee8a1c9 100644
--- a/nemo/collections/llm/__init__.py
+++ b/nemo/collections/llm/__init__.py
@@ -42,24 +42,7 @@
gpt_data_step,
gpt_forward_step,
)
-from nemo.collections.llm.gpt.model.api import (
- code_gemma_2b,
- code_gemma_7b,
- code_llama_7b,
- code_llama_13b,
- code_llama_34b,
- code_llama_70b,
- gemma,
- gemma_2b,
- gemma_7b,
- llama2_7b,
- llama2_13b,
- llama2_70b,
- llama3_8b,
- llama3_70b,
- mistral,
- mixtral,
-)
+from nemo.collections.llm.recipes import * # noqa
__all__ = [
"MockDataModule",
@@ -103,21 +86,5 @@
"mock",
"squad",
"dolly",
- "mistral",
- "mixtral",
- "llama2_7b",
- "llama3_8b",
- "llama2_13b",
- "llama2_70b",
- "llama3_70b",
- "code_llama_7b",
- "code_llama_13b",
- "code_llama_34b",
- "code_llama_70b",
- "gemma",
- "gemma_2b",
- "gemma_7b",
- "code_gemma_2b",
- "code_gemma_7b",
"peft",
]
diff --git a/nemo/collections/llm/gpt/data/core.py b/nemo/collections/llm/gpt/data/core.py
index 8d99583016a4..6f8fe237e10a 100644
--- a/nemo/collections/llm/gpt/data/core.py
+++ b/nemo/collections/llm/gpt/data/core.py
@@ -32,6 +32,7 @@ def create_sft_dataset(
truncation_method: str = 'right',
memmap_workers: int = 2,
hf_dataset: bool = False,
+ global_sample_mapping: bool = False,
**kwargs,
) -> "GPTSFTDataset":
from nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset import GPTSFTDataset
@@ -42,6 +43,7 @@ def create_sft_dataset(
max_seq_length=seq_length,
memmap_workers=memmap_workers,
hf_dataset=hf_dataset,
+ global_sample_mapping=global_sample_mapping,
add_bos=add_bos,
add_eos=add_eos,
add_sep=add_sep,
diff --git a/nemo/collections/llm/gpt/data/dolly.py b/nemo/collections/llm/gpt/data/dolly.py
index 9632a142eb35..7ed17e460e0f 100644
--- a/nemo/collections/llm/gpt/data/dolly.py
+++ b/nemo/collections/llm/gpt/data/dolly.py
@@ -7,13 +7,14 @@
from nemo.collections.llm.gpt.data.core import get_dataset_root
from nemo.collections.llm.gpt.data.fine_tuning import FineTuningDataModule
+from nemo.lightning.io.mixin import IOMixin
from nemo.utils import logging
if TYPE_CHECKING:
from nemo.collections.common.tokenizers import TokenizerSpec
-class DollyDataModule(FineTuningDataModule):
+class DollyDataModule(FineTuningDataModule, IOMixin):
"""A data module for fine-tuning on the Dolly dataset.
This class inherits from the `FineTuningDataModule` class and is specifically designed for fine-tuning models on the
diff --git a/nemo/collections/llm/gpt/data/pre_training.py b/nemo/collections/llm/gpt/data/pre_training.py
index b405a46f729f..28dcbb0371d2 100644
--- a/nemo/collections/llm/gpt/data/pre_training.py
+++ b/nemo/collections/llm/gpt/data/pre_training.py
@@ -1,11 +1,13 @@
+import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional
import pytorch_lightning as pl
from pytorch_lightning.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS
from torch.utils import data
-from torch.utils.data import DataLoader
+from nemo.lightning.data import WrappedDataLoader
+from nemo.lightning.io.mixin import IOMixin
from nemo.lightning.pytorch.plugins import MegatronDataSampler
if TYPE_CHECKING:
@@ -14,11 +16,10 @@
from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec
-class PreTrainingDataModule(pl.LightningDataModule):
+class PreTrainingDataModule(pl.LightningDataModule, IOMixin):
def __init__(
self,
- paths: Path | List[Path],
- weights: Optional[List[float]] = None,
+ paths: Path | List | Dict[str, List],
seq_length: int = 2048,
tokenizer: Optional["TokenizerSpec"] = None,
micro_batch_size: int = 4,
@@ -38,16 +39,30 @@ def __init__(
index_mapping_dir: Optional[str] = None,
) -> None:
super().__init__()
- if not isinstance(paths, (list, tuple)):
+ if not isinstance(paths, (list, tuple, dict)):
paths = [paths]
- if weights is not None:
- assert len(weights) == len(paths)
- if len(weights) == 1:
- # weights must be None if there is only one dataset
+
+ from megatron.core.datasets.utils import get_blend_from_list
+
+ build_kwargs = {}
+ if isinstance(paths, dict):
+ if split is not None:
+ warnings.warn(
+ f"{split=} will be ignored since datasets are being created " f"from 3 separate distributions."
+ )
+ build_kwargs["blend_per_split"] = [
+ get_blend_from_list(paths["train"]),
+ get_blend_from_list(paths["validation"]),
+ get_blend_from_list(paths["test"]),
+ ]
+ else:
+ paths, weights = get_blend_from_list(paths)
+ if len(paths) == 1:
weights = None
+ build_kwargs["blend"] = [paths, weights]
+ build_kwargs["split"] = split
- self.paths = paths
- self.weights = weights
+ self.build_kwargs = build_kwargs
self.seq_length = seq_length
self.tokenizer = tokenizer
self.num_train_samples = num_train_samples
@@ -92,8 +107,19 @@ def setup(self, stage: str = "") -> None:
num_test_samples = int(test_iters * self.data_sampler.global_batch_size)
if self.trainer.limit_val_batches <= 1.0 and isinstance(self.trainer.limit_val_batches, float):
+ assert "blend" not in self.build_kwargs, (
+ "When using a single data distribution, limit_val_batches <= 1.0 is not supported. If you'd "
+ "like to run with a fractional value of limit_val_batches, please pass in separate datasets for "
+ "the train, validation, and test datasets by providing a dictionary of paths, e.g.: \n"
+ " paths={ \n "
+ " 'train': [PATHS FOR TRAIN], \n "
+ " 'validation': [PATHS FOR VALIDATION], \n "
+ " 'test' :[PATHS FOR TEST], \n"
+ " }"
+ )
+
# This is to make sure we only have one epoch on every validation iteration
- num_val_samples = None if self.weights is None else 1
+ num_val_samples = None
train_valid_test_num_samples = [num_train_samples, num_val_samples, num_test_samples]
self._train_ds, self._validation_ds, self._test_ds = BlendedMegatronDatasetBuilder(
@@ -121,39 +147,40 @@ def setup(self, stage: str = "") -> None:
# ).build()
def train_dataloader(self) -> TRAIN_DATALOADERS:
- return self._create_dataloader(self._train_ds)
+ return self._create_dataloader(self._train_ds, mode='train')
def val_dataloader(self) -> EVAL_DATALOADERS:
- return self._create_dataloader(self._validation_ds)
+ return self._create_dataloader(self._validation_ds, mode='validation')
def test_dataloader(self) -> EVAL_DATALOADERS:
- return self._create_dataloader(self._test_ds)
+ return self._create_dataloader(self._test_ds, mode='test')
- def _create_dataloader(self, dataset, **kwargs) -> DataLoader:
+ def _create_dataloader(self, dataset, mode, **kwargs) -> WrappedDataLoader:
self.init_global_step = self.trainer.global_step
- return DataLoader(
- dataset,
+ dataloader = WrappedDataLoader(
+ mode=mode,
+ dataset=dataset,
num_workers=self.num_workers,
pin_memory=self.pin_memory,
persistent_workers=self.persistent_workers,
collate_fn=getattr(dataset, 'collate_fn', data.dataloader.default_collate),
**kwargs,
)
+ return dataloader
@property
def gpt_dataset_config(self) -> "GPTDatasetConfig":
from megatron.core.datasets.gpt_dataset import GPTDatasetConfig
return GPTDatasetConfig(
- blend=[[str(path) for path in self.paths], self.weights],
random_seed=self.seed,
sequence_length=self.seq_length,
tokenizer=self.tokenizer,
- split=self.split,
path_to_cache=self.index_mapping_dir,
reset_position_ids=self.reset_position_ids,
reset_attention_mask=self.reset_attention_mask,
eod_mask_loss=self.eod_mask_loss,
+ **self.build_kwargs,
)
def state_dict(self) -> Dict[str, Any]:
@@ -185,11 +212,53 @@ def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
consistency_check=False,
)
current_global_batch_size = num_microbatch_calculator.current_global_batch_size
- '''pl_module.log(
- "global_batch_size",
- current_global_batch_size,
- prog_bar=True,
- rank_zero_only=True,
- batch_size=1,
- )'''
- self.if_first_step = 1
+ self.data_sampler.if_first_step = 1
+
+ def reconfigure_limit_batches(self):
+ # Override limit_train_batches in terms of num of microbatches
+ self._reconfigure_limit_batches(self.trainer.limit_train_batches, self._train_ds, 'train')
+ # Override limit_val_batches to be a multiple of num microbatches to prevent val_step from exiting in between a step
+ self._reconfigure_limit_batches(self.trainer.limit_val_batches, self._validation_ds, 'val')
+
+ def _reconfigure_limit_batches(self, limit_batches, dataloader, mode):
+ """
+ Reconfigure trainer.limit_val_batches for pretraining
+ """
+ # Override limit_batches in terms of num microbatches and so there are limit_batches//num_micro_batches num of global batches
+ from megatron.core.num_microbatches_calculator import get_num_microbatches
+
+ if isinstance(limit_batches, int):
+ limit_batches *= get_num_microbatches()
+ else:
+ assert isinstance(limit_batches, float)
+ # Don't reconfigure if limit_batches is 0.0 or if there's no dataloader
+ if limit_batches == 0.0 or dataloader is None:
+ return
+ # len(dataloader) returns len as num of microbatches
+ dl_len_in_micro_batches = len(dataloader)
+ if len(dataloader) != float("inf"):
+ if limit_batches == 1.0:
+ limit_batches = dl_len_in_micro_batches
+ else:
+ limit_micro_batches = int(dl_len_in_micro_batches * limit_batches)
+ if limit_micro_batches == 0 and limit_batches > 0.0:
+ min_percentage = 1.0 / len(dataloader)
+ raise MisconfigurationException(
+ f"You requested to check {limit_batches} of the val_dataloader but"
+ f" {limit_batches} * {len(dataloader)} < 1. Please increase the"
+ f" `limit_val_batches` argument. Try at least"
+ f" `limit_val_batches={min_percentage}`"
+ )
+ # Make sure trainer.limit_val_batches is a multiple of num of microbatches
+ if limit_micro_batches < get_num_microbatches():
+ limit_batches = get_num_microbatches()
+ else:
+ limit_batches = limit_batches - limit_batches % get_num_microbatches()
+
+ if mode == 'train':
+ self.trainer.limit_train_batches = limit_batches
+ else:
+ self.trainer.limit_val_batches = limit_batches
+
+ # Override num sanity steps to be a multiple of num of microbatches
+ self.trainer.num_sanity_val_steps *= get_num_microbatches()
diff --git a/nemo/collections/llm/gpt/data/squad.py b/nemo/collections/llm/gpt/data/squad.py
index 77d48da98a0e..11104fe3cab2 100644
--- a/nemo/collections/llm/gpt/data/squad.py
+++ b/nemo/collections/llm/gpt/data/squad.py
@@ -6,13 +6,14 @@
from nemo.collections.llm.gpt.data.core import get_dataset_root
from nemo.collections.llm.gpt.data.fine_tuning import FineTuningDataModule
+from nemo.lightning.io.mixin import IOMixin
from nemo.utils import logging
if TYPE_CHECKING:
from nemo.collections.common.tokenizers import TokenizerSpec
-class SquadDataModule(FineTuningDataModule):
+class SquadDataModule(FineTuningDataModule, IOMixin):
"""A data module for fine-tuning on the Squad dataset.
This class inherits from the `FineTuningDataModule` class and is specifically designed for fine-tuning models on the
@@ -124,3 +125,6 @@ def _preprocess_and_split_data(
shutil.rmtree(p)
elif '.jsonl' not in str(p.name):
p.unlink()
+
+ def reconfigure_limit_batches(self):
+ return
diff --git a/nemo/collections/llm/gpt/model/api.py b/nemo/collections/llm/gpt/model/api.py
deleted file mode 100644
index 7c8cbf4d02e6..000000000000
--- a/nemo/collections/llm/gpt/model/api.py
+++ /dev/null
@@ -1,125 +0,0 @@
-import pytorch_lightning as pl
-
-from nemo.collections.llm.gpt.model.gemma import (
- CodeGemmaConfig2B,
- CodeGemmaConfig7B,
- GemmaConfig,
- GemmaConfig2B,
- GemmaConfig7B,
- GemmaModel,
-)
-from nemo.collections.llm.gpt.model.llama import (
- CodeLlamaConfig7B,
- CodeLlamaConfig13B,
- CodeLlamaConfig34B,
- CodeLlamaConfig70B,
- Llama2Config7B,
- Llama2Config13B,
- Llama2Config70B,
- Llama3Config8B,
- Llama3Config70B,
- LlamaModel,
-)
-from nemo.collections.llm.gpt.model.mistral import MistralConfig7B, MistralModel
-from nemo.collections.llm.gpt.model.mixtral import MixtralConfig8x7B, MixtralModel
-from nemo.collections.llm.utils import factory
-
-
-@factory
-def mistral() -> pl.LightningModule:
- return MistralModel(MistralConfig7B())
-
-
-@factory
-def mixtral() -> pl.LightningModule:
- return MixtralModel(MixtralConfig8x7B())
-
-
-@factory
-def llama2_7b() -> pl.LightningModule:
- return LlamaModel(Llama2Config7B())
-
-
-@factory
-def llama3_8b() -> pl.LightningModule:
- return LlamaModel(Llama3Config8B())
-
-
-@factory
-def llama2_13b() -> pl.LightningModule:
- return LlamaModel(Llama2Config13B())
-
-
-@factory
-def llama2_70b() -> pl.LightningModule:
- return LlamaModel(Llama2Config70B())
-
-
-@factory
-def llama3_70b() -> pl.LightningModule:
- return LlamaModel(Llama3Config70B())
-
-
-@factory
-def code_llama_7b() -> pl.LightningModule:
- return LlamaModel(CodeLlamaConfig7B())
-
-
-@factory
-def code_llama_13b() -> pl.LightningModule:
- return LlamaModel(CodeLlamaConfig13B())
-
-
-@factory
-def code_llama_34b() -> pl.LightningModule:
- return LlamaModel(CodeLlamaConfig34B())
-
-
-@factory
-def code_llama_70b() -> pl.LightningModule:
- return LlamaModel(CodeLlamaConfig70B())
-
-
-@factory
-def gemma() -> pl.LightningModule:
- return GemmaModel(GemmaConfig())
-
-
-@factory
-def gemma_2b() -> pl.LightningModule:
- return GemmaModel(GemmaConfig2B())
-
-
-@factory
-def gemma_7b() -> pl.LightningModule:
- return GemmaModel(GemmaConfig7B())
-
-
-@factory
-def code_gemma_2b() -> pl.LightningModule:
- return GemmaModel(CodeGemmaConfig2B())
-
-
-@factory
-def code_gemma_7b() -> pl.LightningModule:
- return GemmaModel(CodeGemmaConfig7B())
-
-
-__all__ = [
- "mistral",
- "mixtral",
- "llama2_7b",
- "llama3_8b",
- "llama2_13b",
- "llama2_70b",
- "llama3_70b",
- "code_llama_7b",
- "code_llama_13b",
- "code_llama_34b",
- "code_llama_70b",
- "gemma",
- "gemma_2b",
- "gemma_7b",
- "code_gemma_2b",
- "code_gemma_7b",
-]
diff --git a/nemo/collections/llm/gpt/model/mixtral.py b/nemo/collections/llm/gpt/model/mixtral.py
index 6256b67515ee..96edeadd439a 100644
--- a/nemo/collections/llm/gpt/model/mixtral.py
+++ b/nemo/collections/llm/gpt/model/mixtral.py
@@ -47,6 +47,8 @@ class MixtralConfig8x7B(GPTConfig):
# rotary
rotary_percent: float = 0.5
rotary_base: float = 10000
+ bf16: bool = True
+ params_dtype: torch.dtype = torch.bfloat16
class MixtralModel(GPTModel):
@@ -70,7 +72,7 @@ def init(self) -> MixtralModel:
def apply(self, output_path: Path) -> Path:
from transformers import MixtralForCausalLM
- source = MixtralForCausalLM.from_pretrained(str(self))
+ source = MixtralForCausalLM.from_pretrained(str(self), torch_dtype='auto', use_safetensors=True)
target = self.init()
trainer = self.nemo_setup(target)
self.convert_state(source, target)
@@ -109,6 +111,7 @@ def config(self) -> MixtralConfig8x7B:
config = HfMixtralConfig.from_pretrained(str(self))
return MixtralConfig8x7B(
+ bf16=getattr(config, "torch_dtype", None) == torch.bfloat16,
activation_func=F.silu,
# network
num_layers=config.num_hidden_layers,
@@ -132,6 +135,10 @@ def config(self) -> MixtralConfig8x7B:
gated_linear_unit=True,
# Vocab
make_vocab_size_divisible_by=128,
+ # CPU init
+ use_cpu_initialization=True,
+ perform_initialization=False,
+ params_dtype=getattr(config, "torch_dtype", torch.bfloat16),
)
diff --git a/nemo/collections/llm/recipes/__init__.py b/nemo/collections/llm/recipes/__init__.py
new file mode 100644
index 000000000000..8d4d874362a9
--- /dev/null
+++ b/nemo/collections/llm/recipes/__init__.py
@@ -0,0 +1,13 @@
+from nemo.collections.llm.recipes import llama2_7b, llama3_8b, llama3_8b_16k, llama3_8b_64k, mistral
+from nemo.collections.llm.recipes.log.default import default_log
+from nemo.collections.llm.recipes.optim import adam
+
+__all__ = [
+ "llama3_8b",
+ "llama3_8b_16k",
+ "llama3_8b_64k",
+ "llama2_7b",
+ "mistral",
+ "adam",
+ "default_log",
+]
diff --git a/nemo/collections/llm/recipes/llama2_7b.py b/nemo/collections/llm/recipes/llama2_7b.py
new file mode 100644
index 000000000000..1767dc4690c8
--- /dev/null
+++ b/nemo/collections/llm/recipes/llama2_7b.py
@@ -0,0 +1,61 @@
+import pytorch_lightning as pl
+
+from nemo import lightning as nl
+from nemo.collections.llm.api import finetune, pretrain
+from nemo.collections.llm.gpt.data.api import squad
+from nemo.collections.llm.gpt.model.llama import Llama2Config7B, LlamaModel
+from nemo.collections.llm.peft.api import gpt_lora
+from nemo.collections.llm.recipes.log.default import default_log
+from nemo.collections.llm.recipes.optim.adam import adam_with_cosine_annealing
+from nemo.collections.llm.utils import Partial, factory
+
+NAME = "llama2_7b"
+
+
+@factory(name=NAME)
+def model() -> pl.LightningModule:
+ return LlamaModel(Llama2Config7B())
+
+
+@factory(name=NAME)
+def trainer(devices=8) -> nl.Trainer:
+ strategy = nl.MegatronStrategy(tensor_model_parallel_size=2)
+
+ return nl.Trainer(
+ devices=devices,
+ max_steps=100,
+ accelerator="gpu",
+ strategy=strategy,
+ plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"),
+ )
+
+
+@factory(name=NAME + "_hf")
+def hf_resume() -> nl.AutoResume:
+ return nl.AutoResume(import_path="hf://meta-llama/Llama-2-7b-hf")
+
+
+@factory(name=NAME, for_task="llm.pretrain")
+def pretrain_recipe() -> Partial:
+ return Partial(
+ pretrain,
+ model=model,
+ trainer=trainer,
+ data=squad,
+ log=default_log,
+ optim=adam_with_cosine_annealing,
+ )
+
+
+@factory(name=NAME, for_task="llm.finetune")
+def finetune_recipe() -> Partial:
+ return Partial(
+ finetune,
+ model=model,
+ trainer=trainer,
+ data=squad,
+ log=default_log,
+ optim=adam_with_cosine_annealing,
+ peft=gpt_lora,
+ resume=hf_resume,
+ )
diff --git a/nemo/collections/llm/recipes/llama3_8b.py b/nemo/collections/llm/recipes/llama3_8b.py
new file mode 100644
index 000000000000..34ce418a0701
--- /dev/null
+++ b/nemo/collections/llm/recipes/llama3_8b.py
@@ -0,0 +1,61 @@
+import pytorch_lightning as pl
+
+from nemo import lightning as nl
+from nemo.collections.llm.api import finetune, pretrain
+from nemo.collections.llm.gpt.data.api import squad
+from nemo.collections.llm.gpt.model.llama import Llama3Config8B, LlamaModel
+from nemo.collections.llm.peft.api import gpt_lora
+from nemo.collections.llm.recipes.log.default import default_log
+from nemo.collections.llm.recipes.optim.adam import adam_with_cosine_annealing
+from nemo.collections.llm.utils import Partial, factory
+
+NAME = "llama3_8b"
+
+
+@factory(name=NAME)
+def model() -> pl.LightningModule:
+ return LlamaModel(Llama3Config8B(seq_length=16384))
+
+
+@factory(name=NAME)
+def trainer(devices=8) -> nl.Trainer:
+ strategy = nl.MegatronStrategy(tensor_model_parallel_size=2)
+
+ return nl.Trainer(
+ devices=devices,
+ max_steps=100,
+ accelerator="gpu",
+ strategy=strategy,
+ plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"),
+ )
+
+
+@factory(name=NAME + "_hf")
+def hf_resume() -> nl.AutoResume:
+ return nl.AutoResume(import_path="hf://meta-llama/Meta-Llama-3-8B")
+
+
+@factory(name=NAME, for_task="llm.pretrain")
+def pretrain_recipe() -> Partial:
+ return Partial(
+ pretrain,
+ model=model,
+ trainer=trainer,
+ data=squad,
+ log=default_log,
+ optim=adam_with_cosine_annealing,
+ )
+
+
+@factory(name=NAME, for_task="llm.finetune")
+def finetune_recipe() -> Partial:
+ return Partial(
+ finetune,
+ model=model,
+ trainer=trainer,
+ data=squad,
+ log=default_log,
+ optim=adam_with_cosine_annealing,
+ peft=gpt_lora,
+ resume=hf_resume,
+ )
diff --git a/nemo/collections/llm/recipes/llama3_8b_16k.py b/nemo/collections/llm/recipes/llama3_8b_16k.py
new file mode 100644
index 000000000000..3a590f26894e
--- /dev/null
+++ b/nemo/collections/llm/recipes/llama3_8b_16k.py
@@ -0,0 +1,45 @@
+import pytorch_lightning as pl
+
+from nemo import lightning as nl
+from nemo.collections.llm.api import pretrain
+from nemo.collections.llm.gpt.data.api import squad
+from nemo.collections.llm.gpt.model.llama import Llama3Config8B, LlamaModel
+from nemo.collections.llm.recipes.log.default import default_log
+from nemo.collections.llm.recipes.optim.adam import adam_with_cosine_annealing
+from nemo.collections.llm.utils import Partial, factory
+
+NAME = "llama3_8b_16k"
+
+
+@factory(name=NAME)
+def model() -> pl.LightningModule:
+ return LlamaModel(Llama3Config8B(seq_length=16384))
+
+
+@factory(name=NAME)
+def trainer(devices=8) -> nl.Trainer:
+ strategy = nl.MegatronStrategy(
+ tensor_model_parallel_size=4,
+ context_parallel_size=2,
+ sequence_parallel=True,
+ )
+
+ return nl.Trainer(
+ devices=devices,
+ max_steps=100,
+ accelerator="gpu",
+ strategy=strategy,
+ plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"),
+ )
+
+
+@factory(name=NAME, for_task="llm.pretrain")
+def pretrain_recipe() -> Partial:
+ return Partial(
+ pretrain,
+ model=model,
+ trainer=trainer,
+ data=squad,
+ log=default_log,
+ optim=adam_with_cosine_annealing,
+ )
diff --git a/nemo/collections/llm/recipes/llama3_8b_64k.py b/nemo/collections/llm/recipes/llama3_8b_64k.py
new file mode 100644
index 000000000000..c826feb28901
--- /dev/null
+++ b/nemo/collections/llm/recipes/llama3_8b_64k.py
@@ -0,0 +1,45 @@
+import pytorch_lightning as pl
+
+from nemo import lightning as nl
+from nemo.collections.llm.api import pretrain
+from nemo.collections.llm.gpt.data.api import squad
+from nemo.collections.llm.gpt.model.llama import Llama3Config8B, LlamaModel
+from nemo.collections.llm.recipes.log.default import default_log
+from nemo.collections.llm.recipes.optim.adam import adam_with_cosine_annealing
+from nemo.collections.llm.utils import Partial, factory
+
+NAME = "llama3_8b_64k"
+
+
+@factory(name=NAME)
+def model() -> pl.LightningModule:
+ return LlamaModel(Llama3Config8B(seq_length=65536))
+
+
+@factory(name=NAME)
+def trainer(devices=8) -> nl.Trainer:
+ strategy = nl.MegatronStrategy(
+ tensor_model_parallel_size=8,
+ context_parallel_size=4,
+ sequence_parallel=True,
+ )
+
+ return nl.Trainer(
+ devices=devices,
+ max_steps=100,
+ accelerator="gpu",
+ strategy=strategy,
+ plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"),
+ )
+
+
+@factory(name=NAME, for_task="llm.pretrain")
+def pretrain_recipe() -> Partial:
+ return Partial(
+ pretrain,
+ model=model,
+ trainer=trainer,
+ data=squad,
+ log=default_log,
+ optim=adam_with_cosine_annealing,
+ )
diff --git a/nemo/collections/llm/recipes/log/__init__.py b/nemo/collections/llm/recipes/log/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/nemo/collections/llm/recipes/log/default.py b/nemo/collections/llm/recipes/log/default.py
new file mode 100644
index 000000000000..a40e141bfa95
--- /dev/null
+++ b/nemo/collections/llm/recipes/log/default.py
@@ -0,0 +1,15 @@
+from nemo import lightning as nl
+from nemo.collections.llm.utils import factory
+
+
+@factory
+def default_log() -> nl.NeMoLogger:
+ ckpt = nl.ModelCheckpoint(
+ save_best_model=True,
+ save_last=True,
+ monitor="reduced_train_loss",
+ save_top_k=2,
+ save_on_train_epoch_end=True,
+ )
+
+ return nl.NeMoLogger(ckpt=ckpt)
diff --git a/nemo/collections/llm/recipes/mistral.py b/nemo/collections/llm/recipes/mistral.py
new file mode 100644
index 000000000000..12af8d5d18ff
--- /dev/null
+++ b/nemo/collections/llm/recipes/mistral.py
@@ -0,0 +1,61 @@
+import pytorch_lightning as pl
+
+from nemo import lightning as nl
+from nemo.collections.llm.api import finetune, pretrain
+from nemo.collections.llm.gpt.data.api import squad
+from nemo.collections.llm.gpt.model.mistral import MistralConfig7B, MistralModel
+from nemo.collections.llm.peft.api import gpt_lora
+from nemo.collections.llm.recipes.log.default import default_log
+from nemo.collections.llm.recipes.optim.adam import adam_with_cosine_annealing
+from nemo.collections.llm.utils import Partial, factory
+
+NAME = "mistral"
+
+
+@factory(name=NAME)
+def model() -> pl.LightningModule:
+ return MistralModel(MistralConfig7B())
+
+
+@factory(name=NAME)
+def trainer(devices=8) -> nl.Trainer:
+ strategy = nl.MegatronStrategy(tensor_model_parallel_size=2)
+
+ return nl.Trainer(
+ devices=devices,
+ max_steps=100,
+ accelerator="gpu",
+ strategy=strategy,
+ plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"),
+ )
+
+
+@factory(name=NAME + "_hf")
+def hf_resume() -> nl.AutoResume:
+ return nl.AutoResume(import_path="hf://mistralai/Mistral-7B-v0.3")
+
+
+@factory(name=NAME, for_task="llm.pretrain")
+def pretrain_recipe() -> Partial:
+ return Partial(
+ pretrain,
+ model=model,
+ trainer=trainer,
+ data=squad,
+ log=default_log,
+ optim=adam_with_cosine_annealing,
+ )
+
+
+@factory(name=NAME, for_task="llm.finetune")
+def finetune_recipe() -> Partial:
+ return Partial(
+ finetune,
+ model=model,
+ trainer=trainer,
+ data=squad,
+ log=default_log,
+ optim=adam_with_cosine_annealing,
+ peft=gpt_lora,
+ resume=hf_resume,
+ )
diff --git a/nemo/collections/llm/recipes/optim/__init__.py b/nemo/collections/llm/recipes/optim/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/nemo/collections/llm/recipes/optim/adam.py b/nemo/collections/llm/recipes/optim/adam.py
new file mode 100644
index 000000000000..4229001b2130
--- /dev/null
+++ b/nemo/collections/llm/recipes/optim/adam.py
@@ -0,0 +1,16 @@
+from megatron.core.optimizer import OptimizerConfig
+
+from nemo import lightning as nl
+from nemo.collections.llm.utils import factory
+
+
+@factory
+def adam_with_cosine_annealing() -> nl.OptimizerModule:
+ return nl.MegatronOptimizerModule(
+ config=OptimizerConfig(optimizer="adam", lr=0.001, use_distributed_optimizer=True),
+ lr_scheduler=nl.lr_scheduler.CosineAnnealingScheduler(),
+ )
+
+
+# TODO: Fix the name-arg inside the factory-function so we don't need to do this
+with_cosine_annealing = adam_with_cosine_annealing
diff --git a/nemo/collections/llm/utils.py b/nemo/collections/llm/utils.py
index b4382d0afd5f..5c087f60590a 100644
--- a/nemo/collections/llm/utils.py
+++ b/nemo/collections/llm/utils.py
@@ -42,9 +42,8 @@ def factory(*args: Any, **kwargs: Any) -> Union[Callable[[T], T], T]:
try:
import nemo_sdk as sdk
- if not args and not kwargs:
- # Used as @factory without arguments
- return sdk.factory()
+ if not args:
+ return sdk.factory(**kwargs)
else:
# Used as @factory(*args, **kwargs)
return sdk.factory(*args, **kwargs)
diff --git a/nemo/collections/multimodal/data/clip/clip_dataset.py b/nemo/collections/multimodal/data/clip/clip_dataset.py
index 6b63d546194a..448efba4b8ba 100644
--- a/nemo/collections/multimodal/data/clip/clip_dataset.py
+++ b/nemo/collections/multimodal/data/clip/clip_dataset.py
@@ -57,8 +57,9 @@ def tokenize(texts: Union[str, List[str]], tokenizer: Any, context_length: int =
bos_id = tokenizer.bos_id
eos_id = tokenizer.eos_id
- all_tokens = [[bos_id] + tokenizer.text_to_ids(text) + [eos_id] for text in texts]
- result = torch.zeros(len(all_tokens), context_length, dtype=torch.long)
+ pad_id = tokenizer.pad_id
+ all_tokens = [([bos_id] if bos_id is not None else []) + tokenizer.text_to_ids(text) + [eos_id] for text in texts]
+ result = torch.ones(len(all_tokens), context_length, dtype=torch.long) * pad_id
for i, tokens in enumerate(all_tokens):
if len(tokens) > context_length:
diff --git a/nemo/collections/multimodal/data/neva/neva_dataset.py b/nemo/collections/multimodal/data/neva/neva_dataset.py
index 96aa556cff47..23279c609ef3 100644
--- a/nemo/collections/multimodal/data/neva/neva_dataset.py
+++ b/nemo/collections/multimodal/data/neva/neva_dataset.py
@@ -20,7 +20,6 @@
from dataclasses import dataclass
from typing import Any, Dict, List, Sequence, Tuple, Union
-import decord
import numpy as np
import torch
import torch.nn.functional as F
@@ -50,6 +49,11 @@
MAX_NUM_IMAGES = 1
IGNORE_INDEX = -1
+try:
+ import decord
+except Exception:
+ logging.warning("The package `decord` was not installed in this environment.")
+
try:
from megatron.core.datasets.indexed_dataset import IndexedDataset
diff --git a/nemo/collections/multimodal/models/multimodal_llm/neva/neva_model.py b/nemo/collections/multimodal/models/multimodal_llm/neva/neva_model.py
index 7a1f1013df4a..ac259268ec58 100644
--- a/nemo/collections/multimodal/models/multimodal_llm/neva/neva_model.py
+++ b/nemo/collections/multimodal/models/multimodal_llm/neva/neva_model.py
@@ -71,6 +71,8 @@
try:
from megatron.core import InferenceParams, dist_checkpointing, parallel_state, tensor_parallel
+ from megatron.core.dist_checkpointing.dict_utils import dict_list_map_inplace
+ from megatron.core.dist_checkpointing.mapping import LocalNonpersitentObject, ShardedObject
from megatron.core.models.gpt import GPTModel as MCoreGPTModel
from megatron.core.num_microbatches_calculator import get_num_microbatches
from megatron.core.pipeline_parallel.schedules import get_forward_backward_func
@@ -83,6 +85,12 @@
HAVE_MEGATRON_CORE = False
+def skip_fp8_load(x):
+ if isinstance(x, ShardedObject) and 'fused_attention' in x.key and '_extra_state' in x.key:
+ x = LocalNonpersitentObject(x.data) # use the FP8 state from initialization, not from ckpt
+ return x
+
+
class FrozenCLIPVisionTransformer(CLIPVisionTransformer):
"""Frozen version of CLIPVisionTransformer"""
@@ -391,8 +399,8 @@ def replace_media_embeddings(self, input_ids, inputs_embeds, media):
t_token_start, t_token_end = start, start + T
s_token_start, s_token_end = start + T, start + T + M
assert s_token_end == end + 1, "Token replacement error"
- inputs_embeds[idx, t_token_start:t_token_end] = temporal_tokens[idx]
- inputs_embeds[idx, s_token_start:s_token_end] = spatial_tokens[idx]
+ inputs_embeds[idx, t_token_start:t_token_end] = t_tokens[idx]
+ inputs_embeds[idx, s_token_start:s_token_end] = s_tokens[idx]
elif self.visual_token_format == 'im_vid_start_end': # v1.5 lita
if not self.use_media_start_end:
# replace the media start and media end embedding with
@@ -522,6 +530,9 @@ def _load_model_weights(self, nemo_path):
sharded_state_dict = None
if getattr(self, "sharded_state_dict", None) is not None:
sharded_state_dict = self.sharded_state_dict(prefix="model.")
+ # WAR: This is a temporary fix to skip loading FP8 parameters for Dot Product Attention
+ # TODO(yuya): Check if this skip affecting fp8 native checkpoints loading
+ dict_list_map_inplace(skip_fp8_load, sharded_state_dict)
state_dict, self.is_dist_ckpt = load_nemo_model_weights(nemo_path, sharded_state_dict)
return state_dict
@@ -732,8 +743,7 @@ def dummy():
config=self.transformer_config,
transformer_layer_spec=get_specs(
self.spec_name,
- self.transformer_config.num_moe_experts,
- self.transformer_config.moe_grouped_gemm,
+ self.transformer_config,
self.transformer_engine,
),
vocab_size=self.cfg.get('override_vocab_size', self.padded_vocab_size),
diff --git a/nemo/collections/multimodal/models/text_to_image/controlnet/controlnet.py b/nemo/collections/multimodal/models/text_to_image/controlnet/controlnet.py
index fc661d91ab61..65e31b5343de 100644
--- a/nemo/collections/multimodal/models/text_to_image/controlnet/controlnet.py
+++ b/nemo/collections/multimodal/models/text_to_image/controlnet/controlnet.py
@@ -550,11 +550,18 @@ def load_from_unet(self, from_pretrained_unet, from_NeMo=True):
print("Loading unet blocks from sd")
state_dict = torch.load(from_pretrained_unet, map_location='cpu')
- state_dict = state_dict['state_dict']
+ if 'state_dict' in state_dict.keys():
+ state_dict = state_dict['state_dict']
model_state_dict = self.state_dict()
+ model_state_keys = model_state_dict.keys()
re_state_dict = {}
for key_, value_ in state_dict.items():
+ # check if key is a raw parameter
+ if key_ in model_state_keys:
+ re_state_dict[key_] = value_
+ continue
+ # prune from model prefix
if key_.startswith('model.model.diffusion_model'):
re_state_dict[key_.replace('model.model.diffusion_model.', '')] = value_
if key_.startswith('model.diffusion_model'):
diff --git a/nemo/collections/multimodal/models/vision_language_foundation/clip/megatron_clip_models.py b/nemo/collections/multimodal/models/vision_language_foundation/clip/megatron_clip_models.py
index d811ce94dbea..d4aa3755b385 100644
--- a/nemo/collections/multimodal/models/vision_language_foundation/clip/megatron_clip_models.py
+++ b/nemo/collections/multimodal/models/vision_language_foundation/clip/megatron_clip_models.py
@@ -501,8 +501,7 @@ def __init__(
add_class_token = True
vision_layer_spec = get_specs(
model_cfg.text.get('name', ''),
- vision_transformer_config.num_moe_experts,
- vision_transformer_config.moe_grouped_gemm,
+ vision_transformer_config,
model_cfg.get('transformer_engine', True),
)
vision_layer_spec.submodules.self_attention.params['attn_mask_type'] = MCoreAttnMaskType.no_mask
@@ -527,8 +526,7 @@ def __init__(
config=text_transformer_config,
transformer_layer_spec=get_specs(
model_cfg.text.get('name', ''),
- text_transformer_config.num_moe_experts,
- text_transformer_config.moe_grouped_gemm,
+ text_transformer_config,
model_cfg.get('transformer_engine', True),
),
vocab_size=model_cfg.text.get('override_vocab_size', padded_vocab_size),
@@ -984,6 +982,7 @@ def training_step(self, dataloader_iter):
for module in modules:
if isinstance(module, (Float16Module, MCoreFloat16Module)):
module = module.module
+ module = module.text_encoder
if not self.mcore_gpt:
module = module.language_model
if hasattr(module, 'embedding'):
diff --git a/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py
index 1d6b8395a58f..9a7f0a572743 100644
--- a/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py
+++ b/nemo/collections/multimodal/modules/imagen/diffusionmodules/blocks.py
@@ -58,10 +58,9 @@ def check_cuda():
dprops = th.cuda.get_device_properties(cur_device)
is_sm75 = dprops.major == 7 and dprops.minor == 5
- is_sm8x = dprops.major == 8 and dprops.minor >= 0
- is_sm90 = dprops.major == 9 and dprops.minor >= 0
+ is_sm8x_or_later = dprops.major >= 8
- return is_sm8x or is_sm75 or is_sm90
+ return is_sm75 or is_sm8x_or_later
try:
@@ -154,7 +153,9 @@ def __init__(
self.use_scale_shift_norm = use_scale_shift_norm
self.in_layers = nn.Sequential(
- normalization(channels), nn.SiLU(), conv_nd(dims, channels, self.out_channels, 3, padding=1),
+ normalization(channels),
+ nn.SiLU(),
+ conv_nd(dims, channels, self.out_channels, 3, padding=1),
)
self.updown = up or down
@@ -173,7 +174,11 @@ def __init__(
self.h_upd = self.x_upd = nn.Identity()
self.emb_layers = nn.Sequential(
- nn.SiLU(), linear(emb_channels, 2 * self.out_channels if use_scale_shift_norm else self.out_channels,),
+ nn.SiLU(),
+ linear(
+ emb_channels,
+ 2 * self.out_channels if use_scale_shift_norm else self.out_channels,
+ ),
)
self.out_layers = nn.Sequential(
normalization(self.out_channels),
@@ -263,7 +268,11 @@ def __init__(
)
self.emb_layers = nn.Sequential(
- nn.SiLU(), nn.Linear(emb_channels, 2 * out_channels if use_scale_shift_norm else out_channels,),
+ nn.SiLU(),
+ nn.Linear(
+ emb_channels,
+ 2 * out_channels if use_scale_shift_norm else out_channels,
+ ),
)
self.out_layers = nn.Sequential(
diff --git a/nemo/collections/multimodal/modules/stable_diffusion/attention.py b/nemo/collections/multimodal/modules/stable_diffusion/attention.py
index e748bcbf93a0..492f68af032e 100644
--- a/nemo/collections/multimodal/modules/stable_diffusion/attention.py
+++ b/nemo/collections/multimodal/modules/stable_diffusion/attention.py
@@ -56,10 +56,9 @@ def check_cuda():
dprops = torch.cuda.get_device_properties(cur_device)
is_sm75 = dprops.major == 7 and dprops.minor == 5
- is_sm8x = dprops.major == 8 and dprops.minor >= 0
- is_sm90 = dprops.major == 9 and dprops.minor >= 0
+ is_sm8x_or_later = dprops.major >= 8
- return is_sm8x or is_sm75 or is_sm90
+ return is_sm75 or is_sm8x_or_later
try:
diff --git a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py
index eb449c5406b9..b94624b33ba2 100644
--- a/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py
+++ b/nemo/collections/multimodal/modules/stable_diffusion/diffusionmodules/openaimodel.py
@@ -971,6 +971,8 @@ def __init__(
)
logging.info(f"Missing keys: {missing_key}")
logging.info(f"Unexpected keys: {unexpected_keys}")
+ else:
+ logging.info(f"There are no missing keys, model loaded properly!")
if unet_precision == "fp16-mixed": # AMP O2
self.convert_to_fp16()
@@ -1217,6 +1219,7 @@ def _state_key_mapping(self, state_dict: dict):
def _load_pretrained_model(self, state_dict, ignore_mismatched_sizes=False, from_NeMo=False):
state_dict = self._strip_unet_key_prefix(state_dict)
if not from_NeMo:
+ logging.info("creating state key mapping from HF")
state_dict = self._state_key_mapping(state_dict)
state_dict = self._legacy_unet_ckpt_mapping(state_dict)
diff --git a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py
index ab33532c3c1f..0443d75a61e8 100644
--- a/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py
+++ b/nemo/collections/multimodal/modules/stable_diffusion/encoders/modules.py
@@ -689,6 +689,8 @@ def load_model(self, cfg, state_dict):
model_cfg=cfg,
model_parallel_config=ModelParallelConfig(),
padded_vocab_size=padded_vocab_size,
+ vision_transformer_config=None, # assumed mcore to be false
+ text_transformer_config=None,
pre_process=cfg.text.pre_process,
post_process=cfg.text.post_process,
)
diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py
index 1fe932ec046c..d564993f7806 100644
--- a/nemo/collections/multimodal/parts/utils.py
+++ b/nemo/collections/multimodal/parts/utils.py
@@ -15,7 +15,6 @@
import tempfile
from typing import Any, Callable, Tuple
-import decord
import numpy as np
import torch
from omegaconf import DictConfig, OmegaConf, open_dict
@@ -33,6 +32,11 @@
from nemo.utils import AppState, logging
from nemo.utils.model_utils import inject_model_parallel_rank
+try:
+ import decord
+except Exception:
+ logging.warning("The package `decord` was not installed in this environment.")
+
try:
from megatron.core import dist_checkpointing
diff --git a/nemo/collections/multimodal/speech_llm/models/__init__.py b/nemo/collections/multimodal/speech_llm/models/__init__.py
index ec188828ec87..ee51bd94af2c 100644
--- a/nemo/collections/multimodal/speech_llm/models/__init__.py
+++ b/nemo/collections/multimodal/speech_llm/models/__init__.py
@@ -12,4 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from nemo.collections.multimodal.speech_llm.models.modular_models import ModularAudioGPTModel
+from nemo.collections.multimodal.speech_llm.models.modular_models import (
+ CrossAttendModularAudioGPTModel,
+ ModularAudioGPTModel,
+)
diff --git a/nemo/collections/multimodal/speech_llm/models/modular_models.py b/nemo/collections/multimodal/speech_llm/models/modular_models.py
index aa21cf95bfa4..49df57f202fa 100644
--- a/nemo/collections/multimodal/speech_llm/models/modular_models.py
+++ b/nemo/collections/multimodal/speech_llm/models/modular_models.py
@@ -69,7 +69,7 @@
HAVE_MEGATRON_CORE = False
-__all__ = ["ModularAudioGPTModel"]
+__all__ = ["ModularAudioGPTModel", "CrossAttendModularAudioGPTModel"]
default_inference_config = {'tokens_to_generate': 30}
diff --git a/nemo/collections/multimodal/speech_llm/models/modular_t5_models.py b/nemo/collections/multimodal/speech_llm/models/modular_t5_models.py
index e9dacca17bc4..f5263496b75e 100644
--- a/nemo/collections/multimodal/speech_llm/models/modular_t5_models.py
+++ b/nemo/collections/multimodal/speech_llm/models/modular_t5_models.py
@@ -38,6 +38,7 @@
MultiAudioPerceptionModule,
)
from nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model import MegatronT5LoraModel
+from nemo.collections.nlp.models.language_modeling.megatron_t5_model import MegatronT5Model
from nemo.collections.nlp.models.language_modeling.megatron_t5_sft_model import MegatronT5SFTModel
from nemo.collections.nlp.models.nlp_model import NLPModel
from nemo.collections.nlp.modules.common.megatron.utils import (
@@ -62,7 +63,7 @@
HAVE_MEGATRON_CORE = False
-__all__ = ["ModularizedAudioT5Model"]
+__all__ = ["ModularizedAudioT5Model", "DecoderTextPromptModularizedAudioT5Model"]
default_inference_config = {'tokens_to_generate': 30}
diff --git a/nemo/collections/multimodal/speech_llm/modules/__init__.py b/nemo/collections/multimodal/speech_llm/modules/__init__.py
index d9562652ce84..7effb0894da7 100644
--- a/nemo/collections/multimodal/speech_llm/modules/__init__.py
+++ b/nemo/collections/multimodal/speech_llm/modules/__init__.py
@@ -17,4 +17,5 @@
AudioPerceptionModule,
MultiAudioPerceptionModule,
MultiFeatureAggregator,
+ TransformerCrossAttention,
)
diff --git a/nemo/collections/multimodal/speech_llm/modules/perception_modules.py b/nemo/collections/multimodal/speech_llm/modules/perception_modules.py
index a42c7d06cba0..021ac1ff3dad 100644
--- a/nemo/collections/multimodal/speech_llm/modules/perception_modules.py
+++ b/nemo/collections/multimodal/speech_llm/modules/perception_modules.py
@@ -29,7 +29,7 @@
from nemo.core.neural_types import AcousticEncodedRepresentation, AudioSignal, LengthsType, NeuralType, SpectrogramType
from nemo.utils.decorators import experimental
-__all__ = ["AudioPerceptionModule", "MultiAudioPerceptionModule"]
+__all__ = ["AudioPerceptionModule", "MultiAudioPerceptionModule", "TransformerCrossAttention"]
class AudioPerceptionModule(NeuralModule, Exportable):
diff --git a/nemo/collections/nlp/data/information_retrieval/bert_embedding_dataset.py b/nemo/collections/nlp/data/information_retrieval/bert_embedding_dataset.py
index 3c57b1af4cca..8bca618dce3d 100644
--- a/nemo/collections/nlp/data/information_retrieval/bert_embedding_dataset.py
+++ b/nemo/collections/nlp/data/information_retrieval/bert_embedding_dataset.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from random import choices, sample
from typing import Mapping, Optional
import datasets
@@ -50,7 +51,7 @@ def __init__(
num_hard_negatives: int = 4,
):
"""
- file_path: Path to a JSONL dataset with (query,pos_doc,neg_doc) triplets in jsonl format.
+ file_path: Path to a JSONL dataset with (query,pos_doc,neg_doc) triplets in jsonl format.
tokenizer: Tokenizer for the dataset. Instance of a class that inherits TokenizerSpec (ex: YTTM, SentencePiece).
max_seq_length (int): maximum sequence length for each dataset examples. Examples will either be truncated to fit this length or dropped if they cannot be truncated.
min_seq_length (int): min length of each data example in the dataset. Data examples will be dropped if they do not meet the min length requirements.
@@ -132,7 +133,10 @@ def __getitem__(self, idx):
if isinstance(idx, np.uint32):
idx = idx.item()
- assert idx < len(self.indexed_dataset)
+ if idx is not None:
+ assert idx < len(self.indexed_dataset)
+ else:
+ idx = -1
# idx may < 0 because we pad_samples_to_global_batch_size, e.g. id = -1
if idx < 0:
idx = len(self) + idx
@@ -159,10 +163,16 @@ def _process_example(self, example):
if self.data_type == 'train':
q = self.tokenizer.text_to_ids("query: " + example['query'].strip())
d = self.tokenizer.text_to_ids("passage: " + example['pos_doc'].strip())
- nd = [
- self.tokenizer.text_to_ids("passage: " + example['neg_doc'][i].strip())
- for i in range(self.num_hard_negatives)
- ]
+ # handle cases where the required number of hard negatives are not present
+ if len(example['neg_doc']) < self.num_hard_negatives:
+ nd = example['neg_doc']
+ # sample rest with replacement
+ nd = nd + choices(example['neg_doc'], k=self.num_hard_negatives - len(example['neg_doc']))
+ else:
+ # sample without replacement
+ nd = sample(example['neg_doc'], k=self.num_hard_negatives)
+ assert len(nd) == self.num_hard_negatives, "Error in sampling required number of hard negatives"
+ nd = [self.tokenizer.text_to_ids("passage: " + ex.strip()) for ex in nd]
elif self.data_type == 'query':
q = self.tokenizer.text_to_ids("query: " + example['query'].strip())
@@ -292,6 +302,7 @@ def collate_fn(self, batch):
'input_ids': input_ids,
'token_type_ids': torch.zeros_like(input_ids),
'attention_mask': attention_mask,
+ 'metadata': metadata,
}
return processed_batch
diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py
index caf8dbec6c7a..2e21c57dddd3 100644
--- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py
+++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_dataset.py
@@ -57,6 +57,7 @@ def __init__(
tokens_to_generate: int = 0,
memmap_workers: Optional[int] = None,
hf_dataset: bool = False,
+ global_sample_mapping: bool = False,
truncation_method: str = 'right',
special_tokens: Optional[Mapping[str, str]] = None, # special tokens, a dictory of {token_type: token}
is_test: bool = False,
@@ -83,6 +84,7 @@ def __init__(
index_mapping_dir: Directory to save the index mapping to. If None, will write to the same folder as the dataset.
prompt_template: Prompt template to inject via an fstring. Formatted like Q: {context_key}\n\nA: {label_key}
hf_dataset: Whether to load the json file with the HuggingFace dataset. otherwise, will load the jsonl file with the JSONLMemMapDataset.
+ global_sample_mapping: Whether to shuffle all data together, or shuffle the dataset within each epoch
truncation_method: Truncation from which position. Options: ['left', 'right']
special_tokens: special tokens for the chat prompts, a dictionary of {token_type: token}. Default: {'system_turn_start': '', 'turn_start': '', 'label_start': '', 'end_of_turn': '\n', "end_of_name": "\n"}
is_test: Whether this dataset is the test split.
@@ -109,6 +111,7 @@ def __init__(
self.tokens_to_generate = tokens_to_generate
self.memmap_workers = memmap_workers
self.hf_dataset = hf_dataset
+ self.global_sample_mapping = global_sample_mapping
self.truncation_method = truncation_method
self.is_test = is_test
self.output_original_text = output_original_text
@@ -176,7 +179,11 @@ def _maybe_validate_prompt_template(self):
def _build_samples_mapping(self):
if self.max_num_samples is not None:
- osm = OnlineSampleMapping(dataset_size=len(self.indexed_dataset), num_samples=self.max_num_samples)
+ osm = (
+ OnlineSampleMapping(dataset_size=len(self.indexed_dataset), num_samples=self.max_num_samples)
+ if not self.global_sample_mapping
+ else None
+ )
self.samples_mapping = get_samples_mapping(
indexed_dataset=self.indexed_dataset,
data_prefix=self.file_path,
diff --git a/nemo/collections/nlp/models/information_retrieval/megatron_bert_embedding_model.py b/nemo/collections/nlp/models/information_retrieval/megatron_bert_embedding_model.py
index 485c49cbd927..d4df93377db6 100644
--- a/nemo/collections/nlp/models/information_retrieval/megatron_bert_embedding_model.py
+++ b/nemo/collections/nlp/models/information_retrieval/megatron_bert_embedding_model.py
@@ -13,6 +13,9 @@
# limitations under the License.
import logging
+import os
+
+import numpy as np
try:
from megatron.core import parallel_state
@@ -29,6 +32,8 @@
from omegaconf import DictConfig, OmegaConf, open_dict
from omegaconf.dictconfig import DictConfig
from pytorch_lightning.trainer.trainer import Trainer
+from torch.distributed import all_gather as all_gather_no_backprop
+from torch.distributed.nn.functional import all_gather as all_gather_with_backprop
from nemo.collections.nlp.data.information_retrieval.bert_embedding_dataset import BertEmbeddingDataset
from nemo.collections.nlp.data.language_modeling.megatron.data_samplers import (
@@ -62,6 +67,14 @@
HAVE_MEGATRON_CORE = False
+def listify(tensor):
+ l_tensor = []
+ for t in tensor:
+ r = t[:].unsqueeze(0).cpu()
+ l_tensor.append(r)
+ return l_tensor
+
+
class MegatronBertEmbeddingModel(MegatronBertModel):
"""
Megatron Bert pretraining.
@@ -74,6 +87,10 @@ def __init__(self, cfg: DictConfig, trainer: Trainer):
self.cross_entropy_loss = torch.nn.CrossEntropyLoss(label_smoothing=cfg.get('label_smoothing', 0.0))
softmax_temp = cfg.get('softmax_temp', 0.05)
self.scale = 1.0 / softmax_temp
+ self.hard_negatives_to_train = self.cfg.data.get("hard_negatives_to_train", 4)
+ self.global_inbatch_negatives = self.cfg.get("global_inbatch_negatives", True)
+ self.backprop_type = self.cfg.get("backprop_type", "local")
+ assert self.backprop_type in ["local", "global"], "Backprop type must be `local` or `global`"
def model_provider_func(self, pre_process, post_process):
cfg = self.cfg
@@ -141,34 +158,61 @@ def model_provider_func(self, pre_process, post_process):
return model
- def build_train_valid_test_datasets(self):
+ def build_train_valid_test_datasets(self, is_train=True):
self._train_ds = None
self._validation_ds = None
self._test_ds = None
- self._train_ds = BertEmbeddingDataset(
- self.cfg.data.data_train,
- tokenizer=self.tokenizer,
- add_bos=True,
- num_hard_negatives=self.cfg.data.get("hard_negatives_to_train", 4),
- max_seq_length=self.cfg.encoder_seq_length,
- )
- if self.cfg.data.data_validation:
- self._validation_ds = BertEmbeddingDataset(
- self.cfg.data.data_validation,
+ if is_train:
+ self._train_ds = BertEmbeddingDataset(
+ self.cfg.data.data_train,
tokenizer=self.tokenizer,
add_bos=True,
num_hard_negatives=self.cfg.data.get("hard_negatives_to_train", 4),
max_seq_length=self.cfg.encoder_seq_length,
)
+ if self.cfg.data.data_validation:
+ self._validation_ds = BertEmbeddingDataset(
+ self.cfg.data.data_validation,
+ tokenizer=self.tokenizer,
+ add_bos=True,
+ num_hard_negatives=self.cfg.data.get("hard_negatives_to_train", 4),
+ max_seq_length=self.cfg.encoder_seq_length,
+ )
+
+ else:
+ logging.info(f'Building test dataset')
+ if self.cfg.data.data_test.query_file_names is None or self.cfg.data.data_test.doc_file_names is None:
+ return []
+
+ query_dataset = BertEmbeddingDataset(
+ file_path=self.cfg.data.data_test.query_file_names[0],
+ tokenizer=self.tokenizer,
+ max_seq_length=self.cfg.encoder_seq_length,
+ add_bos=True,
+ add_eos=True,
+ data_type="query",
+ )
+ doc_dataset = BertEmbeddingDataset(
+ file_path=self.cfg.data.data_test.doc_file_names[0],
+ tokenizer=self.tokenizer,
+ max_seq_length=self.cfg.encoder_seq_length,
+ add_bos=True,
+ add_eos=True,
+ data_type="doc",
+ )
+
+ self._test_ds = [query_dataset, doc_dataset]
if self._train_ds is not None:
logging.info(f'Length of train dataset: {len(self._train_ds)}')
if self._validation_ds is not None:
logging.info(f'Length of val dataset: {len(self._validation_ds)}')
if self._test_ds is not None:
- logging.info(f'Length of test dataset: {len(self._test_ds)}')
+ logging.info(f'Length of test query dataset: {len(self._test_ds[0])}')
+ logging.info(f'Length of test doc dataset: {len(self._test_ds[1])}')
+
logging.info(f'Finished building SBert datasets.')
return self._train_ds, self._validation_ds, self._test_ds
@@ -202,6 +246,9 @@ def setup(self, stage=None):
if stage == 'predict':
return
+ elif stage == 'test':
+ self.build_train_valid_test_datasets(is_train=False)
+ self.setup_test_data(self.cfg.data)
else:
# TODO: consider adding a ModelPT guard to check if model is being restored.
# allowing restored models to optionally setup datasets
@@ -292,7 +339,8 @@ def build_pretraining_data_loader(self, dataset, consumed_samples):
global_batch_size=self.cfg.global_batch_size,
data_parallel_rank=parallel_state.get_data_parallel_rank(),
data_parallel_size=parallel_state.get_data_parallel_world_size(),
- drop_last=self.cfg.get('drop_last', True),
+ drop_last=self.cfg.get('drop_last', False),
+ pad_samples_to_global_batch_size=not self.cfg.get('drop_last', False),
)
elif self.cfg.data.dataloader_type == 'cyclic':
batch_sampler = MegatronPretrainingRandomSampler(
@@ -301,7 +349,8 @@ def build_pretraining_data_loader(self, dataset, consumed_samples):
micro_batch_size=self.cfg.micro_batch_size,
data_parallel_rank=parallel_state.get_data_parallel_rank(),
data_parallel_size=parallel_state.get_data_parallel_world_size(),
- drop_last=self.cfg.get('drop_last', True),
+ drop_last=self.cfg.get('drop_last', False),
+ pad_samples_to_global_batch_size=not self.cfg.get('drop_last', False),
)
else:
raise ValueError('cfg.data.dataloader_type must be "single" or "cyclic"')
@@ -337,6 +386,24 @@ def setup_validation_data(self, cfg):
)
self._validation_dl = self.build_pretraining_data_loader(self._validation_ds, consumed_samples)
+ def setup_eval_dataloader(self, datasets):
+ dataloaders = []
+ for dataset in datasets:
+ eval_dl = self.build_pretraining_data_loader(
+ dataset=dataset,
+ consumed_samples=0,
+ )
+ dataloaders.append(eval_dl)
+ return dataloaders
+
+ def setup_test_data(self, cfg):
+ if self._test_ds:
+ logging.info(
+ f'Setting up test dataloader with len(len(self._test_ds)): {len(self._test_ds[0])}, {len(self._test_ds[1])}'
+ )
+ self._test_dl = self.setup_eval_dataloader(self._test_ds)
+ return
+
def training_step(self, dataloader_iter):
self._optimizer.zero_grad()
@@ -437,7 +504,8 @@ def training_step(self, dataloader_iter):
def get_forward_output_and_loss_func(self):
def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_layers=None):
- batches = next(dataloader_iter)[0]
+ batches, _, dl_idx = next(dataloader_iter)
+ metadata = batches.pop('metadata')
batches = {k: v.cuda(non_blocking=True) for k, v in batches.items()}
if self.mcore_bert:
@@ -461,15 +529,170 @@ def loss_func(output_tensor):
loss = lm_loss
reduced_loss = average_losses_across_data_parallel_group([loss, lm_loss])
- return loss, {'loss': reduced_loss}
+ if 'hs' in loss_dict:
+ # metadata = batches.get('metadata', [{}] * len(batches['input_ids']))
+ return loss, {
+ 'loss': reduced_loss,
+ 'd_hs': loss_dict['hs'],
+ 'q_hs': loss_dict['hs'],
+ 'metadata': metadata,
+ 'dl_idx': dl_idx,
+ }
+ else:
+ return loss, {'loss': reduced_loss}
return output_tensor, loss_func
return fwd_output_and_loss_func
- def loss_func(self, output_tensor):
+ def validation_step(self, dataloader_iter):
+ prefix = "test" if self.trainer.testing else "val"
+ if self.cfg.data.dataloader_type == "LDDL":
+ seq_length = dataloader_iter.iterator.get_seqlen()
+ else:
+ seq_length = self.cfg.encoder_seq_length
+
+ fwd_bwd_function = get_forward_backward_func()
- chunks = output_tensor.chunk(self.cfg.micro_batch_size)
+ losses_reduced_per_micro_batch = fwd_bwd_function(
+ forward_step_func=self.get_forward_output_and_loss_func(),
+ data_iterator=self._make_data_iterator_list(dataloader_iter),
+ model=self.model,
+ num_microbatches=get_num_microbatches(),
+ forward_only=True,
+ seq_length=seq_length,
+ micro_batch_size=self.cfg.micro_batch_size,
+ )
+
+ if losses_reduced_per_micro_batch:
+ loss_tensors_list = [loss_reduced['loss'] for loss_reduced in losses_reduced_per_micro_batch]
+ loss_tensor = torch.vstack(loss_tensors_list)
+ loss_mean = loss_tensor.mean(axis=0)
+ else:
+ loss_mean = torch.tensor([0.0]).cuda()
+
+ loss = loss_mean[0]
+ if prefix == 'val':
+ self.validation_step_outputs.append(loss)
+ else:
+ assert len(losses_reduced_per_micro_batch) == 1
+ dataloader_idx = losses_reduced_per_micro_batch[0]['dl_idx']
+ self.test_step_outputs[dataloader_idx].append(losses_reduced_per_micro_batch[0])
+ return loss
+
+ def on_test_epoch_end(self):
+ for dataloader_idx, output in enumerate(self.test_step_outputs):
+ self.gather_and_maybe_write_predictions(output, self.cfg.data.data_test, 'test', dataloader_idx)
+
+ def gather_and_maybe_write_predictions(self, output, data_cfg, mode, dataloader_idx=0):
+ if not data_cfg.get("write_embeddings_to_file", False):
+ return True
+ gathered_output_batches = [None for _ in range(parallel_state.get_data_parallel_world_size())]
+ torch.distributed.all_gather_object(
+ gathered_output_batches,
+ [
+ {
+ 'q_hs': batch['q_hs'],
+ 'd_hs': batch['d_hs'],
+ 'metadata': batch['metadata'],
+ }
+ for batch in output
+ ],
+ group=parallel_state.get_data_parallel_group(),
+ )
+
+ # Remove duplicate examples due to distributed sampler.
+ deduplicated_outputs = {
+ 'q_hs': [],
+ 'd_hs': [],
+ 'metadata': [],
+ }
+ total_size, skipped = 0, 0
+ for rank in range(0, parallel_state.get_data_parallel_world_size()):
+ for batch in gathered_output_batches[rank]:
+ l_q_hs = listify(batch['q_hs'])
+ l_d_hs = listify(batch['d_hs'])
+ l_m = batch['metadata']
+ assert len(l_m) == len(l_q_hs) == len(l_d_hs)
+ for q_hs, d_hs, metadata in zip(
+ l_q_hs,
+ l_d_hs,
+ l_m,
+ ):
+ total_size += 1
+ if not metadata.get("__AUTOGENERATED__", False):
+ deduplicated_outputs['q_hs'].append(q_hs)
+ deduplicated_outputs['d_hs'].append(d_hs)
+ deduplicated_outputs['metadata'].append(metadata)
+ else:
+ skipped += 1
+
+ logging.info(
+ f"{total_size-skipped} deduplicated outputs in dataloader:{dataloader_idx}, (skipped {skipped} autogenerated examples)."
+ )
+
+ # Write predictions to file
+ if self.global_rank == 0 and data_cfg.get("write_embeddings_to_file", False):
+ logging.info(
+ f"Total deduplicated inference data size: {total_size} to {len(deduplicated_outputs['metadata'])}"
+ )
+
+ # Check if the user provided a prefix path to the file(s) they want to write.
+ if not hasattr(data_cfg, "output_file_path_prefix") or data_cfg.output_file_path_prefix is None:
+ raise ValueError(
+ f"Cannot write predictions to file when output_file_path_prefix is not set or present in the yaml config file."
+ )
+ filename_log_key = f"{mode}_{data_cfg.names[dataloader_idx]}"
+ consumed_samples = self._compute_consumed_samples_after_training_step()
+ fldr_path = f"{data_cfg.output_file_path_prefix}/consumed_samples{consumed_samples}/{filename_log_key}"
+ self.write_embeddings_to_file(deduplicated_outputs, fldr_path, dataloader_idx)
+ return deduplicated_outputs, total_size
+
+ def write_embeddings_to_file(self, outputs, output_file_path, d_idx):
+ emb_type = 'query' if d_idx == 0 else 'doc'
+ hs = torch.cat(outputs['q_hs' if d_idx == 0 else 'd_hs'], dim=0)
+ hs_npy = hs.float().numpy()
+ emb_fldr = f"{output_file_path}"
+ os.makedirs(emb_fldr, exist_ok=True)
+ with open(f"{output_file_path}/{emb_type}.ids", "w") as f:
+ for m in outputs['metadata']:
+ f.write(m[f"{emb_type}_id"] + "\n")
+ np.save(f"{emb_fldr}/{emb_type}.npy", hs_npy)
+ return True
+
+ def inference_loss_func(self, eos_tensors):
+ hs = eos_tensors
+ _blank = torch.zeros(1, device=hs.device, dtype=hs.dtype)[0]
+ return {
+ 'hs': eos_tensors,
+ 'lm loss': _blank,
+ }
+
+ def _gather_global_inbatch_representations(self, local_tensor):
+ local_tensor = local_tensor.contiguous()
+ if self.backprop_type == 'local':
+ global_tensors = [
+ torch.zeros_like(local_tensor) for _ in range(parallel_state.get_data_parallel_world_size())
+ ]
+ all_gather_no_backprop(global_tensors, local_tensor, group=parallel_state.get_data_parallel_group())
+ global_tensors[parallel_state.get_data_parallel_rank()] = local_tensor
+ global_tensors = torch.cat(global_tensors, dim=0)
+
+ else:
+ global_tensors = all_gather_with_backprop(local_tensor)
+ global_tensors = torch.cat(global_tensors, dim=0)
+
+ return global_tensors
+
+ def loss_func(self, output_tensor):
+ if self.global_inbatch_negatives and self.trainer.training:
+ output_tensor = self._gather_global_inbatch_representations(output_tensor)
+ if self.trainer.testing:
+ return self.inference_loss_func(output_tensor)
+
+ num_tensors_per_example = 2 + self.hard_negatives_to_train
+ bs = output_tensor.shape[0] // num_tensors_per_example
+ chunks = output_tensor.chunk(bs)
queries = torch.stack([item[0] for item in chunks]) # shape (bs, embedding_dim)
positives = torch.stack([item[1] for item in chunks]) # shape (bs, embedding_dim)
@@ -478,8 +701,7 @@ def loss_func(self, output_tensor):
) # shape (bs, bs); each positive is negative for other queries.
hard_negs = [
- torch.stack([item[i + 2] for item in chunks])
- for i in range(self.cfg.data.get("hard_negatives_to_train", 4))
+ torch.stack([item[i + 2] for item in chunks]) for i in range(self.hard_negatives_to_train)
] # List of length "num_negatives", each tensor of shape (bs, embedding_dim)
hard_negs_scores = (
@@ -493,6 +715,7 @@ def loss_func(self, output_tensor):
scores = torch.cat([pos_inbatch_negs_scores, hard_negs_scores], axis=1)
+ scores = scores.clamp(-1.0, 1.0)
scores *= self.scale
labels = torch.tensor(
diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py
index 6cce2b42be9c..f3299d488fd0 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_full_te_layer_autocast_spec.py
@@ -35,7 +35,9 @@
try:
from megatron.core import parallel_state, tensor_parallel
+ from megatron.core.fusions.fused_layer_norm import FusedLayerNorm
from megatron.core.transformer.spec_utils import ModuleSpec
+ from megatron.core.transformer.transformer_block import TransformerBlockSubmodules, get_num_layers_to_build
from megatron.core.transformer.transformer_layer import BaseTransformerLayer
from megatron.core.transformer.utils import make_sharded_tensors_for_checkpoint
@@ -322,8 +324,10 @@ def sharded_state_dict(self, prefix: str = '', sharded_offsets: tuple = (), meta
# Use this spec to use the full Transformer layer from Transformer Engine
-def get_gpt_full_te_layer_autocast_spec() -> ModuleSpec:
+def get_gpt_full_te_layer_autocast_spec(transformer_config) -> ModuleSpec:
if not HAVE_MEGATRON_CORE or not HAVE_TE:
raise ImportError(IMPORT_ERROR)
-
- return ModuleSpec(module=TETransformerLayerAutocast)
+ num_layers = get_num_layers_to_build(transformer_config)
+ return TransformerBlockSubmodules(
+ layer_specs=[ModuleSpec(module=TETransformerLayerAutocast)] * num_layers, layer_norm=FusedLayerNorm
+ )
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
index 20d532d4764a..d0d239b21637 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
@@ -379,8 +379,11 @@ def _enable_nvidia_optimizations(self):
# NVIDIA container version check
nvidia_torch_version = os.getenv('NVIDIA_PYTORCH_VERSION', None)
- # Support DLFW master container
- if nvidia_torch_version == 'master':
+ def is_official_release_version(nvidia_torch_version):
+ return re.fullmatch("[0-9][0-9]\.[0-9][0-9].*", nvidia_torch_version) # "YY.MM.*"
+
+ # Support DLFW dev container
+ if not is_official_release_version(nvidia_torch_version):
nvidia_torch_version = datetime.now().strftime('%y.%m')
if nvidia_torch_version is not None:
@@ -389,7 +392,7 @@ def _enable_nvidia_optimizations(self):
except Exception:
NVIDIA_TORCH_MAJOR = 0
try:
- NVIDIA_TORCH_MINOR = int(nvidia_torch_version.split('.')[1])
+ NVIDIA_TORCH_MINOR = int(nvidia_torch_version.split('.')[1][:2])
except Exception:
NVIDIA_TORCH_MINOR = 0
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
index 997235e639d2..6e7a145679e0 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
@@ -138,7 +138,11 @@ def mcore_supports_moe() -> bool:
## TODO: This function will not work if TE is not installed
-def get_specs(spec_name, num_experts=None, moe_grouped_gemm=False, use_te=True, hyena_cfg: Dict = None):
+def get_specs(spec_name, transformer_config=None, use_te=True, hyena_cfg: Dict = None):
+ # else cases for backwards compatibility with neva
+ num_experts = transformer_config.num_moe_experts if transformer_config else None
+ moe_grouped_gemm = transformer_config.moe_grouped_gemm if transformer_config else False
+
if num_experts is not None:
assert mcore_supports_moe(), "Megatron-core >= v0.5.0 is required for MoE"
@@ -148,7 +152,7 @@ def get_specs(spec_name, num_experts=None, moe_grouped_gemm=False, use_te=True,
"": get_gpt_layer_local_spec(num_experts, moe_grouped_gemm),
"te_gpt": get_gpt_layer_with_transformer_engine_spec(num_experts, moe_grouped_gemm),
"megatron_falcon_gpt": get_falcon_layer_spec(),
- "megatron_gpt_full_te_layer_autocast": get_gpt_full_te_layer_autocast_spec(),
+ "megatron_gpt_full_te_layer_autocast": get_gpt_full_te_layer_autocast_spec(transformer_config),
"modelopt": get_gpt_layer_modelopt_spec(num_experts),
"te_gpt_hyena": get_gpt_layer_with_te_and_hyena_spec(hyena_cfg),
}
@@ -415,8 +419,7 @@ def model_provider_func(self, pre_process, post_process):
config=self.transformer_config,
transformer_layer_spec=get_specs(
self.spec_name,
- self.transformer_config.num_moe_experts,
- self.transformer_config.moe_grouped_gemm,
+ self.transformer_config,
self.transformer_engine,
self.cfg.get('hyena', None),
),
@@ -816,7 +819,9 @@ def training_step(self, dataloader_iter):
ignore_virtual=True
):
if (
- self.cfg.get('defer_embedding_wgrad_compute', False) and self.mcore_gpt
+ self.cfg.get('defer_embedding_wgrad_compute', False)
+ and self.mcore_gpt
+ and not self.use_mcore_dist_optim
): # Silently ignore the optimization if MCORE is not used
module_list = self.get_model_module_list()
if len(module_list) > 1:
@@ -839,7 +844,9 @@ def training_step(self, dataloader_iter):
ignore_virtual=True
):
if (
- self.cfg.get('defer_embedding_wgrad_compute', False) and self.mcore_gpt
+ self.cfg.get('defer_embedding_wgrad_compute', False)
+ and self.mcore_gpt
+ and not self.use_mcore_dist_optim
): # Silently ignore the optimization if MCORE is not used
module_list = self.get_model_module_list()
if len(module_list) > 1:
@@ -1549,6 +1556,7 @@ def build_train_valid_test_datasets(self):
"create_attention_mask": not self.get_attention_mask_from_fusion,
"mmap_bin_files": self.cfg.data.get("mmap_bin_files", True),
"drop_last_partial_validation_sequence": self.cfg.data.get("validation_drop_last", True),
+ "num_dataset_builder_threads": self.cfg.data.get("num_dataset_builder_threads", 1),
"add_extra_token_to_sequence": add_extra_token,
}
@@ -1683,6 +1691,12 @@ def setup(self, stage=None):
# Override limit_val_batches to be a multiple of num microbatches to prevent val_step from exiting in between a step
self._reconfigure_limit_batches(self.trainer.limit_val_batches, self._validation_dl, 'val')
+ # Data cache generation only
+ # Stops script execution after creating a data cache
+ if self.cfg.data.get('data_cache_generation_only', False):
+ self.trainer.num_sanity_val_steps = 0
+ self.trainer.should_stop = True
+
if stage == 'fit':
self.initialize_last_rank_embeddings()
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py
index 9ab17189ca64..003719331fc1 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py
@@ -284,6 +284,7 @@ def _build_dataset(self, data_cfg, is_train=True):
prompt_template=data_cfg.get('prompt_template', None),
ceil_to_power_2=data_cfg.get('ceil_to_power_2', False),
get_attention_mask_from_fusion=data_cfg.get('get_attention_mask_from_fusion', False),
+ global_sample_mapping=data_cfg.get('global_sample_mapping', False),
virtual_tokens=self.virtual_tokens,
tokens_to_generate=data_cfg.get(
'tokens_to_generate', 0
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py
index c7c175bfa0c1..2488751f808e 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_lm_encoder_decoder_model.py
@@ -274,8 +274,18 @@ def model_provider_func(self, pre_process, post_process, add_encoder, add_decode
en_block_spec = enc_dec_spec_fns[0](self.cfg.encoder.num_layers)
de_block_spec = enc_dec_spec_fns[1](self.cfg.decoder.num_layers)
+
+ encoder_config = copy.deepcopy(self.transformer_config)
+ encoder_config.num_layers = self.cfg.encoder.num_layers
+ if self.cfg.pipeline_model_parallel_size > 1:
+ assert (
+ self.cfg.pipeline_model_parallel_split_rank is not None
+ ), "Need to know how to shard the encoder & decoder."
+ encoder_config.pipeline_model_parallel_size = self.cfg.pipeline_model_parallel_split_rank
+
model = MCoreT5Model(
config=self.transformer_config,
+ encoder_config=encoder_config,
transformer_encoder_layer_spec=en_block_spec,
transformer_decoder_layer_spec=de_block_spec,
vocab_size=self.padded_vocab_size,
@@ -1787,6 +1797,9 @@ def on_load_checkpoint(self, checkpoint) -> None:
# addressing the current T5 mcore version's implementation of sharded_state_dict
checkpoint_state_dict['lm_head.output_layer.bias'] = checkpoint_state_dict['output_layer.bias']
+ checkpoint_state_dict['position_embeddings.weight'] = checkpoint_state_dict[
+ 'embedding.position_embeddings.weight'
+ ]
module.load_state_dict(checkpoint_state_dict, strict=True)
else:
diff --git a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py
index 8d2d77c55cf2..7167eefda637 100644
--- a/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py
+++ b/nemo/collections/nlp/modules/common/megatron/adapters/parallel_adapters.py
@@ -257,7 +257,7 @@ def __init__(
te_version = packaging.version.Version(version("transformer-engine"))
if te_version >= packaging.version.Version("1.5.0dev") and (
- not self.input_is_parallel and model_parallel_config.tp_comm_disable_qkv
+ not self.input_is_parallel and getattr(model_parallel_config, "tp_comm_overlap_disable_qkv", False)
):
# TE 1.5 introduces the option `return_layernorm_output_gathered`, so the all gather
# in the forward method is not needed, so set self._sequence_parallel to False
diff --git a/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py b/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py
index 15e6210bb69c..a0446f290826 100644
--- a/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py
+++ b/nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py
@@ -223,8 +223,6 @@ def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]):
peft_cfgs: One or more PEFTConfig objects that specify the PEFT method configuration
"""
- if self.cfg.get('virtual_pipeline_model_parallel_size', None):
- raise ValueError('Virtual pipeline model parallel is not supported when using PEFT')
if self.cfg.optim.name == "distributed_fused_adam":
raise ValueError('distributed_fused_adam is not supported for PEFT. Please use fused_adam')
diff --git a/nemo/collections/tts/g2p/models/ja_jp_ipa.py b/nemo/collections/tts/g2p/models/ja_jp_ipa.py
new file mode 100644
index 000000000000..c57d463b51b2
--- /dev/null
+++ b/nemo/collections/tts/g2p/models/ja_jp_ipa.py
@@ -0,0 +1,153 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pathlib
+from collections import defaultdict
+from typing import Dict, List, Optional, Union
+
+from nemo.collections.common.tokenizers.text_to_speech.ipa_lexicon import (
+ get_grapheme_character_set,
+ get_ipa_punctuation_list,
+)
+from nemo.collections.tts.g2p.models.base import BaseG2p
+from nemo.collections.tts.g2p.utils import set_grapheme_case
+from nemo.utils import logging
+
+
+class JapaneseG2p(BaseG2p):
+ def __init__(
+ self,
+ phoneme_dict: Union[str, pathlib.Path, Dict[str, List[str]]],
+ phoneme_prefix: str = "",
+ ascii_letter_prefix: str = "#",
+ ascii_letter_case: str = "upper",
+ word_tokenize_func=None,
+ apply_to_oov_word=None,
+ mapping_file: Optional[str] = None,
+ word_segmenter: Optional[str] = None,
+ ):
+ """
+ Japanese G2P module. This module first segments Japanese characters into words using Janome, then
+ these separated words are converted into phoneme sequences by looking them up in the 'phoneme_dict'.
+ Args:
+ phoneme_dict (str, Path, Dict): Path to ja_JP_wordtoipa.txt dict file or a dict object.
+ phoneme_prefix (str): Prepend a special symbol to any phonemes in order to distinguish phonemes from
+ graphemes because there may be overlaps between the two sets. It is suggested to choose a prefix that
+ is not used or preserved somewhere else. Default to "#".
+ ascii_letter_prefix (str): Prepend a special symbol to any ASCII letters. Default to "".
+ ascii_letter_case (str): Specify the case chosen from `"lower"`, `"upper"`, or `"mixed"`, and process the
+ cases of non-Chinese words. Default to `"upper"`.
+ word_tokenize_func: Function for tokenizing text to words.
+ It has to return List[Tuple[Union[str, List[str]], bool]] where every tuple denotes word representation
+ and flag whether to leave unchanged or not.
+ It is expected that unchangeable word representation will be represented as List[str], other cases are
+ represented as str.
+ It is useful to mark word as unchangeable which is already in phoneme representation.
+ apply_to_oov_word: Function that will be applied to out of phoneme_dict word.
+ word_segmenter: method that will be applied to segment utterances into words for better polyphone disambiguation.
+ """
+ assert phoneme_dict is not None, "Please set the phoneme_dict path."
+ assert word_segmenter in [
+ None,
+ "janome",
+ ], f"{word_segmenter} is not supported now. Please choose correct word_segmenter."
+
+ if phoneme_prefix is None:
+ phoneme_prefix = ""
+ if ascii_letter_prefix is None:
+ ascii_letter_prefix = ""
+
+ # phonemes
+ phoneme_dict = (
+ self._parse_ja_phoneme_dict(phoneme_dict, phoneme_prefix)
+ if isinstance(phoneme_dict, str) or isinstance(phoneme_dict, pathlib.Path)
+ else phoneme_dict
+ )
+ self.phoneme_list = sorted({pron for prons in phoneme_dict.values() for pron in prons})
+
+ # ascii letters
+ self.ascii_letter_dict = {
+ x: ascii_letter_prefix + x for x in get_grapheme_character_set(locale="en-US", case=ascii_letter_case)
+ }
+ self.ascii_letter_list = sorted(self.ascii_letter_dict)
+ self.ascii_letter_case = ascii_letter_case
+ self.punctuation = get_ipa_punctuation_list('ja-JP')
+
+ if apply_to_oov_word is None:
+ logging.warning(
+ "apply_to_oov_word=None, This means that some of words will remain unchanged "
+ "if they are not handled by any of the rules in self.parse_one_word(). "
+ "This may be intended if phonemes and chars are both valid inputs, otherwise, "
+ "you may see unexpected deletions in your input."
+ )
+
+ super().__init__(
+ phoneme_dict=phoneme_dict,
+ word_tokenize_func=word_tokenize_func,
+ apply_to_oov_word=apply_to_oov_word,
+ mapping_file=mapping_file,
+ )
+
+ if word_segmenter == "janome":
+ try:
+ from janome.tokenizer import Tokenizer
+ except ImportError as e:
+ logging.error(e)
+
+ # Cut sentences into words to improve polyphone disambiguation
+ self.word_segmenter = Tokenizer().tokenize
+ else:
+ self.word_segmenter = lambda x: [x]
+
+ @staticmethod
+ def _parse_ja_phoneme_dict(
+ phoneme_dict_path: Union[str, pathlib.Path], phoneme_prefix: str
+ ) -> Dict[str, List[str]]:
+ """Loads prondict dict file, and generates a set of all valid symbols."""
+ g2p_dict = defaultdict(list)
+ with open(phoneme_dict_path, 'r') as file:
+ for line in file:
+ # skip empty lines and comment lines starting with `;;;`.
+ if line.startswith(";;;") or len(line.strip()) == 0:
+ continue
+
+ word, pronunciation = line.rstrip().split(maxsplit=1)
+
+ # add a prefix to distinguish phoneme symbols from non-phoneme symbols.
+ pronunciation_with_prefix = [phoneme_prefix + pron for pron in pronunciation]
+ g2p_dict[word] = pronunciation_with_prefix
+
+ return g2p_dict
+
+ def __call__(self, text: str) -> List[str]:
+ """
+ This forward pass function translates Japanese characters into IPA phoneme sequences.
+
+ For example, The text "こんにちは" would be converted as a list,
+ `['k', 'o', 'n', 'n', 'i', 't', 'ʃ', 'i', 'h', 'a']`
+ """
+ text = set_grapheme_case(text, case=self.ascii_letter_case)
+
+ words_list = self.word_segmenter(text)
+ phoneme_seq = []
+ for token in words_list:
+ word = str(token).split("\t")[0]
+ if word in self.phoneme_dict.keys():
+ phoneme_seq += self.phoneme_dict[word]
+ elif word in self.punctuation:
+ phoneme_seq += word
+ else:
+ logging.warning(f"{word} not found in the pronunciation dictionary. Returning graphemes instead.")
+ phoneme_seq += [c for c in word]
+ return phoneme_seq
diff --git a/nemo/core/classes/mixins/hf_io_mixin.py b/nemo/core/classes/mixins/hf_io_mixin.py
index b101cbabe749..543d6c6fccda 100644
--- a/nemo/core/classes/mixins/hf_io_mixin.py
+++ b/nemo/core/classes/mixins/hf_io_mixin.py
@@ -14,9 +14,9 @@
from abc import ABC
from pathlib import Path
-from typing import Dict, Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Union
-from huggingface_hub import HfApi, ModelCard, ModelCardData, ModelFilter
+from huggingface_hub import HfApi, ModelCard, ModelCardData
from huggingface_hub import get_token as get_hf_token
from huggingface_hub.hf_api import ModelInfo
from huggingface_hub.utils import SoftTemporaryDirectory
@@ -35,31 +35,35 @@ class HuggingFaceFileIO(ABC):
"""
@classmethod
- def get_hf_model_filter(cls) -> ModelFilter:
+ def get_hf_model_filter(cls) -> Dict[str, Any]:
"""
Generates a filter for HuggingFace models.
- Additionally includes default values of some metadata about results returned by the Hub.
+ Additionaly includes default values of some metadata about results returned by the Hub.
Metadata:
resolve_card_info: Bool flag, if set, returns the model card metadata. Default: False.
limit_results: Optional int, limits the number of results returned.
Returns:
- A Hugging Face Hub ModelFilter object.
+ A dict representing the arguments passable to huggingface list_models().
"""
- model_filter = ModelFilter(library='nemo')
-
- # Attach some additional info
- model_filter.resolve_card_info = False
- model_filter.limit_results = None
+ model_filter = dict(
+ author=None,
+ library='nemo',
+ language=None,
+ model_name=None,
+ task=None,
+ tags=None,
+ limit=None,
+ full=None,
+ cardData=False,
+ )
return model_filter
@classmethod
- def search_huggingface_models(
- cls, model_filter: Optional[Union[ModelFilter, List[ModelFilter]]] = None
- ) -> List['ModelInfo']:
+ def search_huggingface_models(cls, model_filter: Optional[Dict[str, Any]] = None) -> Iterable['ModelInfo']:
"""
Should list all pre-trained models available via Hugging Face Hub.
@@ -75,16 +79,16 @@ def search_huggingface_models(
# You can replace with any subclass of ModelPT.
from nemo.core import ModelPT
- # Get default ModelFilter
+ # Get default filter dict
filt = .get_hf_model_filter()
# Make any modifications to the filter as necessary
- filt.language = [...]
- filt.task = ...
- filt.tags = [...]
+ filt['language'] = [...]
+ filt['task'] = ...
+ filt['tags'] = [...]
- # Add any metadata to the filter as needed
- filt.limit_results = 5
+ # Add any metadata to the filter as needed (kwargs to list_models)
+ filt['limit'] = 5
# Obtain model info
model_infos = .search_huggingface_models(model_filter=filt)
@@ -96,10 +100,9 @@ def search_huggingface_models(
model = ModelPT.from_pretrained(card.modelId)
Args:
- model_filter: Optional ModelFilter or List[ModelFilter] (from Hugging Face Hub)
+ model_filter: Optional Dictionary (for Hugging Face Hub kwargs)
that filters the returned list of compatible model cards, and selects all results from each filter.
Users can then use `model_card.modelId` in `from_pretrained()` to restore a NeMo Model.
- If no ModelFilter is provided, uses the classes default filter as defined by `get_hf_model_filter()`.
Returns:
A list of ModelInfo entries.
@@ -108,23 +111,6 @@ def search_huggingface_models(
if model_filter is None:
model_filter = cls.get_hf_model_filter()
- # If single model filter, wrap into list
- if not isinstance(model_filter, Iterable):
- model_filter = [model_filter]
-
- # Inject `nemo` library filter
- for mfilter in model_filter:
- if isinstance(mfilter.library, str) and mfilter.library != 'nemo':
- logging.warning(f"Model filter's `library` tag updated be `nemo`. Original value: {mfilter.library}")
- mfilter.library = "nemo"
-
- elif isinstance(mfilter, Iterable) and 'nemo' not in mfilter.library:
- logging.warning(
- f"Model filter's `library` list updated to include `nemo`. Original value: {mfilter.library}"
- )
- mfilter.library = list(mfilter)
- mfilter.library.append('nemo')
-
# Check if api token exists, use if it does
hf_token = get_hf_token()
@@ -134,24 +120,11 @@ def search_huggingface_models(
# Setup extra arguments for model filtering
all_results = [] # type: List[ModelInfo]
- for mfilter in model_filter:
- cardData = None
- limit = None
-
- if hasattr(mfilter, 'resolve_card_info') and mfilter.resolve_card_info is True:
- cardData = True
-
- if hasattr(mfilter, 'limit_results') and mfilter.limit_results is not None:
- limit = mfilter.limit_results
-
- results = api.list_models(
- filter=mfilter, token=hf_token, sort="lastModified", direction=-1, cardData=cardData, limit=limit,
- ) # type: Iterable[ModelInfo]
-
- for result in results:
- all_results.append(result)
+ results = api.list_models(
+ token=hf_token, sort="lastModified", direction=-1, **model_filter
+ ) # type: Iterable[ModelInfo]
- return all_results
+ return results
def push_to_hf_hub(
self,
@@ -284,7 +257,10 @@ def _get_hf_model_card(self, template: str, template_kwargs: Optional[Dict[str,
A HuggingFace ModelCard object that can be converted to a model card string.
"""
card_data = ModelCardData(
- library_name='nemo', tags=['pytorch', 'NeMo'], license='cc-by-4.0', ignore_metadata_errors=True,
+ library_name='nemo',
+ tags=['pytorch', 'NeMo'],
+ license='cc-by-4.0',
+ ignore_metadata_errors=True,
)
if 'card_data' not in template_kwargs:
diff --git a/nemo/deploy/multimodal/query_multimodal.py b/nemo/deploy/multimodal/query_multimodal.py
index ee3d24d4ec1e..1c01c6861048 100644
--- a/nemo/deploy/multimodal/query_multimodal.py
+++ b/nemo/deploy/multimodal/query_multimodal.py
@@ -13,7 +13,6 @@
# limitations under the License.
import numpy as np
-from decord import VideoReader
from PIL import Image
from nemo.deploy.utils import str_list2numpy
@@ -24,6 +23,13 @@
except Exception:
use_pytriton = False
+try:
+ from decord import VideoReader
+except Exception:
+ import logging
+
+ logging.warning("The package `decord` was not installed in this environment.")
+
class NemoQueryMultimodal:
"""
diff --git a/nemo/export/multimodal/run.py b/nemo/export/multimodal/run.py
index 86bcc716af79..149df995c77a 100644
--- a/nemo/export/multimodal/run.py
+++ b/nemo/export/multimodal/run.py
@@ -16,7 +16,13 @@
import json
import os
-import decord
+try:
+ import decord
+except Exception:
+ import logging
+
+ logging.warning("The package `decord` was not installed in this environment.")
+
import einops
import numpy as np
import tensorrt as trt
diff --git a/nemo/export/quantize/quantizer.py b/nemo/export/quantize/quantizer.py
index e645ed8971c3..590cf50c804c 100644
--- a/nemo/export/quantize/quantizer.py
+++ b/nemo/export/quantize/quantizer.py
@@ -225,7 +225,8 @@ def export(self, model: MegatronGPTModel):
assert self.export_config is not None, "Export config is not set"
torch_dtype = torch_dtype_from_precision(self.export_config.dtype)
- self._sample_output(model)
+ if self.export_config.get("sample_output", True):
+ self._sample_output(model)
if model.cfg.megatron_amp_O2:
model.model = unwrap_model(model.model, Float16Module)
diff --git a/nemo/export/tensorrt_llm.py b/nemo/export/tensorrt_llm.py
index b4299dfd8945..fd5fb1a688df 100644
--- a/nemo/export/tensorrt_llm.py
+++ b/nemo/export/tensorrt_llm.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import gc
import json
import logging
import os
@@ -38,16 +39,24 @@
is_nemo_file,
load_nemo_model,
)
-from nemo.export.trt_llm.qnemo import qnemo_to_tensorrt_llm
-from nemo.export.trt_llm.qnemo.tokenizer_utils import get_nmt_tokenizer
-from nemo.export.trt_llm.qnemo.utils import is_qnemo_checkpoint
from nemo.export.trt_llm.tensorrt_llm_build import build_and_save_engine
from nemo.export.trt_llm.tensorrt_llm_run import generate, generate_streaming, load, load_distributed, refit
+LOGGER = logging.getLogger("NeMo")
+
+use_model_opt = True
+try:
+ from nemo.export.trt_llm.qnemo import qnemo_to_tensorrt_llm
+ from nemo.export.trt_llm.qnemo.tokenizer_utils import get_nmt_tokenizer
+ from nemo.export.trt_llm.qnemo.utils import is_qnemo_checkpoint
+except Exception as e:
+ LOGGER.warning(f"Cannot import the Model Optimizer, it will not be available. {type(e).__name__}: {e}")
+ use_model_opt = False
+
use_deploy = True
try:
from nemo.deploy.utils import cast_output, str_ndarray2list
-except Exception:
+except Exception as e:
use_deploy = False
@@ -67,8 +76,6 @@ def wrapper(*args, **kwargs):
except Exception:
use_pytriton = False
-LOGGER = logging.getLogger("NeMo")
-
class TensorRTLLM(ITritonDeployable):
"""
@@ -95,6 +102,8 @@ def __init__(
lora_ckpt_list: List[str] = None,
load_model: bool = True,
use_python_runtime: bool = True,
+ enable_chunked_context: bool = None,
+ max_tokens_in_paged_kv_cache: int = None,
):
"""
Args:
@@ -104,9 +113,19 @@ def __init__(
use_python_runtime (bool): whether to use python or c++ runtime.
"""
+ if use_python_runtime:
+ if enable_chunked_context is not None or max_tokens_in_paged_kv_cache is not None:
+ raise Exception(
+ "enable_chunked_context and max_tokens_in_paged_kv_cache options "
+ "work only with the TensorRT-LLM C++ runtime. Please set "
+ "use_python_runtime=False to use these options."
+ )
+
self.model_dir = model_dir
self.lora_ckpt_list = lora_ckpt_list
self.use_python_runtime = use_python_runtime
+ self.enable_chunked_context = enable_chunked_context if enable_chunked_context is not None else False
+ self.max_tokens_in_paged_kv_cache = max_tokens_in_paged_kv_cache
self.model = None
self.tokenizer = None
self.n_gpus = None
@@ -148,6 +167,10 @@ def export(
max_lora_rank: int = 64,
max_num_tokens: int = None,
opt_num_tokens: int = None,
+ max_seq_len: int = None,
+ multiple_profiles: bool = False,
+ gpt_attention_plugin: str = "auto",
+ gemm_plugin: str = "auto",
):
"""
Exports nemo checkpoints to TensorRT-LLM.
@@ -179,6 +202,10 @@ def export(
max_lora_rank (int): maximum lora rank.
max_num_tokens (int):
opt_num_tokens (int):
+ max_seq_len (int):
+ multiple_profiles: (bool): enables multiple profiles feature of TRT-LLM. Default = False
+ gpt_attention_plugin (str): enable the gpt attention plugin. Default = "auto"
+ gemm_plugin (str): enable the gpt plugin. Default = "auto"
"""
if n_gpus is not None:
@@ -233,7 +260,12 @@ def export(
tmp_dir = tempfile.TemporaryDirectory()
nemo_export_dir = Path(tmp_dir.name)
- if is_qnemo_checkpoint(nemo_checkpoint_path):
+ is_qnemo_ckpt = False
+ if use_model_opt:
+ if is_qnemo_checkpoint(nemo_checkpoint_path):
+ is_qnemo_ckpt = True
+
+ if is_qnemo_ckpt:
if os.path.isdir(nemo_checkpoint_path):
nemo_export_dir = nemo_checkpoint_path
else:
@@ -310,6 +342,10 @@ def export(
paged_context_fmha=paged_context_fmha,
max_num_tokens=max_num_tokens,
opt_num_tokens=opt_num_tokens,
+ max_seq_len=max_seq_len,
+ multiple_profiles=multiple_profiles,
+ gpt_attention_plugin=gpt_attention_plugin,
+ gemm_plugin=gemm_plugin,
)
tokenizer_path = os.path.join(nemo_export_dir, "tokenizer.model")
@@ -402,6 +438,8 @@ def refit(self, model, model_config):
tokenizer_vocab_size=self.tokenizer.vocab_size,
)
load_distributed(self.model_dir, self.mp_rank, self.gpus_per_node)
+ gc.collect()
+ torch.cuda.empty_cache()
refit(weights_dict)
def forward(
@@ -838,6 +876,8 @@ def _load(self):
engine_dir=self.model_dir,
lora_ckpt_list=self.lora_ckpt_list,
use_python_runtime=self.use_python_runtime,
+ enable_chunked_context=self.enable_chunked_context,
+ max_tokens_in_paged_kv_cache=self.max_tokens_in_paged_kv_cache,
)
self._load_prompt_tables()
except Exception as error:
diff --git a/nemo/export/trt_llm/converter/model_converter.py b/nemo/export/trt_llm/converter/model_converter.py
old mode 100644
new mode 100755
index 2a78f6833782..60d50316e9ed
--- a/nemo/export/trt_llm/converter/model_converter.py
+++ b/nemo/export/trt_llm/converter/model_converter.py
@@ -22,6 +22,8 @@
from tensorrt_llm._utils import pad_vocab_size
from tensorrt_llm.functional import non_gated_version
from tensorrt_llm.layers import MoeConfig
+from tensorrt_llm.models.gpt.config import GPTConfig
+from tensorrt_llm.models.llama.config import LLaMAConfig
from tensorrt_llm.models.modeling_utils import PretrainedConfig
from nemo.export.trt_llm.converter.model_to_trt_llm_ckpt import (
@@ -33,6 +35,15 @@
LOGGER = logging.getLogger("NeMo")
+def get_config(decoder_type, config):
+ if decoder_type == "llama":
+ return LLaMAConfig(**config)
+ elif decoder_type == "gpt" or decoder_type == "gptnext":
+ return GPTConfig(**config)
+ else:
+ return PretrainedConfig(**config)
+
+
def prompt_convert(prompt_config, prompt_weights):
if "task_templates" in prompt_config:
prompt_templates = prompt_config["task_templates"]
@@ -156,11 +167,13 @@ def model_to_trtllm_ckpt(
'rotary_pct': nemo_model_config.get('rotary_percentage', 1.0),
'rotary_base': nemo_model_config.get('rotary_base', 10000),
'moe_num_experts': nemo_model_config.get('num_moe_experts', 0),
- 'moe_top_k': nemo_model_config.get('moe_router_topk'),
+ 'moe_top_k': nemo_model_config.get('moe_router_topk', 0),
'moe_normalization_mode': nemo_model_config.get(
'moe_renorm_mode', MoeConfig.ExpertScaleNormalizationMode.RENORMALIZE
),
- 'moe_tp_mode': nemo_model_config.get('moe_tp_mode', MoeConfig.ParallelismMode.TENSOR_PARALLEL),
+ 'moe_tp_mode': nemo_model_config.get(
+ 'moe_tp_mode', 2
+ ), # change MoeConfig.ParallelismMode.TENSOR_PARALLEL to 2
'logits_dtype': 'float32',
'world_size': world_size,
'tp_size': tensor_parallel_size,
@@ -179,7 +192,7 @@ def model_to_trtllm_ckpt(
if use_distributed_convert:
config["gpus_per_node"] = gpus_per_node
- model_configs.append(PretrainedConfig(**config))
+ model_configs.append(get_config(decoder_type, config))
model_configs[0].mapping = tensorrt_llm.Mapping(
world_size=world_size,
rank=model_parallel_rank,
@@ -258,7 +271,7 @@ def model_to_trtllm_ckpt(
weights_dict_local["transformer.ln_f.bias"] = ln_f_bias
config["gpus_per_node"] = gpus_per_node
- model_config = PretrainedConfig(**config)
+ model_config = get_config(decoder_type, config)
model_config.mapping = mapping
model_configs.append(model_config)
weights_dicts.append(weights_dict_local)
diff --git a/nemo/export/trt_llm/converter/utils.py b/nemo/export/trt_llm/converter/utils.py
old mode 100644
new mode 100755
index 3768ff4b2844..eab17167cbd5
--- a/nemo/export/trt_llm/converter/utils.py
+++ b/nemo/export/trt_llm/converter/utils.py
@@ -26,7 +26,7 @@
DECODER_MODEL_TYPE = {
"gptj": 'GPTForCausalLM',
"gptnext": 'GPTForCausalLM',
- "llama": 'LLaMAForCausalLM',
+ "llama": 'LlamaForCausalLM',
"gemma": 'GemmaForCausalLM',
"falcon": 'FalconForCausalLM',
}
diff --git a/nemo/export/trt_llm/tensorrt_llm_build.py b/nemo/export/trt_llm/tensorrt_llm_build.py
old mode 100644
new mode 100755
index b329de2a3b18..d04698c318bf
--- a/nemo/export/trt_llm/tensorrt_llm_build.py
+++ b/nemo/export/trt_llm/tensorrt_llm_build.py
@@ -45,41 +45,51 @@ def build_and_save_engine(
paged_kv_cache: bool = True,
remove_input_padding: bool = True,
paged_context_fmha: bool = False,
- custom_all_reduce: bool = True,
+ use_custom_all_reduce: bool = True,
use_refit: bool = False,
max_num_tokens: int = None,
+ max_seq_len: int = None,
opt_num_tokens: int = None,
max_beam_width: int = 1,
tokens_per_block: int = 128,
+ multiple_profiles: bool = False,
+ gpt_attention_plugin: str = "auto",
+ gemm_plugin: str = "auto",
):
+ architecture = "LLaMAForCausalLM" if model_config.architecture == "LlamaForCausalLM" else model_config.architecture
try:
- model_cls = getattr(tensorrt_llm.models, model_config.architecture)
+ model_cls = getattr(tensorrt_llm.models, architecture)
except:
raise AttributeError(f"Could not find TRTLLM model type: {model_type}!")
logger.set_level("info")
- str_dtype = model_config.dtype
plugin_config = PluginConfig()
- plugin_config.set_gpt_attention_plugin(dtype=str_dtype)
- plugin_config.set_gemm_plugin(dtype=str_dtype)
- plugin_config.use_custom_all_reduce = custom_all_reduce
- plugin_config.set_plugin("multi_block_mode", enable_multi_block_mode)
+ plugin_config.gpt_attention_plugin = gpt_attention_plugin
+ plugin_config.gemm_plugin = gemm_plugin
+ plugin_config.set_nccl_plugin(use_custom_all_reduce=use_custom_all_reduce)
+ plugin_config.multi_block_mode = enable_multi_block_mode
if paged_kv_cache:
plugin_config.enable_paged_kv_cache(tokens_per_block=tokens_per_block)
else:
plugin_config.paged_kv_cache = False
plugin_config.remove_input_padding = remove_input_padding
plugin_config.use_paged_context_fmha = paged_context_fmha
+ plugin_config.multiple_profiles = multiple_profiles
+
+ if max_seq_len is None:
+ max_seq_len = max_input_len + max_output_len
max_num_tokens, opt_num_tokens = check_max_num_tokens(
max_num_tokens=max_num_tokens,
opt_num_tokens=opt_num_tokens,
+ max_seq_len=max_seq_len,
max_batch_size=max_batch_size,
max_input_len=max_input_len,
max_beam_width=max_beam_width,
remove_input_padding=remove_input_padding,
enable_context_fmha=plugin_config.context_fmha,
tokens_per_block=tokens_per_block,
+ multiple_profiles=multiple_profiles,
)
build_dict = {
@@ -87,6 +97,7 @@ def build_and_save_engine(
'max_output_len': max_output_len,
'max_batch_size': max_batch_size,
'max_beam_width': max_beam_width,
+ 'max_seq_len': max_seq_len,
'max_num_tokens': max_num_tokens,
'opt_num_tokens': opt_num_tokens,
'max_prompt_embedding_table_size': max_prompt_embedding_table_size,
@@ -95,11 +106,13 @@ def build_and_save_engine(
'strongly_typed': False,
'builder_opt': None,
'use_refit': use_refit,
+ 'multiple_profiles': multiple_profiles,
}
build_config = BuildConfig.from_dict(build_dict, plugin_config=plugin_config)
if use_lora_plugin is not None:
- build_config.plugin_config.set_lora_plugin(use_lora_plugin)
+ # build_config.plugin_config.set_lora_plugin(use_lora_plugin)
+ # build_config.plugin_config._lora_plugin = use_lora_plugin
lora_config = LoraConfig(
lora_dir=lora_ckpt_list,
lora_ckpt_source='nemo',
diff --git a/nemo/export/trt_llm/tensorrt_llm_run.py b/nemo/export/trt_llm/tensorrt_llm_run.py
index dbbf40cc3cf1..14ad0be699bb 100644
--- a/nemo/export/trt_llm/tensorrt_llm_run.py
+++ b/nemo/export/trt_llm/tensorrt_llm_run.py
@@ -26,15 +26,26 @@
import tensorrt_llm
import torch
from mpi4py.futures import MPIPoolExecutor
-from tensorrt_llm.bindings import GptJsonConfig, GptSession, GptSessionConfig, KvCacheConfig, WorldConfig
from tensorrt_llm.lora_manager import LoraManager
from tensorrt_llm.quantization import QuantMode
from tensorrt_llm.runtime import ModelConfig, ModelRunner, ModelRunnerCpp, SamplingConfig
-from tensorrt_llm.runtime.model_runner_cpp import ModelRunnerCppGptSession
+
from transformers import PreTrainedTokenizer
LOGGER = logging.getLogger("NeMo")
+use_trtllm_bindings = True
+try:
+ from tensorrt_llm.bindings import GptJsonConfig, GptSession, GptSessionConfig, KvCacheConfig, WorldConfig
+except Exception as e:
+ use_trtllm_bindings = False
+
+use_cpp_gpt_session = True
+try:
+ from tensorrt_llm.runtime.model_runner_cpp import ModelRunnerCppGptSession
+except Exception as e:
+ use_cpp_gpt_session = False
+
@dataclass
class TensorrtLLMHostContext:
@@ -131,6 +142,8 @@ def _load(
lora_ckpt_list=None,
num_beams=1,
use_python_runtime: bool = True,
+ enable_chunked_context: bool = False,
+ max_tokens_in_paged_kv_cache: int = None,
):
"""The impl of `load` API for on a single GPU worker."""
try:
@@ -145,7 +158,7 @@ def _load(
max_batch_size = config["build_config"]["max_batch_size"]
max_input_len = config["build_config"]["max_input_len"]
- max_output_len = config["build_config"]["max_output_len"]
+ # max_output_len = config["build_config"]["max_output_len"]
max_beam_width = config["build_config"]["max_beam_width"]
runtime_rank = tensorrt_llm.mpi_rank()
@@ -166,8 +179,10 @@ def _load(
rank=runtime_rank,
max_batch_size=max_batch_size,
max_input_len=max_input_len,
- max_output_len=max_output_len,
+ # max_output_len=max_output_len,
max_beam_width=max_beam_width,
+ enable_chunked_context=enable_chunked_context,
+ max_tokens_in_paged_kv_cache=max_tokens_in_paged_kv_cache,
debug_mode=False,
)
@@ -279,6 +294,8 @@ def load(
lora_ckpt_list: List[str] = None,
num_beams: int = 1,
use_python_runtime: bool = True,
+ enable_chunked_context: bool = False,
+ max_tokens_in_paged_kv_cache: int = None,
) -> TensorrtLLMHostContext:
"""Loaded the compiled LLM model and run it.
@@ -290,17 +307,42 @@ def load(
config = json.load(f)
world_size = config["pretrained_config"]["mapping"]["world_size"]
if world_size == 1:
- _load(tokenizer, engine_dir, lora_ckpt_list, num_beams, use_python_runtime)
+ _load(
+ tokenizer,
+ engine_dir,
+ lora_ckpt_list,
+ num_beams,
+ use_python_runtime,
+ enable_chunked_context,
+ max_tokens_in_paged_kv_cache,
+ )
executor = None
elif tensorrt_llm.mpi_world_size() > 1:
- _load(tokenizer, engine_dir, lora_ckpt_list, num_beams, use_python_runtime)
+ _load(
+ tokenizer,
+ engine_dir,
+ lora_ckpt_list,
+ num_beams,
+ use_python_runtime,
+ enable_chunked_context,
+ max_tokens_in_paged_kv_cache,
+ )
executor = None
tensorrt_llm.mpi_barrier()
else:
executor = MPIPoolExecutor(max_workers=world_size)
futures = []
for _ in range(world_size):
- future = executor.submit(_load, tokenizer, engine_dir, lora_ckpt_list, num_beams, use_python_runtime)
+ future = executor.submit(
+ _load,
+ tokenizer,
+ engine_dir,
+ lora_ckpt_list,
+ num_beams,
+ use_python_runtime,
+ enable_chunked_context,
+ max_tokens_in_paged_kv_cache,
+ )
futures.append(future)
for future in futures:
future.result()
diff --git a/nemo/lightning/_strategy_lib.py b/nemo/lightning/_strategy_lib.py
index 5d7910f70f03..d647fe1b69ea 100644
--- a/nemo/lightning/_strategy_lib.py
+++ b/nemo/lightning/_strategy_lib.py
@@ -61,12 +61,14 @@ def init_parallel_ranks(
global_rank=init_global_rank,
local_rank=init_local_rank,
tensor_model_parallel_size=parallel_config.tensor_model_parallel_size,
+ expert_model_parallel_size=parallel_config.expert_model_parallel_size,
pipeline_model_parallel_size=parallel_config.pipeline_model_parallel_size,
virtual_pipeline_model_parallel_size=parallel_config.virtual_pipeline_model_parallel_size,
+ context_parallel_size=parallel_config.context_parallel_size,
seed=seed,
pipeline_model_parallel_split_rank=getattr(parallel_config, "pipeline_model_parallel_split_rank", None),
use_fp8=fp8,
- init_mpi_proc_group=getattr(parallel_config, "ub_tp_comm_overlap", False),
+ init_mpi_proc_group=getattr(parallel_config, "tp_comm_overlap", False),
# apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30),
)
@@ -92,6 +94,8 @@ def init_model_parallel(model: Optional[nn.Module] = None) -> None:
pipeline_model_parallel_size=app_state.pipeline_model_parallel_size,
virtual_pipeline_model_parallel_size=app_state.virtual_pipeline_model_parallel_size,
pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank,
+ context_parallel_size=app_state.context_parallel_size,
+ expert_model_parallel_size=app_state.expert_model_parallel_size,
)
# assert that fake tp and pp rank match after model parallel init
@@ -124,19 +128,19 @@ def set_model_parallel_attributes(model, parallelism):
# Given Lightning's structure it would be better if parallelism is a different object
# Since then it can be passed to the Strategy
+ from megatron.core.model_parallel_config import ModelParallelConfig
from megatron.core.transformer.transformer_config import TransformerConfig
+ assert isinstance(
+ parallelism, ModelParallelConfig
+ ), f"Expected parallelism config to be of type ModelParallelConfig, but got {type(parallelism)}"
has_mcore_config = isinstance(getattr(model, "config", None), TransformerConfig)
if has_mcore_config and hasattr(model, "configure_model"):
config: TransformerConfig = model.config
- config.tensor_model_parallel_size = parallelism.tensor_model_parallel_size
- config.pipeline_model_parallel_size = parallelism.pipeline_model_parallel_size
- config.virtual_pipeline_model_parallel_size = parallelism.virtual_pipeline_model_parallel_size
- config.context_parallel_size = parallelism.context_parallel_size
- config.expert_model_parallel_size = parallelism.expert_model_parallel_size
- config.moe_extended_tp = parallelism.moe_extended_tp
- config.sequence_parallel = parallelism.sequence_parallel
- config.pipeline_dtype = parallelism.pipeline_dtype
+ for attr_name in filter(lambda x: not x.startswith('__'), dir(parallelism)):
+ if not hasattr(config, attr_name):
+ continue
+ setattr(config, attr_name, getattr(parallelism, attr_name))
return config
diff --git a/nemo/lightning/data.py b/nemo/lightning/data.py
index 58ba81a4ddac..96f07206d770 100644
--- a/nemo/lightning/data.py
+++ b/nemo/lightning/data.py
@@ -8,6 +8,7 @@
from torch.utils.data import DataLoader, Dataset
+## TODO: remove? unused
def create_dataloader(
dataset: "Dataset", drop_last: bool = True, pad_samples_to_global_batch_size=False, **kwargs
) -> DataLoader:
@@ -127,6 +128,14 @@ def add_megatron_sampler(
)
+class WrappedDataLoader(DataLoader):
+ """Wrapper around torch DataLoader which stores the dataloader mode"""
+
+ def __init__(self, mode="train", **dataloader_kwargs):
+ super().__init__(**dataloader_kwargs)
+ self.mode = mode
+
+
# TODO: Replace this with megatron.core.data.data_samplers after we upgrade
class BaseMegatronSampler:
def __init__(
@@ -144,8 +153,6 @@ def __init__(
# Sanity checks.
if total_samples <= 0:
raise RuntimeError(f"no sample to consume: {total_samples}")
- if consumed_samples >= total_samples:
- raise RuntimeError(f"no samples left to consume: {consumed_samples}, {total_samples}")
if micro_batch_size <= 0:
raise RuntimeError(f"micro_batch_size size must be greater than 0, but {micro_batch_size}")
if data_parallel_size <= 0:
@@ -200,6 +207,32 @@ def __iter__(self): ...
class MegatronPretrainingSampler(BaseMegatronSampler):
+ def __init__(
+ self,
+ total_samples: int,
+ consumed_samples: int,
+ micro_batch_size: int,
+ data_parallel_rank: int,
+ data_parallel_size: int,
+ drop_last: bool = True,
+ global_batch_size: Optional[int] = None,
+ rampup_batch_size: Optional[list] = None,
+ pad_samples_to_global_batch_size: Optional[bool] = False,
+ ):
+ super().__init__(
+ total_samples=total_samples,
+ consumed_samples=consumed_samples,
+ micro_batch_size=micro_batch_size,
+ data_parallel_rank=data_parallel_rank,
+ data_parallel_size=data_parallel_size,
+ drop_last=drop_last,
+ global_batch_size=global_batch_size,
+ rampup_batch_size=rampup_batch_size,
+ pad_samples_to_global_batch_size=pad_samples_to_global_batch_size,
+ )
+ if consumed_samples >= total_samples:
+ raise RuntimeError(f"no samples left to consume: {consumed_samples}, {total_samples}")
+
def get_start_end_idx(self):
start_idx = self.data_parallel_rank * self.micro_batch_size
end_idx = start_idx + self.micro_batch_size
diff --git a/nemo/lightning/io/api.py b/nemo/lightning/io/api.py
index cc594b562cff..4d31f020c44a 100644
--- a/nemo/lightning/io/api.py
+++ b/nemo/lightning/io/api.py
@@ -1,11 +1,13 @@
+import json
from pathlib import Path
+from pydoc import locate
from typing import Any, Callable, Optional, Type, TypeVar
import fiddle as fdl
import pytorch_lightning as pl
from fiddle._src.experimental import serialization
-from nemo.lightning.io.mixin import ConnectorMixin, ConnT, ModelConnector
+from nemo.lightning.io.mixin import ConnectorMixin, ConnT, ModelConnector, track_io
from nemo.lightning.io.pl import TrainerContext
CkptType = TypeVar("CkptType")
@@ -41,6 +43,14 @@ def load(path: Path, output_type: Type[CkptType] = Any) -> CkptType:
if not _path.is_file():
raise FileNotFoundError(f"No such file: '{_path}'")
+ ## add IO functionality to custom objects present in the json file
+ with open(_path) as f:
+ j = json.load(f)
+ for obj, val in j["objects"].items():
+ clss = ".".join([val["type"]["module"], val["type"]["name"]])
+ if not serialization.find_node_traverser(locate(clss)):
+ track_io(locate(clss))
+
with open(_path, "rb") as f:
config = serialization.load_json(f.read())
diff --git a/nemo/lightning/io/state.py b/nemo/lightning/io/state.py
index b69fed9d0f4f..9fd81a960358 100644
--- a/nemo/lightning/io/state.py
+++ b/nemo/lightning/io/state.py
@@ -4,6 +4,7 @@
from typing import Any, Callable, Dict, Generic, List, Optional, Tuple, TypeVar, Union, overload
import numpy as np
+import torch
from torch import nn
SourceModuleT = TypeVar("SourceModuleT", bound=nn.Module)
@@ -19,11 +20,12 @@ class TransformCTX:
target_state: dict
+@torch.no_grad
def apply_transforms(
source: nn.Module,
target: TargetModuleT,
mapping: Dict[str, str],
- transforms: Optional[List[Callable[[TransformCTX], TransformCTX]]] = None,
+ transforms: Optional[List[Callable[[TransformCTX], TransformCTX]]] = [],
) -> TargetModuleT:
"""
Applies a series of transformations to adapt the state dictionary of a source module to
@@ -101,9 +103,8 @@ def scale_weights(ctx):
for key, val in mapping.items():
ctx = StateDictTransform(key, val)(ctx)
- if transforms:
- for transform in transforms:
- ctx = transform(ctx)
+ for transform in transforms:
+ ctx = transform(ctx)
_params: Dict[str, nn.Parameter] = {}
for name, param in _target.named_parameters():
@@ -144,9 +145,9 @@ def scale_weights(ctx):
_module.register_buffer(_key, val)
- keys = [name for name in list(target_state.keys()) if not name.endswith("_extra_state")]
+ keys = list(filter(lambda x: x is not None and not x.endswith("_extra_state"), target_state.keys()))
if len(keys) != 0:
- raise RuntimeError(f"Additional keys: {target_state.keys()} in checkpoint but not in model.")
+ raise RuntimeError(f"Additional keys: {keys} in checkpoint but not in model.")
# TODO: Is this correct?
# for key in target.state_dict():
@@ -165,7 +166,7 @@ def scale_weights(ctx):
def _default_transform(inp):
- return inp.float()
+ return inp
class StateDictTransform(Generic[F]):
@@ -324,7 +325,7 @@ def _match_keys(keys: List[str], pattern: str) -> np.ndarray:
regex_pattern = re.compile("^" + pattern.replace("*", "(.*)") + "$")
wildcard_matches = [[] for _ in range(pattern.count("*"))]
- for key in keys:
+ for key in filter(lambda x: x is not None, keys):
match = regex_pattern.match(key)
if match:
for i, group in enumerate(match.groups()):
@@ -342,7 +343,7 @@ def _match_keys(keys: List[str], pattern: str) -> np.ndarray:
output_array = np.empty(shape, dtype=object)
# Populate the array with the keys, now that we have the correct shape and ordering
- for key in keys:
+ for key in filter(lambda x: x is not None, keys):
match = regex_pattern.match(key)
if match:
# Convert match groups to indices based on their position in wildcard_matches
diff --git a/nemo/lightning/pytorch/callbacks/model_checkpoint.py b/nemo/lightning/pytorch/callbacks/model_checkpoint.py
index ed8ac25185f3..30fe0d3931e4 100644
--- a/nemo/lightning/pytorch/callbacks/model_checkpoint.py
+++ b/nemo/lightning/pytorch/callbacks/model_checkpoint.py
@@ -26,14 +26,13 @@
from pytorch_lightning.callbacks.model_checkpoint import _is_local_file_protocol
from pytorch_lightning.utilities import rank_zero_info
-from nemo.lightning.io.mixin import IOMixin
from nemo.lightning.io.pl import TrainerContext
from nemo.utils import logging
from nemo.utils.app_state import AppState
from nemo.utils.model_utils import ckpt_to_dir
-class ModelCheckpoint(PTLModelCheckpoint, IOMixin):
+class ModelCheckpoint(PTLModelCheckpoint):
UNFINISHED_CHECKPOINT_SUFFIX = "-unfinished"
@@ -51,14 +50,12 @@ def __init__(
save_best_model: bool = False,
save_on_train_epoch_end: Optional[bool] = False, # Save after training, not after validation
enable_nemo_ckpt_io: bool = True,
- async_save: bool = False,
try_restore_best_ckpt: bool = True,
**kwargs,
):
self.save_best_model = save_best_model
self.previous_best_path = ""
self.enable_nemo_ckpt_io = enable_nemo_ckpt_io
- self.async_save = async_save
# Checkpoints which removal is deferred until async save is done.
# Each element of `deferred_ckpts_to_remove` is a growing list
# that `self._remove_checkpoint` adds to. Once `self._save_checkpoint`
@@ -166,7 +163,7 @@ def nemo_topk_check_previous_run(self):
if index != len(self.monitor):
match = re.search('[A-z]', checkpoint[index:])
if match:
- value = checkpoint[index : index + match.start() - 1] # -1 due to separator hypen
+ value = checkpoint[index : index + match.start() - 1] # -1 due to separator hyphen
self.best_k_models[checkpoint] = float(value)
if len(self.best_k_models) < 1:
return # No saved checkpoints yet
@@ -221,7 +218,7 @@ def load_state_dict(self, state_dict: Dict[str, Any]) -> None:
super().load_state_dict(state_dict)
self._remove_invalid_entries_from_topk()
- def setup(self, *args, **kwargs) -> None:
+ def setup(self, trainer, *args, **kwargs) -> None:
from nemo.utils.get_rank import is_global_rank_zero
if is_global_rank_zero():
@@ -230,7 +227,9 @@ def setup(self, *args, **kwargs) -> None:
# Ensure that all ranks continue with unfinished checkpoints removed
if torch.distributed.is_initialized():
torch.distributed.barrier()
- super().setup(*args, **kwargs)
+
+ self.async_save = getattr(trainer.strategy, "async_save", False)
+ super().setup(trainer, *args, **kwargs)
def on_save_checkpoint(self, trainer, pl_module, checkpoint):
output = super().on_save_checkpoint(trainer, pl_module, checkpoint)
@@ -380,6 +379,8 @@ def _save_checkpoint(self, trainer: 'pytorch_lightning.Trainer', filepath: str)
self.set_checkpoint_unfinished_marker(filepath, barrier_after=True)
ema_callback = self._ema_callback(trainer)
+ self._last_global_step_saved = trainer.global_step
+
if ema_callback is not None:
if self.async_save:
raise ValueError('async_save with EMA not supported')
@@ -410,6 +411,12 @@ def _save_checkpoint(self, trainer: 'pytorch_lightning.Trainer', filepath: str)
else:
storage_options = None
trainer.save_checkpoint(filepath, self.save_weights_only, storage_options=storage_options)
+
+ ## NOTE: saving context happens synchronously always
+ from nemo.utils.get_rank import is_global_rank_zero
+
+ if self.enable_nemo_ckpt_io and is_global_rank_zero():
+ TrainerContext.from_trainer(trainer).io_dump(ckpt_to_dir(filepath))
if self.async_save:
logging.info(f'Scheduled async checkpoint save for {filepath}')
else:
@@ -422,14 +429,8 @@ def _get_finalize_save_checkpoint_callback(
def _cb():
logging.debug(f'Finalize callback called for step {global_step}, filepath {filepath}')
- self._last_global_step_saved = global_step
self._last_checkpoint_saved = filepath
- from nemo.utils.get_rank import is_global_rank_zero
-
- if self.enable_nemo_ckpt_io and is_global_rank_zero():
- TrainerContext.from_trainer(trainer).io_dump(ckpt_to_dir(filepath))
-
# notify loggers
if trainer.is_global_zero:
for logger in trainer.loggers:
diff --git a/nemo/lightning/pytorch/callbacks/model_transform.py b/nemo/lightning/pytorch/callbacks/model_transform.py
index 7949f9efd28e..5d48851843fc 100644
--- a/nemo/lightning/pytorch/callbacks/model_transform.py
+++ b/nemo/lightning/pytorch/callbacks/model_transform.py
@@ -4,11 +4,10 @@
import pytorch_lightning as pl
from torch import nn
-from nemo.lightning.io.mixin import IOMixin
from nemo.utils import logging
-class ModelTransform(pl.Callback, IOMixin):
+class ModelTransform(pl.Callback):
"""
A PyTorch Lightning callback that applies a model transformation function at the start of fitting or validation.
diff --git a/nemo/lightning/pytorch/callbacks/nsys.py b/nemo/lightning/pytorch/callbacks/nsys.py
index d24d7fd974be..9848fdb2b8fd 100644
--- a/nemo/lightning/pytorch/callbacks/nsys.py
+++ b/nemo/lightning/pytorch/callbacks/nsys.py
@@ -3,12 +3,11 @@
import torch
from pytorch_lightning.callbacks.callback import Callback
-from nemo.lightning.io.mixin import IOMixin
from nemo.utils import logging
from nemo.utils.get_rank import get_rank
-class NsysCallback(Callback, IOMixin):
+class NsysCallback(Callback):
"""
A PyTorch Lightning callback for NVIDIA Nsight Systems (Nsys) profiling.
diff --git a/nemo/lightning/pytorch/callbacks/progress.py b/nemo/lightning/pytorch/callbacks/progress.py
index 17178618852f..9ccf871f820f 100644
--- a/nemo/lightning/pytorch/callbacks/progress.py
+++ b/nemo/lightning/pytorch/callbacks/progress.py
@@ -22,7 +22,7 @@ def init_train_tqdm(self):
Override bar_format to not have 's/it'.
"""
self.bar = super().init_train_tqdm()
- self.bar.bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]"
+ self.bar.bar_format = "{desc} {n_fmt}/{total_fmt}{postfix}"
return self.bar
def on_train_epoch_start(self, trainer, *_):
diff --git a/nemo/lightning/pytorch/plugins/data_sampler.py b/nemo/lightning/pytorch/plugins/data_sampler.py
index 8d023d3bb574..9b2b317223ce 100644
--- a/nemo/lightning/pytorch/plugins/data_sampler.py
+++ b/nemo/lightning/pytorch/plugins/data_sampler.py
@@ -43,12 +43,13 @@ def setup(self, global_rank: int) -> None:
def transform_dataloader(self, dataloader: DataLoader, consumed_samples: int = 0) -> DataLoader:
from nemo.lightning.data import add_megatron_sampler
+ mode = getattr(dataloader, 'mode', 'train')
return add_megatron_sampler(
dataloader,
micro_batch_size=self.micro_batch_size,
global_batch_size=self.global_batch_size,
rampup_batch_size=self.rampup_batch_size,
- consumed_samples=self.init_consumed_samples,
+ consumed_samples=self.init_consumed_samples if mode == 'train' else 0,
dataloader_type=self.dataloader_type,
)
diff --git a/nemo/lightning/pytorch/strategies.py b/nemo/lightning/pytorch/strategies.py
index 57cd33a612ae..9adfb7801f2f 100644
--- a/nemo/lightning/pytorch/strategies.py
+++ b/nemo/lightning/pytorch/strategies.py
@@ -105,6 +105,7 @@ def __init__(
lazy_init: bool = False,
pipeline_dtype: Optional[torch.dtype] = None,
save_ckpt_format='torch_dist',
+ ckpt_async_save=False,
ckpt_torch_dist_multiproc=None, ## TODO(ashors): put elsewhere?
ckpt_assume_constant_structure=False,
ckpt_parallel_save=True,
@@ -142,6 +143,7 @@ def __init__(
self.log_memory_usage = bool(int(os.getenv("NEMO_LOG_MEMORY_USAGE", 0)))
self.save_ckpt_format = save_ckpt_format
+ self.async_save = ckpt_async_save
self.torch_dist_multiproc = ckpt_torch_dist_multiproc
self.assume_constant_structure = ckpt_assume_constant_structure
self.parallel_save = ckpt_parallel_save
@@ -207,6 +209,7 @@ def setup(self, trainer: pl.Trainer) -> None:
if not self.data_sampler and hasattr(datamodule, "data_sampler"):
self.data_sampler = datamodule.data_sampler
self.data_sampler.setup(self.cluster_environment.global_rank())
+ datamodule.reconfigure_limit_batches()
if self.data_sampler:
self.data_sampler.connect(trainer)
@@ -252,6 +255,16 @@ def setup(self, trainer: pl.Trainer) -> None:
assert self.model is not None
_sync_module_states(self.model)
+ ## add AsyncFinalizerCallback if using async
+ if self.async_save:
+ have_async_callback = False
+ for callback in self.trainer.callbacks:
+ if isinstance(callback, AsyncFinalizerCallback):
+ have_async_callback = True
+ break
+ if not have_async_callback:
+ self.trainer.callbacks.append(AsyncFinalizerCallback())
+
@override
def setup_distributed(self) -> None:
self._setup_parallel_ranks()
@@ -441,7 +454,9 @@ def validation_step(self, dataloader_iter, *args: Any, **kwargs: Any) -> STEP_OU
kwargs = self._update_step_kwargs(dataloader_iter, kwargs, "validation")
with self.precision_plugin.val_step_context(): # TODO: Do we need this?
- return self.model(dataloader_iter, forward_only=True, *args, **kwargs)
+ out = self.model(dataloader_iter, forward_only=True, *args, **kwargs)
+ self.lightning_module.log('val_loss', out, rank_zero_only=True, batch_size=1)
+ return out
@override
def test_step(self, dataloader_iter, *args: Any, **kwargs: Any) -> STEP_OUTPUT:
@@ -574,11 +589,9 @@ def load_model_state_dict(self, checkpoint: Mapping[str, Any], strict: bool = Tr
@override
def checkpoint_io(self) -> CheckpointIO:
if self._checkpoint_io is None:
- checkpoint_callback = self.trainer.checkpoint_callback
- async_save = getattr(checkpoint_callback, "async_save", False)
self._checkpoint_io = MegatronCheckpointIO(
save_ckpt_format=self.save_ckpt_format,
- async_save=async_save,
+ async_save=self.async_save,
torch_dist_multiproc=self.torch_dist_multiproc,
assume_constant_structure=self.assume_constant_structure,
parallel_save=self.parallel_save,
@@ -586,15 +599,8 @@ def checkpoint_io(self) -> CheckpointIO:
parallel_load=self.parallel_load,
load_directly_on_device=self.load_directly_on_device,
)
- if async_save:
+ if self.async_save:
self._checkpoint_io = AsyncFinalizableCheckpointIO(self._checkpoint_io)
- have_async_callback = False
- for callback in self.trainer.callbacks:
- if isinstance(callback, AsyncFinalizerCallback):
- have_async_callback = True
- break
- if not have_async_callback:
- self.trainer.callbacks.append(AsyncFinalizerCallback())
elif isinstance(self._checkpoint_io, _WrappingCheckpointIO):
self._checkpoint_io.checkpoint_io = MegatronCheckpointIO()
diff --git a/nemo/lightning/pytorch/trainer.py b/nemo/lightning/pytorch/trainer.py
index 8b453832d56e..da1a77c3c731 100644
--- a/nemo/lightning/pytorch/trainer.py
+++ b/nemo/lightning/pytorch/trainer.py
@@ -17,6 +17,10 @@ def io_init(self, **kwargs) -> fdl.Config[Self]:
for val in cfg_kwargs.values():
if not serialization.find_node_traverser(type(val)):
track_io(type(val))
+ elif isinstance(val, list):
+ for v in val:
+ if not serialization.find_node_traverser(type(v)):
+ track_io(type(v))
return fdl.Config(type(self), **cfg_kwargs)
diff --git a/nemo/utils/exp_manager.py b/nemo/utils/exp_manager.py
index f4bfb8ec95c4..3a7b17ab3db0 100644
--- a/nemo/utils/exp_manager.py
+++ b/nemo/utils/exp_manager.py
@@ -165,6 +165,7 @@ class FaultToleranceParams:
initial_rank_heartbeat_timeout: Optional[float] = 60.0 * 60.0
rank_heartbeat_timeout: Optional[float] = 45.0 * 60.0
calculate_timeouts: bool = True
+ safety_factor: float = 5.0
rank_termination_signal: signal.Signals = signal.SIGKILL
log_level: str = 'INFO'
max_rank_restarts: int = 0
@@ -558,7 +559,7 @@ def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictCo
if HAVE_STRAGGLER_DET:
logging.info("Enabling straggler detection...")
straggler_det_args_dict = dict(cfg.straggler_detection_params)
- straggler_det_callback = StragglerDetectionCallback(**straggler_det_args_dict, logger=logging)
+ straggler_det_callback = StragglerDetectionCallback(**straggler_det_args_dict)
trainer.callbacks.append(straggler_det_callback)
else:
raise ValueError(
@@ -573,6 +574,7 @@ def exp_manager(trainer: 'pytorch_lightning.Trainer', cfg: Optional[Union[DictCo
# here we only need to know if the autoresume is enabled.
ft_use_autoresume = ft_params.max_subsequent_job_failures > 0
fault_tol_callback = FaultToleranceCallback(
+ exp_dir=Path(log_dir).parent, # log_dir is "/results/"
autoresume=ft_use_autoresume,
calculate_timeouts=ft_params.calculate_timeouts,
simulated_fault_params=ft_params.simulated_fault,
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 7706aa58b267..3169d31dbeed 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,5 +1,5 @@
fiddle
-huggingface_hub>=0.20.3,<0.24.0
+huggingface_hub>=0.24
numba
numpy>=1.22
onnx>=1.7.0
diff --git a/requirements/requirements_tts.txt b/requirements/requirements_tts.txt
index 9536faec8c78..0d499feb3b1f 100644
--- a/requirements/requirements_tts.txt
+++ b/requirements/requirements_tts.txt
@@ -1,5 +1,6 @@
attrdict
einops
+janome
jieba
kornia
librosa
diff --git a/scripts/checkpoint_converters/convert_clip_hf_to_nemo.py b/scripts/checkpoint_converters/convert_clip_hf_to_nemo.py
index 690fa74abccd..2b8156ad4b26 100644
--- a/scripts/checkpoint_converters/convert_clip_hf_to_nemo.py
+++ b/scripts/checkpoint_converters/convert_clip_hf_to_nemo.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/scripts/checkpoint_converters/convert_llava_hf_to_nemo.py b/scripts/checkpoint_converters/convert_llava_hf_to_nemo.py
index d91899348e8c..85f65ca05ecf 100644
--- a/scripts/checkpoint_converters/convert_llava_hf_to_nemo.py
+++ b/scripts/checkpoint_converters/convert_llava_hf_to_nemo.py
@@ -292,7 +292,7 @@ def convert(args):
batch_dict = hf_tokenizer(input_texts, max_length=512, padding=True, truncation=True, return_tensors='pt')
batch_dict_cuda = {k: v.cuda() for k, v in batch_dict.items()}
hf_model = hf_model.cuda().eval()
- model = model.eval()
+ model = model.cuda().eval()
hf_outputs = hf_model(**batch_dict_cuda, output_hidden_states=True)
ids = batch_dict_cuda['input_ids']
@@ -307,7 +307,7 @@ def convert(args):
attn_mask, _, pos_ids = attn_mask_and_pos_ids
outputs = model(
- tokens=tokens, text_position_ids=pos_ids.cuda(), attention_mask=attn_mask.cuda(), labels=None
+ tokens=tokens.cuda(), text_position_ids=pos_ids.cuda(), attention_mask=attn_mask.cuda(), labels=None
)
hf_next_token = hf_outputs.logits[0, -1].argmax()
diff --git a/scripts/checkpoint_converters/convert_mistral_7b_hf_to_nemo.py b/scripts/checkpoint_converters/convert_mistral_7b_hf_to_nemo.py
index 3a72661499bf..b113b94e544d 100644
--- a/scripts/checkpoint_converters/convert_mistral_7b_hf_to_nemo.py
+++ b/scripts/checkpoint_converters/convert_mistral_7b_hf_to_nemo.py
@@ -95,18 +95,22 @@ def load_model(cls, checkpoint, strict, **kwargs):
return model
-def load_config(mistral_config, tokenizer_path):
+def load_config(mistral_config, tokenizer, config_path):
nemo_config = OmegaConf.load(
os.path.join(os.path.dirname(__file__), '../../examples/nlp/language_modeling/conf/megatron_llama_config.yaml')
).model
# akoumparouli: verify this.
- nemo_config.encoder_seq_length = mistral_config['sliding_window']
+ if mistral_config.get('sliding_window', None) is not None:
+ nemo_config.encoder_seq_length = mistral_config['sliding_window']
+ else:
+ nemo_config.encoder_seq_length = mistral_config['max_position_embeddings']
nemo_config.num_layers = int(mistral_config['num_hidden_layers'])
nemo_config.hidden_size = mistral_config['hidden_size']
nemo_config.ffn_hidden_size = mistral_config['intermediate_size']
nemo_config.num_attention_heads = mistral_config['num_attention_heads']
nemo_config.max_position_embeddings = mistral_config['max_position_embeddings']
- nemo_config.window_size = [mistral_config['sliding_window'], 0]
+ if mistral_config.get('sliding_window', None) is not None:
+ nemo_config.window_size = [mistral_config['sliding_window'], 0]
nemo_config.init_method_std = mistral_config['initializer_range']
# RMSNorm's epsilon.
nemo_config.layernorm_epsilon = mistral_config['rms_norm_eps']
@@ -118,7 +122,34 @@ def load_config(mistral_config, tokenizer_path):
# Mistral uses SiLU, but it is the same as swish with beta = 1.
nemo_config.activation = 'fast-swiglu'
- nemo_config.tokenizer.model = tokenizer_path
+ # Tokenizer config
+ if hasattr(tokenizer, 'vocab_file'):
+ nemo_config.tokenizer.model = tokenizer.vocab_file
+ else:
+ # Load tekken.json, extract the 'vocab' field & write it to file.
+ vocab_path = os.path.join(config_path, 'tekken.json')
+ assert os.path.exists(vocab_path), f"Expected {vocab_path} to exist"
+ with open(vocab_path, 'rt') as fp:
+ tok_vocab = json.load(fp)
+ vocab_output_path = '/tmp/tekken.json'
+ if os.path.exists(vocab_output_path):
+ os.remove(vocab_output_path)
+ with open(vocab_output_path, 'wt') as fp:
+ json.dump(tok_vocab['vocab'], fp)
+ assert os.path.exists(vocab_output_path), f"Expected {vocab_output_path} to exist"
+ assert os.path.getsize(vocab_output_path) > 0, f"Expected {vocab_output_path} to be non-empty"
+
+ tokenizer_dict = {
+ 'library': 'tiktoken',
+ 'type': 'tiktoken',
+ 'vocab_file': vocab_output_path,
+ 'model': None,
+ 'merge_file': None,
+ 'delimiter': None,
+ 'sentencepiece_legacy': False,
+ }
+ nemo_config.tokenizer = tokenizer_dict
+
# TODO(@akoumparouli): rope_scaling.
nemo_config['rotary_base'] = mistral_config['rope_theta']
@@ -148,7 +179,7 @@ def convert(args):
logging.info(f"loading checkpoint {args.input_name_or_path}")
model_args, ckpt, tokenizer = load_mistral_ckpt(args.input_name_or_path)
- nemo_config = load_config(model_args, os.path.join(args.input_name_or_path, 'tokenizer.model'))
+ nemo_config = load_config(model_args, tokenizer, args.input_name_or_path)
logging.info(f"loaded checkpoint {args.input_name_or_path}")
if args.precision in ["32", "16"]:
@@ -197,7 +228,9 @@ def convert(args):
hidden_size = nemo_config.hidden_size
head_num = nemo_config.num_attention_heads
- head_size = hidden_size // head_num
+ head_size = model_args.get('head_dim', hidden_size // head_num)
+ # Set this explictly because 2407 does not use hidden_size // num_attention_heads
+ nemo_config.kv_channels = head_size
num_layers = nemo_config.num_layers
mcore_gpt = nemo_config.mcore_gpt
diff --git a/scripts/checkpoint_converters/convert_siglip_hf_to_nemo.py b/scripts/checkpoint_converters/convert_siglip_hf_to_nemo.py
index 97a9d557f78b..053b3a053884 100644
--- a/scripts/checkpoint_converters/convert_siglip_hf_to_nemo.py
+++ b/scripts/checkpoint_converters/convert_siglip_hf_to_nemo.py
@@ -13,11 +13,10 @@
# limitations under the License.
"""
-Requires HF transformers updated to support Gemma Models
- python3 /opt/NeMo/scripts/nlp_language_modeling/convert_gemma_hf_to_nemo.py \
- --input_name_or_path /path/to/gemma/checkpoints/hf/7b \
- --output_path /path/to/gemma-7b.nemo \
- --tokenizer_path /path/to/tokenizer.model
+Requires HF transformers updated to support Siglip Models
+ python /opt/NeMo/scripts/checkpoint_converters/convert_siglip_hf_to_nemo.py \
+ --input_name_or_path=google/siglip-so400m-patch14-384 \
+ --output_path=test.nemo
"""
import os
@@ -352,7 +351,7 @@ def get_args():
def convert(args):
logging.info(f"Loading checkpoint from HF: `{args.input_name_or_path}`")
hf_model = AutoModel.from_pretrained(args.input_name_or_path)
- # hf_processor = AutoProcessor.from_pretrained(args.input_name_or_path)
+ hf_processor = AutoProcessor.from_pretrained(args.input_name_or_path)
logging.info("HF Model loading done.")
nemo_config = OmegaConf.load(args.hparams_file)
@@ -369,6 +368,35 @@ def convert(args):
nemo_state_dict = adjust_tensor_shapes(model, new_state_dict)
model.load_state_dict(nemo_state_dict, strict=False)
+ logging.info(f'=' * 100)
+ # Verifications
+ import requests
+ from PIL import Image
+
+ url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+ image = Image.open(requests.get(url, stream=True).raw)
+
+ texts = ["a photo of 2 cats", "a photo of 2 dogs"]
+ inputs = hf_processor(text=texts, images=image, padding="max_length", return_tensors="pt")
+
+ tokens = inputs["input_ids"].cuda()
+ text_model = model.model.text_encoder.cuda()
+ hf_text_model = hf_model.text_model.cuda()
+ text_model_output = text_model(tokens)
+ hf_text_model_output = hf_text_model(tokens).pooler_output
+ assert torch.allclose(text_model_output, hf_text_model_output, atol=0.01)
+ logging.info(f'! Text model results matched.')
+
+ pixels = inputs["pixel_values"].cuda()
+ vision_model = model.model.vision_encoder.cuda()
+ hf_vision_model = hf_model.vision_model.cuda()
+ vision_model_output = vision_model(pixels)
+ hf_vision_model_output = hf_vision_model(pixels).pooler_output
+ assert torch.allclose(vision_model_output, hf_vision_model_output, atol=0.01)
+ logging.info(f'! Vision model results matched.')
+
+ logging.info(f'=' * 100)
+
dtype = torch_dtype_from_precision(args.precision)
model = model.to(dtype=dtype)
model.save_to(args.output_path)
diff --git a/scripts/checkpoint_converters/convert_stablediffusion_hf_to_nemo.py b/scripts/checkpoint_converters/convert_stablediffusion_hf_to_nemo.py
index 67bc975708d0..ff10dab4bc90 100644
--- a/scripts/checkpoint_converters/convert_stablediffusion_hf_to_nemo.py
+++ b/scripts/checkpoint_converters/convert_stablediffusion_hf_to_nemo.py
@@ -13,13 +13,14 @@
# limitations under the License.
r"""
-Conversion script to convert HuggingFace Starcoder2 checkpoints into nemo checkpoint.
+Conversion script to convert HuggingFace StableDiffusion checkpoints into nemo checkpoint.
Example to run this conversion script:
python convert_hf_starcoder2_to_nemo.py \
--input_name_or_path \
- --output_path
+ --output_path --model
"""
+import os
from argparse import ArgumentParser
import numpy as np
@@ -29,8 +30,6 @@
from nemo.utils import logging
-intkey = lambda x: int(x)
-
def filter_keys(rule, dict):
keys = list(dict.keys())
@@ -95,7 +94,7 @@ def __getitem__(self, name: str):
return None
# either more than 1 match (error) or exactly 1 (success)
if np.sum(p_flag) > 1:
- print(f"error: multiple matches of key {name} with {keys}")
+ logging.warning(f"warning: multiple matches of key {name} with {keys}")
else:
i = np.where(p_flag)[0][0]
n = numdots(keys[i])
@@ -130,14 +129,9 @@ def get_args():
return args
-def make_tiny_config(config):
- '''dial down the config file to make things tractable'''
- # TODO
- return config
-
-
def load_hf_ckpt(in_dir, args):
ckpt = {}
+ assert os.path.isdir(in_dir), "Currently supports only directories with a safetensor file in it."
with safetensors.safe_open(in_dir + "/diffusion_pytorch_model.safetensors", framework="pt") as f:
for k in f.keys():
ckpt[k] = f.get_tensor(k)
@@ -161,9 +155,9 @@ def sanity_check(hf_tree, hf_unet, nemo_unet):
# check if i'm introducing new keys
for hfk, nk in hf_to_nemo_mapping(hf_tree).items():
if nk not in nemo_unet.keys():
- print(nk)
+ logging.info(nk)
if hfk not in hf_unet.keys():
- print(hfk)
+ logging.info(hfk)
def convert_input_keys(hf_tree: SegTree):
@@ -174,7 +168,7 @@ def convert_input_keys(hf_tree: SegTree):
# start counting blocks from now on
nemo_inp_blk = 1
down_blocks = hf_tree['down_blocks']
- down_blocks_keys = sorted(list(down_blocks.nodes.keys()), key=intkey)
+ down_blocks_keys = sorted(list(down_blocks.nodes.keys()), key=int)
for downblockid in down_blocks_keys:
block = down_blocks[str(downblockid)]
# compute number of resnets, attentions, downsamplers in this block
@@ -183,14 +177,14 @@ def convert_input_keys(hf_tree: SegTree):
downsamplers = block.nodes.get('downsamplers', SegTree())
if len(attentions) == 0: # no attentions, this is a DownBlock2d
- for resid in sorted(list(resnets.nodes.keys()), key=intkey):
+ for resid in sorted(list(resnets.nodes.keys()), key=int):
resid = str(resid)
resnets[resid].convert_name = f"input_blocks.{nemo_inp_blk}.0"
map_resnet_block(resnets[resid])
nemo_inp_blk += 1
elif len(attentions) == len(resnets):
# there are attention blocks here -- each resnet+attention becomes a block
- for resid in sorted(list(resnets.nodes.keys()), key=intkey):
+ for resid in sorted(list(resnets.nodes.keys()), key=int):
resid = str(resid)
resnets[resid].convert_name = f"input_blocks.{nemo_inp_blk}.0"
map_resnet_block(resnets[resid])
@@ -199,7 +193,6 @@ def convert_input_keys(hf_tree: SegTree):
nemo_inp_blk += 1
else:
logging.warning("number of attention blocks is not the same as resnets - whats going on?")
-
# if there is a downsampler, then also append it
if len(downsamplers) > 0:
for k in downsamplers.nodes.keys():
@@ -217,10 +210,9 @@ def clean_convert_names(tree):
def map_attention_block(att_tree: SegTree):
'''this HF tree can either be an AttentionBlock or a DualAttention block
currently assumed AttentionBlock
-
'''
- # TODO (rohit): Add check for dual attention block
+ # TODO(@rohitrango): Add check for dual attention block, but this works for both SD and SDXL
def check_att_type(tree):
return "att_block"
@@ -237,7 +229,7 @@ def check_att_type(tree):
dup_convert_name_recursive(tblock['norm1'], 'attn1.norm')
dup_convert_name_recursive(tblock['norm2'], 'attn2.norm')
dup_convert_name_recursive(tblock['norm3'], 'ff.net.0')
- # map ff module
+ # map ff
tblock['ff'].convert_name = "ff"
tblock['ff.net'].convert_name = 'net'
dup_convert_name_recursive(tblock['ff.net.0'], '1')
@@ -272,12 +264,16 @@ def hf_to_nemo_mapping(tree: SegTree):
def convert_cond_keys(tree: SegTree):
# map all conditioning keys
- tree['add_embedding'].convert_name = 'label_emb.0'
- dup_convert_name_recursive(tree['add_embedding.linear_1'], '0')
- dup_convert_name_recursive(tree['add_embedding.linear_2'], '2')
- tree['time_embedding'].convert_name = 'time_embed'
- dup_convert_name_recursive(tree['time_embedding.linear_1'], '0')
- dup_convert_name_recursive(tree['time_embedding.linear_2'], '2')
+ if tree.nodes.get("add_embedding"):
+ logging.info("Add embedding found...")
+ tree['add_embedding'].convert_name = 'label_emb.0'
+ dup_convert_name_recursive(tree['add_embedding.linear_1'], '0')
+ dup_convert_name_recursive(tree['add_embedding.linear_2'], '2')
+ if tree.nodes.get("time_embedding"):
+ logging.info("Time embedding found...")
+ tree['time_embedding'].convert_name = 'time_embed'
+ dup_convert_name_recursive(tree['time_embedding.linear_1'], '0')
+ dup_convert_name_recursive(tree['time_embedding.linear_2'], '2')
def convert_middle_keys(tree: SegTree):
@@ -298,7 +294,7 @@ def convert_output_keys(hf_tree: SegTree):
'''output keys is similar to input keys'''
nemo_inp_blk = 0
up_blocks = hf_tree['up_blocks']
- up_blocks_keys = sorted(list(up_blocks.nodes.keys()), key=intkey)
+ up_blocks_keys = sorted(list(up_blocks.nodes.keys()), key=int)
for downblockid in up_blocks_keys:
block = up_blocks[str(downblockid)]
@@ -307,8 +303,8 @@ def convert_output_keys(hf_tree: SegTree):
attentions = block.nodes.get('attentions', SegTree())
upsamplers = block.nodes.get('upsamplers', SegTree())
- if len(attentions) == 0: # no attentions, this is a DownBlock2d
- for resid in sorted(list(resnets.nodes.keys()), key=intkey):
+ if len(attentions) == 0: # no attentions, this is a UpBlock2D
+ for resid in sorted(list(resnets.nodes.keys()), key=int):
resid = str(resid)
resnets[resid].convert_name = f"output_blocks.{nemo_inp_blk}.0"
map_resnet_block(resnets[resid])
@@ -316,7 +312,7 @@ def convert_output_keys(hf_tree: SegTree):
elif len(attentions) == len(resnets):
# there are attention blocks here -- each resnet+attention becomes a block
- for resid in sorted(list(resnets.nodes.keys()), key=intkey):
+ for resid in sorted(list(resnets.nodes.keys()), key=int):
resid = str(resid)
resnets[resid].convert_name = f"output_blocks.{nemo_inp_blk}.0"
map_resnet_block(resnets[resid])
@@ -326,11 +322,13 @@ def convert_output_keys(hf_tree: SegTree):
else:
logging.warning("number of attention blocks is not the same as resnets - whats going on?")
- # if there is a downsampler, then also append it
+ # if there is a upsampler, then also append it
if len(upsamplers) > 0:
- # for k in upsamplers.nodes.keys():
nemo_inp_blk -= 1
- upsamplers['0'].convert_name = f"output_blocks.{nemo_inp_blk}.2"
+ upsamplenum = (
+ 1 if len(attentions) == 0 else 2
+ ) # if there are attention modules, upsample is module2, else it is module 1 (to stay consistent with SD)
+ upsamplers['0'].convert_name = f"output_blocks.{nemo_inp_blk}.{upsamplenum}"
dup_convert_name_recursive(upsamplers['0.conv'], 'conv')
nemo_inp_blk += 1
@@ -387,6 +385,7 @@ def convert_decoder(hf_tree: SegTree):
decoder['mid_block'].convert_name = 'mid'
dup_convert_name_recursive(decoder[f'mid_block.resnets.0'], 'block_1')
dup_convert_name_recursive(decoder[f'mid_block.resnets.1'], 'block_2')
+ # attention blocks
att = decoder['mid_block.attentions.0']
att.convert_name = 'attn_1'
dup_convert_name_recursive(att['group_norm'], 'norm')
@@ -443,6 +442,7 @@ def convert(args):
for hf_key, nemo_key in mapping.items():
nemo_ckpt[nemo_key] = hf_ckpt[hf_key]
+ # save this
torch.save(nemo_ckpt, args.output_path)
logging.info(f"Saved nemo file to {args.output_path}")
diff --git a/scripts/deploy/nlp/deploy_triton.py b/scripts/deploy/nlp/deploy_triton.py
index 9d9f0fa200f0..01be9ff63a0d 100755
--- a/scripts/deploy/nlp/deploy_triton.py
+++ b/scripts/deploy/nlp/deploy_triton.py
@@ -80,7 +80,7 @@ def get_args(argv):
parser.add_argument(
"-tmr", "--triton_model_repository", default=None, type=str, help="Folder for the trt-llm conversion"
)
- parser.add_argument("-ng", "--num_gpus", default=1, type=int, help="Number of GPUs for the deployment")
+ parser.add_argument("-ng", "--num_gpus", default=None, type=int, help="Number of GPUs for the deployment")
parser.add_argument("-tps", "--tensor_parallelism_size", default=1, type=int, help="Tensor parallelism size")
parser.add_argument("-pps", "--pipeline_parallelism_size", default=1, type=int, help="Pipeline parallelism size")
parser.add_argument(
@@ -95,7 +95,13 @@ def get_args(argv):
parser.add_argument("-mol", "--max_output_len", default=256, type=int, help="Max output length of the model")
parser.add_argument("-mbs", "--max_batch_size", default=8, type=int, help="Max batch size of the model")
parser.add_argument("-mnt", "--max_num_tokens", default=None, type=int, help="Max number of tokens")
+ parser.add_argument("-msl", "--max_seq_len", default=None, type=int, help="Maximum number of sequence length")
+ parser.add_argument("-mp", "--multiple_profiles", default=False, action='store_true', help="Multiple profiles")
parser.add_argument("-ont", "--opt_num_tokens", default=None, type=int, help="Optimum number of tokens")
+ parser.add_argument(
+ "-gap", "--gpt_attention_plugin", default="auto", type=str, help="dtype of gpt attention plugin"
+ )
+ parser.add_argument("-gp", "--gemm_plugin", default="auto", type=str, help="dtype of gpt plugin")
parser.add_argument(
"-mpet", "--max_prompt_embedding_table_size", default=None, type=int, help="Max prompt embedding table size"
)
@@ -284,6 +290,7 @@ def get_trtllm_deployable(args):
max_batch_size=args.max_batch_size,
max_num_tokens=args.max_num_tokens,
opt_num_tokens=args.opt_num_tokens,
+ max_seq_len=args.max_seq_len,
use_parallel_embedding=args.use_parallel_embedding,
max_prompt_embedding_table_size=args.max_prompt_embedding_table_size,
paged_kv_cache=(not args.no_paged_kv_cache),
@@ -293,6 +300,9 @@ def get_trtllm_deployable(args):
use_lora_plugin=args.use_lora_plugin,
lora_target_modules=args.lora_target_modules,
max_lora_rank=args.max_lora_rank,
+ multiple_profiles=args.multiple_profiles,
+ gpt_attention_plugin=args.gpt_attention_plugin,
+ gemm_plugin=args.gemm_plugin,
)
except Exception as error:
raise RuntimeError("An error has occurred during the model export. Error message: " + str(error))
diff --git a/scripts/installers/install_k2.sh b/scripts/installers/install_k2.sh
index 18d948209ab8..6de80ecae3eb 100755
--- a/scripts/installers/install_k2.sh
+++ b/scripts/installers/install_k2.sh
@@ -15,7 +15,7 @@
# limitations under the License.
K2_REPO=https://github.com/k2-fsa/k2
-LATEST_RELEASE=525cfa5 # fix for PyTorch 2.2.0
+LATEST_RELEASE=5735fa7 # fix for PyTorch 2.4.0
# uncomment the following line after the next k2 version is released (>1.24.4)
#LATEST_RELEASE=$(git -c 'versionsort.suffix=-' \
# ls-remote --exit-code --refs --sort='version:refname' --tags ${K2_REPO} '*.*' \
diff --git a/scripts/multimodal_dataset_conversion/convert_dvc_dataset_for_training.py b/scripts/multimodal_dataset_conversion/convert_dvc_dataset_for_training.py
index a80900e30004..4aa366bc4007 100644
--- a/scripts/multimodal_dataset_conversion/convert_dvc_dataset_for_training.py
+++ b/scripts/multimodal_dataset_conversion/convert_dvc_dataset_for_training.py
@@ -126,7 +126,8 @@
event_prompts = [
"What is the action performed in this video?",
- "Can you highlight the action performed in this video?" "What is the main event or action captured in this video?",
+ "Can you highlight the action performed in this video?",
+ "What is the main event or action captured in this video?",
"Could you summarize the sequence of events depicted in this video?",
]
diff --git a/scripts/multimodal_dataset_conversion/convert_video_qa_dataset.py b/scripts/multimodal_dataset_conversion/convert_video_qa_dataset.py
index 6e8af1229bcf..72b75718a48a 100644
--- a/scripts/multimodal_dataset_conversion/convert_video_qa_dataset.py
+++ b/scripts/multimodal_dataset_conversion/convert_video_qa_dataset.py
@@ -120,7 +120,7 @@ def repl(match):
return time_to_string(value) + f""
value = re.sub(r"<([\d.]{1,20})s>", repl, value)
- value = re.sub(r"\s([\d.]{1,20})s[\s|\.|,|>]", repl, value)
+ value = re.sub(r"\s([\d.]{1,20})s[\s\.,>]", repl, value)
value = re.sub(r"\s([\d.]{1,20}) seconds", repl, value)
value = re.sub(r"\s([\d.]{1,20}) second", repl, value)
diff --git a/scripts/nlp_language_modeling/prepare_packed_ft_dataset.py b/scripts/nlp_language_modeling/prepare_packed_ft_dataset.py
index b3251e75c84e..bd6591d90c6a 100644
--- a/scripts/nlp_language_modeling/prepare_packed_ft_dataset.py
+++ b/scripts/nlp_language_modeling/prepare_packed_ft_dataset.py
@@ -105,6 +105,7 @@ def tokenize_dataset(cfg: 'DictConfig'):
tokens_to_generate=data_cfg.get('tokens_to_generate', 0),
memmap_workers=data_cfg.get('memmap_workers', None),
hf_dataset=data_cfg.get('hf_dataset', False),
+ global_sample_mapping=data_cfg.get('global_sample_mapping', False),
truncation_method=data_cfg.get('truncation_method', 'right'),
special_tokens=data_cfg.get('chat_prompt_tokens', None),
is_test=True,
diff --git a/scripts/speech_recognition/convert_hf_dataset_to_nemo.py b/scripts/speech_recognition/convert_hf_dataset_to_nemo.py
index 2cb7ae56df60..e6612974952b 100644
--- a/scripts/speech_recognition/convert_hf_dataset_to_nemo.py
+++ b/scripts/speech_recognition/convert_hf_dataset_to_nemo.py
@@ -362,6 +362,7 @@ def main(cfg: HFDatasetConversionConfig):
cache_dir=None,
streaming=cfg.streaming,
token=cfg.use_auth_token,
+ trust_remote_code=True,
)
except Exception as e:
diff --git a/scripts/tts_dataset_files/ja_JP/ja_JP_nv240719.dict b/scripts/tts_dataset_files/ja_JP/ja_JP_nv240719.dict
new file mode 100644
index 000000000000..af75f1b0a03c
--- /dev/null
+++ b/scripts/tts_dataset_files/ja_JP/ja_JP_nv240719.dict
@@ -0,0 +1,16891 @@
+;;; # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+;;; #
+;;; # Licensed under the Apache License, Version 2.0 (the "License");
+;;; # you may not use this file except in compliance with the License.
+;;; # You may obtain a copy of the License at
+;;; #
+;;; # http://www.apache.org/licenses/LICENSE-2.0
+;;; #
+;;; # Unless required by applicable law or agreed to in writing, software
+;;; # distributed under the License is distributed on an "AS IS" BASIS,
+;;; # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+;;; # See the License for the specific language governing permissions and
+;;; # limitations under the License.
+;;;
+
+是非とも zehi to̞ mo̞
+キャパシタ kʲɑpɑʃitɑ
+アパート ɑpɑːto
+多数決 tasɯːketsɯ
+放っ hɑnɑʔ
+驚愕 kʲoːgɑkɯ
+薀蓄 ɯntʃikɯ
+度胸 do̞kʲo̞ɯ
+疫学 ekigɑkɯ
+落語 rɑkɯgo
+孤立 ko̞rit͡sɯ
+併行 heːko̞ː
+奇しくも kɯʃikɯmo̞
+ごく go̞kɯ
+時効 dʒiko̞ɯ
+フーズ ɸɯːzɯ
+松村 mɑt͡sɯmɯrɑ
+枚 bɑi
+お疲れ様 otsɯkaɾesama
+芽吹く me̞bɯkɯ
+ジム dʒimɯ
+振り返ろ ɸɯrikɑero
+はびこる hɑbikorɯ
+和樹 kɑzɯki
+つれ t͡sɯre̞
+橋梁 kʲo̞ɯrʲo̞ɯ
+トップアスリート toʔpɯasɯriːto
+倉 kɯrɑ
+ワイワイ ɯaiɯai
+こさ kosɑ
+お待ち omɑtʃi
+来春 rɑiʃɯn
+思い悩む omoinɑjɑmɯ
+全般 zenpɑn
+継ぐ t͡sɯgɯ
+島倉 ʃimɑkɯrɑ
+口コミ kɯtʃiko̞mi
+かつて kɑt͡sɯte
+東国 to̞ɯgo̞kɯ
+視 mi
+活字 kɑt͡sɯdʒi
+吸い込ま sɯikoma
+イラスト iɾasɯto
+揚げ ɑge
+前門 zenmo̞n
+行きつけ jɯkit͡sɯke̞
+ちがい tʃigɑi
+判定 hɑnteː
+値上げ neɑge
+ピエロ piero̞
+ましてや mɑʃite jɑ
+無機質 mɯkiʃit͡sɯ
+ぐるり gɯrɯri
+喘息 zɛnsokɯ
+ジャスコ jasɯko
+公平 ko̞ːheː
+セールス seːɾɯsɯ
+骨子 kɯsʃi
+ホーチミン ho̞ːtʃimin
+失言 ʃit͡sɯge̞n
+コフレ ko̞ɸɯre
+狡猾 koːkɑt͡sɯ
+民族 minzo̞kɯ
+欠い kɑi
+力量 rikirʲo̞ɯ
+筋合い sɯʒjai
+柄 gɑrɑ
+チャン tʃɑn
+蒔絵 mɑkie
+白人 hɑkɯdʒin
+略 rʲɑkɯ
+なんにも nɑnni mo
+ケニア keniɑ
+か kɑ
+血気 ke̞ʔki
+代用 dɑijoː
+弾い hii
+立ち上がっ tɑtʃiɑgɑʔ
+合唱 gɑʔʃoː
+重い o̞mo̞i
+皮肉 hinikɯ
+年号 nengo̞ɯ
+並び nɑrɑbi
+初日 ʃo̞nitʃi
+トレーダー toreːdɑː
+願い negɑi
+野手組 jɑʃɯ kɯmi
+論じ ro̞ndʒi
+行事 gʲo̞ɯdʒi
+見送り mio̞kɯri
+地合い dʒiɑi
+商業 ʃo̞ɯgʲo̞ɯ
+マルコー mɑrɯkoː
+北米 ho̞kɯbeː
+柔軟 dʒɯːnɑn
+日出夫 hideo̞
+め me̞
+ユンホ jɯn ho̞
+朝日奈 ɑsɑinɑ
+バカ bɑkɑ
+バーチャル bɑːtʃɑrɯ
+教育 kʲo̞ɯikɯ
+提出 te̞ːʃɯt͡sɯ
+愛煙 ɑien
+す sɯ
+かり kɑri
+かわし kaɯaʃi
+道理 do̞ɯri
+視察 ʃisɑtsɯ
+馬券 bɑken
+黒沢尻 kɯrosawaʃiɾi
+咲かせる sakaseɾɯ
+乗組 no̞rikɯmi
+宗教 ʃɯːkʲo̞ɯ
+メロディー mero̞diː
+伊勢崎 isesɑki
+ゆず jɯzɯ
+曲線 kjokɯsen
+始まり hɑdʒimɑri
+負け越し mɑkekoʃi
+オンライン on rɑin
+回想 kɑisoː
+空輸 kɯːjɯ
+減っ he̞ʔ
+滲む nidʒimɯ
+計上 keːdʒo̞ː
+魂 tɑmɑʃiː
+今里 imɑsɑto
+横たわっ jokotaɯaʔ
+足立 ɑdɑtʃi
+新作 ʃinsakɯ
+ぱいしててもさちっぽけなの pɑiʃitetemosɑtʃiʔpokenɑno
+パン pɑn
+吸っ sɯʔ
+接戦 sɛsən
+銘じ me̞ːdʒi
+スポンサー sɯponsaː
+逸機 iʔki
+番外 bɑngɑi
+洋式 jo̞ɯʃiki
+小結 komɯsɯbi
+潤っ ɯrɯo̞ʔ
+もらお morɑo
+風評 ɸɯːço̞ɯ
+抽選 tʃɯːsen
+臆する okɯsɯɾɯ
+において ni o̞ite
+食物 ʃo̞kɯmo̞t͡sɯ
+陛下 heːkɑ
+エアバス eabasɯ
+片側 katagaɯa
+美姫 miki
+品 ʃinɑ
+転載 tensɑi
+使用人 ʃijo̞ɯnin
+投 nɑge
+地図 tʃizɯ
+導い mitʃibii
+同学 doːgɑkɯ
+回せる mawaseɾɯ
+秋葉原 ɑkihɑbɑrɑ
+五感 gokɑn
+クビ kɯbi
+歯ブラシ hɑ bɯrɑʃi
+月山 gɑsɑn
+思い o̞mo̞i
+疑わしい ɯtagaɯaʃiː
+かわい kaɯai
+士官 ʃikɑn
+上がる ɑgɑrɯ
+間合い mɑːi
+緑色 mido̞riiro̞
+浜崎 hɑmɑzɑki
+珍しい mezɯrɑʃiː
+無趣味 mɯʃɯmi
+東北 to̞ɯho̞kɯ
+ケンカ kenkɑ
+有線 jɯːsen
+堅調 kentʃo̞ɯ
+泡立つ aɯadat͡sɯ
+内容 nɑijoː
+切実 setsɯʒitsɯ
+占拠 senkjo̞
+グラデーション gɯrɑdeːʃon
+底力 sokozikɑɾɑ
+選べ erɑbe
+毒 do̞kɯ
+降下 koːkɑ
+去り sɑɾi
+だけ dɑke
+下ろし o̞ro̞ʃi
+四苦八苦 ʃikɯ hɑʔkɯ
+ご存知 go̞zo̞ndʒi
+ゼファー zeɸɑː
+リサイクル ɾisaikɯɾɯ
+なるべく nɑrɯbekɯ
+トナカイ tonɑkɑi
+性的 se̞ːte̞ki
+珍獣 tʃindʒɯː
+記し ʃirɯʃi
+斬る kirɯ
+重用 tʃo̞ɯjo̞ɯ
+豚肉 bɯtɑnikɯ
+向き合っ mɯkiɑʔ
+跳べ to̞be
+マクド mɑkɯdo
+華麗 kɑreː
+テナント tenɑnto
+めぐり me̞gɯri
+つくば t͡sɯkɯbɑ
+勿論 mo̞tʃiro̞n
+クレヨンしんちゃん kɯrejon ʃintʃɑn
+オセチア osekiɑ
+ひと味 hitoɑdʒi
+核兵器 kɑkɯ heːki
+オケ o̞ke
+姑息 kosokɯ
+元本 gɑnpon
+間に合わ maniaɯa
+経費 ke̞ːhi
+武田 tɑkedɑ
+ひとくち hito̞kɯtʃi
+はず hɑzɯ
+たしか tɑʃikɑ
+成っ nɑʔ
+用紙 jo̞ɯʃi
+苦情 kɯdʒo̞ɯ
+こびりつい ko̞birit͡sɯi
+法律 ho̞ɯrit͡sɯ
+二つ割り ɸɯtat͡sɯ ɯari
+延ばす nobasɯ
+國井 kɯnii
+洗顔 seŋgɑn
+実験 dʒiʔke̞n
+ありえ ɑri e
+言い過ぎ iisɯgi
+始まれ hɑdʒimɑre
+資生堂 ʃiseːdo̞ː
+直球 tʃo̞ʔkʲɯː
+はつぎ ɯa t͡sɯgi
+リンク rinkɯ
+和文 ɯabɯn
+どうぞ do̞ɯzo̞
+モチベート mo̞tʃibeːto̞
+集客 ʃɯːkʲɑkɯ
+杉 sɯgi
+ぶり bɯri
+知らせる ʃiɾaseɾɯ
+とびきり to̞bikiri
+各論 kɑkɯron
+さむい samɯi
+貰い morɑi
+いただく itɑdɑkɯ
+メガ megɑ
+冷える hie̞rɯ
+丸子 mɑrɯko
+男子 dɑnʃi
+狂い kɯrɯi
+集 ʃɯː
+汚職 o̞ʃo̞kɯ
+参考 sɑnkoː
+おどろく o̞do̞ro̞kɯ
+触ろ saɯaro
+準ずる dʒɯnzɯrɯ
+役立て jɑkɯdɑte
+千尋 tʃihiro̞
+映し ɯt͡sɯʃi
+冨田 tomitɑ
+鷹 tɑkɑ
+使いこなし t͡sɯkɑi konɑʃi
+決戦 kɛsən
+自民党 dʒiminto̞ɯ
+井の頭公園 inokɑʃirɑ koːen
+害 gɑi
+在学 zɑigɑkɯ
+健脚 kenkʲɑkɯ
+手伝っ tet͡sɯdɑʔ
+昨秋 sakɯshɯː
+愉し tɑnoʃi
+定規 dʒo̞ɯgi
+あらかじめ ɑrɑkɑdʒime
+身体 ʃintɑi
+自立 dʒirit͡sɯ
+ど do̞
+見どころ mido̞ko̞ro̞
+抜け落ち nɯkeo̞tʃi
+学部 gɑkɯbɯ
+在庫 zɑiko
+生かす ikasɯ
+渦 ɯzɯ
+サクラ sakɯra
+ステキ sɯteki
+中年 tʃɯːne̞n
+カーブ kɑːbɯ
+キビ kibi
+入り浸り iribitɑri
+断念 dɑnnen
+民俗 minzo̞kɯ
+勢力 seːɾjokɯ
+滑り台 sɯbeɾidai
+ブライアント bɯrɑiɑnto
+落 o̞tʃi
+溢れる ɑɸɯrerɯ
+ガキ gɑki
+かみ殺し kɑmikoroʃi
+築く kizɯkɯ
+入る hɑirɯ
+玄関 genkɑn
+ボストン bosɯton
+癖 kɯze
+フェア ɸeɑ
+春川 harɯkaɯa
+修羅場 ʃɯrɑdʒoː
+日立建機 hitɑtʃi kenki
+やっ jɑʔ
+大卒 daisotsɯ
+気がつか ki gɑ t͡sɯ kɑ
+停止 te̞ːʃi
+羽田空港 hɑtɑ kɯːkoː
+市販 ʃihɑn
+下図 ʃitɑzɯ
+黒く kɯro̞kɯ
+オリジナル oridʒinɑrɯ
+左 hidɑri
+標的 ço̞ɯteki
+保留 ho̞rʲɯː
+代謝 tɑiʃɑ
+暴発 boːhɑt͡sɯ
+インタフェース intafeːsɯ
+噛む kɑmɯ
+訪れる o̞to̞zɯrerɯ
+トレンド to̞rendo̞
+コンタクトレンズ kontɑkɯto renzɯ
+カビ kɑbi
+飛びつい to̞bit͡sɯi
+投資 to̞ɯʃi
+体温 tɑion
+そして so̞ʃite
+諮問 ʃimo̞n
+盟主 me̞ːʃɯ
+半導体 hɑn doːtɑi
+ふと ɸɯto̞
+株式会社 kɑbɯʃiki kɑiʃɑ
+人びと hito̞bito̞
+万歳 bɑnzɑi
+限界 genkɑi
+合理 go̞ɯri
+減耗 genmo̞ɯ
+憲 ke̞n
+グリーディーエヌエー gɯriːdiːe̞nɯe̞ː
+まなざし mɑnɑzɑʃi
+規則 kisokɯ
+嫌い kirɑi
+関西 kɑnsɑi
+初期 ʃo̞ki
+調子 tʃo̞ɯʃi
+お湯 o̞jɯ
+冥福 me̞ːɸɯkɯ
+当選 toːsɛn
+動作 doːaɑ
+精神 se̞ːʃin
+明白 meːhɑkɯ
+鐘 kɑne
+重量 dʒɯːrʲo̞ɯ
+しっかり ʃiʔkɑri
+コミカル komikɑrɯ
+主催 ʃɯzai
+老 o̞i
+住まい sɯmai
+外側 sotoɰᵝaɯa
+袋 ɸɯkɯro̞
+年取れ to̞ʃito̞re
+賛否 sɑnpi
+グリーン gɯriːn
+ルソン ɾɯzon
+ちがっ tʃigɑʔ
+茶飯事 sɑhɑɲi
+構図 ko̞ɯzɯ
+持ち歩い motʃiɑrɯi
+組め kɯme̞
+分かる ɯakarɯ
+頂戴 tʃoːdɑi
+形 kɑtɑtʃi
+希望 kibo̞ɯ
+思い出深い omoide bɯkɑi
+トライアル torɑiɑrɯ
+先物 sɑkimono
+ラード rɑːdo
+夕立ち jɯːdɑtʃi
+うえ ɯe̞
+鄭 te̞i
+警視庁 keːʃitʃo̞ː
+こっち ko̞ʔtʃi
+一時 itʃi dʒi
+ひとこと hito̞ko̞to̞
+激怒 gekido̞
+笑える ɯaraerɯ
+士気 ʃiki
+言い訳 iiɯake
+重松 ʃigemɑt͡sɯ
+痕跡 ko̞nseki
+おくれ o̞kɯre
+選考 senko̞ː
+手数料 tesɯːrjow
+絶大 zet͡sɯdɑi
+材木 zɑimokɯ
+融通 jɯːzɯː
+ヤクザ jɑkɯzɑ
+逆転 gʲɑkɯten
+まとも mɑtomo
+出身 ʃɯʔʃɪn
+招待 ʃoːtɑi
+向く mɯkɯ
+カラオケ kɑrɑoke
+復元 ɸɯkɯge̞n
+実況 dʒiʔkʲo̞ɯ
+ライバル rɑibɑrɯ
+買い上げ kɑiɑge
+ムズムズ mɯzɯmɯzɯ
+怖がる koɯagarɯ
+対し tɑiʃi
+像 zo̞ɯ
+い i
+文化財 bɯnkɑzɑi
+スキャンダル sɯkjandaɾɯ
+覗い no̞zo̞i
+駆け寄っ kɑkejoʔ
+阿呆 ɑhoː
+楽しめ tɑnoʃime
+束 tɑbɑ
+不定期 ɸɯte̞iki
+特設 tokɯsetsɯ
+生長 seːtʃo̞ː
+相手 ɑite
+疾患 ʃiʔkɑn
+溝 mizo̞
+お目にかかり ome ni kɑkɑri
+促す ɯnaɰᵝasɯ
+やはり jɑhɑri
+美し ɯt͡sɯkɯʃi
+隠蔽 inpe̞ː
+原告 genko̞kɯ
+真似 mɑne
+飾り kɑzɑri
+三日坊主 miʔkɑ boːzɯ
+単調 tɑntʃoː
+茶飲み話 tʃɑ nomibɑnɑʃi
+東 higɑʃi
+ぺたんこになったおなかも petɑnko ni nɑʔtɑ onɑkɑ mo
+集まっ ɑt͡sɯmɑʔ
+生やし hɑjɑʃi
+億 o̞kɯ
+心境 ʃinkʲo̞ɯ
+ばあちゃん bɑːtʃɑn
+背中 senɑkɑ
+ルブタン rɯbɯtɑn
+リアクション riɑkɯʃon
+協会 kʲoːkɑi
+突入 to̞t͡sɯɲɯː
+論客 ronkʲɑkɯ
+福士 ɸɯkɯʃi
+蝶 tʃo̞ɯ
+見極め mikiɯame
+ジゾン dʒizo̞n
+マイ mɑi
+マウンド mɑɯndo
+姓 se̞ː
+南 minɑmi
+刺さる sasaɾɯ
+読みで jo̞mi de
+防塵 bo̞ɯdʒin
+アナウンス anaɯnsɯ
+欲求 jo̞ʔkʲɯː
+ポジション po̞dʒiʃo̞n
+一覧 itʃirɑn
+破局 hɑkʲokɯ
+ランキング rɑnkingɯ
+アカ ɑkɑ
+岩手大 iɯatedai
+増やす fɯjasɯ
+おろし金 oroʃigɑne
+モバイル mobɑirɯ
+恵里子 eriko̞
+進 sɯsɯmɯ
+なでしこ nɑdeʃiko
+在住 zɑidʒɯː
+大阪城ホール oːsakadʒoːhoːrɯ
+単価 tɑnkɑ
+関わる kakaɯarɯ
+汐留 ʃio̞do̞me
+だの dɑ no
+廉 re̞n
+厳罰 genbɑt͡sɯ
+夏目 nɑt͡sɯme
+春休み haɾɯjasɯmi
+回ら maɯara
+もみあい momiɑi
+牽制 ke̞nse̞ː
+梱 ko̞ri
+クィクィ kiki
+要綱 jo̞ɯko̞ɯ
+活気 kɑʔki
+ちりばめ tʃiribɑme
+所属 ʃo̞zo̞kɯ
+ほじる ho̞dʒirɯ
+バターバニラエッセンス bataːbaniraeʔsɛnsɯ
+シェリー ʃe̞riː
+一目 itʃimo̞kɯ
+光景 ko̞ːkeː
+にぎわい nigiɯai
+次週 dʒiʃɯː
+滑り出し sɯbeɾidaʃi
+拭え nɯgɯe̞
+尾羽 ohɑ
+同校 do̞ɯko̞ɯ
+作っ t͡sɯkɯʔ
+ミルク mirɯkɯ
+明快 meːkɑi
+念 ne̞n
+追いかける oikɑkerɯ
+簿 bo̞
+資材 ʃizɑi
+分から bɯn kɑrɑ
+欲 jo̞kɯ
+すれ違う sɯɾetʃiɣaw
+減産 gənˈzɑn
+言い表せ iːaɾaɯase
+短編 tɑnpen
+丸三証券 marɯsanʃoːken
+恋する kojːsɯɾɯ
+手口 te̞gɯtʃi
+漫読 mɑndokɯ
+金欠 kinke̞t͡sɯ
+鎖 kɯsaɾi
+比喩 hijɯ
+教え子 o̞ʃiego̞
+違い tʃigɑi
+さとみ sɑtomi
+操業 soːgoː
+モノクロ mo̞no̞kɯro̞
+隣接 ɾinse̞tsɯ
+吐か hɑkɑ
+放課後 hoːkɑgo
+ウマ ɯmɑ
+チェーン tʃe̞ːn
+なじん nɑdʒin
+ヒゲ hige̞
+求刑 kʲɯːke̞ː
+解消 kɑiʃoː
+蓄え takɯɯae
+そろっ so̞ɾo̞ʔ
+悪行 ɑkɯgʲoː
+わく ɯakɯ
+ヤヒア jɑ hiɑ
+冷蔵 reːzo̞ː
+欠勤 ke̞ʔkin
+苦汁 kɯdʒɯː
+容量 jo̞ɯrʲo̞ɯ
+頭 ɑtɑmɑ
+さらなる saɾanaɾɯ
+波瀾万丈 hɑrɑn bɑndʒoː
+衰え o̞to̞ro̞e
+物入り mo̞no̞iri
+ぽかぽか pokɑpokɑ
+勇猛 jɯːmo̞ɯ
+達朗 tɑt͡sɯroː
+惜しん o̞ʃin
+前方 zenpo̞ɯ
+偏狭 henkʲo̞ɯ
+方針 ho̞ɯʃin
+扉 tobirɑ
+矢作 jɑhɑgi
+スービック sɯːbikʔɯ
+続々 zo̞kɯzo̞kɯ
+両面 rʲo̞ɯmen
+日常 nitʃidʒo̞ɯ
+薬味 jɑkɯmi
+まだまだ mɑdɑmɑdɑ
+ネイティブアメリカン neitibɯ ɑmerikɑn
+くじ引き kɯdʒibiki
+見学 kengɑkɯ
+エナメル enɑmerɯ
+いろいろ iro̞iro̞
+秀才 ʃɯːsai
+同期 do̞ɯki
+為さ tɑmesɑ
+中味 nɑkɑmi
+しらける ʃirɑkerɯ
+れれ re̞re̞
+河水 kasɯi
+のに no̞ ni
+ままならない mɑmɑ nɑrɑnɑi
+僕ら bokɯrɑ
+起き o̞ki
+山中湖 santʃɯːko
+最盛 sɑiseː
+電車 denʃɑ
+隔絶 kɑkɯzet͡sɯ
+爪痕 t͡sɯmeɑto
+慶 jo̞ro̞ko̞bi
+どおり do̞ːri
+手紙 tegɑmi
+買い占める kɑiʃimerɯ
+膠着 koːtʃɑkɯ
+実 dʒit͡sɯ
+リー riː
+中小 tʃɯːʃo̞ɯ
+売れっ子 ɯreʔko̞
+目覚まし mezɑmɑʃi
+萎縮 iʃɯkɯ
+職場 ʃokɯbɑ
+キャンドル kʲɑndorɯ
+厚生 ko̞ːseː
+不平 ɸɯhe̞ː
+デマ demɑ
+キノコ kino̞ko̞
+分かち ɯakatʃi
+西多摩 niʃitɑmɑ
+ファナックトヨタソニー fanaʔkɯtojotasoniː
+リピーター ripiːtɑː
+助産 dʑosɑn
+しみじみ ʃimidʒimi
+途切れる to̞girerɯ
+おかみ okɑmi
+すん sɯn
+膨張 bo̞ɯtʃo̞ɯ
+尉 dʒo̞ɯ
+似合う niɑɯ
+取り交わす toɾikawasɯ
+グローベルスマーベラス gɯroːbeɾɯsɯmaːbeɾasɯ
+孤児 ko̞dʒi
+買い物 kɑimono
+同性 do̞ːseː
+ざっくばらん zɑʔkɯbɑrɑn
+小宮山 komijɑmɑ
+ビッグ biʔgɯ
+ジーンズ dʒiːnzɯ
+オペレーター opereːtɑː
+今後 ko̞ngo̞
+擦れ sɯɾe
+在り方 ɑrikɑtɑ
+広め hiro̞me
+リュック rʲɯʔkɯ
+組める kɯme̞rɯ
+いただけ itɑdɑke
+にぎわう nigiɯaɯ
+人材 dʒinzɑi
+みんな minnɑ
+オネエ o̞neː
+聖 se̞ː
+じゃん dʒɑ n
+古く ɸɯrɯkɯ
+かかし kɑkɑʃi
+支援 ʃie̞n
+知 tʃi
+面くらい men kɯrɑi
+三洋電機 saɲɯdɛnki
+競わ kisoːa
+公演 ko̞ɯen
+スケッチブック sɯketʃibɯʔkɯ
+様相 jo̞ːso̞ː
+中旬 tʃɯːdʒɯn
+春香 hɑrɯkɑ
+アカウント ɑkɑɯnto
+万が一 mɑngɑitʃi
+エコ eko̞
+流行っ rʲɯːko̞ɯʔ
+フレーズ ɸɯre̞ːzɯ
+朝日 ɑsɑhi
+別姓 be̞ʔse̞ː
+オーボエ o̞ːbo̞e
+戦う tɑtɑkɑɯ
+閉会 heːkɑi
+陽気 jo̞ɯki
+時々 to̞kido̞ki
+倖田 koːdɑ
+眩しい mɑbɯʃiː
+強度 kʲo̞ɯdo̞
+やめろ jɑmero
+キープ kiːpɯ
+考えれ kɑngɑere
+退勤 tɑikin
+長い nɑgɑi
+独占 dokɯzen
+舘野 tɑteno
+滑れる sɯbeɾeɾɯ
+城下町 dʒoːkɑ tʃoː
+祐未 jɯmi
+それら soɾeɾɑ
+一際 hitokiɯa
+学府 gɑkɯɸɯ
+焦点 ʃo̞ɯten
+任せる makaseɾɯ
+賛意 sɑni
+定額 teːgɑkɯ
+岡田 okɑdɑ
+模索 mosakɯ
+引退 intɑi
+感染 kɑnsɯn
+磨き migɑki
+ファインダー ɸɑindɑː
+だら dɑrɑ
+察し sɑsʃi
+草 kɯsa
+押さえ osɑe
+乱調 rɑntʃoː
+平坦 heːtɑn
+すごせる sɯɣoseɾɯ
+一括 iʔkɑt͡sɯ
+脱 dɑt͡sɯ
+し ʃi
+所詮 ʃo̞zɯn
+なんか nɑn kɑ
+悪知恵 ɑkɯtʃie
+むずかしい mɯzɯkɑʃiː
+宿舎 ʃɯkɯʃɑ
+選ば erɑbɑ
+謝罪 ʃɑzɑi
+廃校 hɑikoː
+開発 kɑihɑt͡sɯ
+ペンシルベニア penʃirɯbeniɑ
+飛行 hiko̞ɯ
+つまるところ t͡sɯmɑrɯ tokoro
+所 to̞ko̞ro̞
+び bi
+人臭く hitokɯsakɯ
+加賀 kɑgɑ
+いびつ ibit͡sɯ
+公衆 ko̞ɯʃɯː
+ひとり hito̞ri
+きる kirɯ
+芳香 ho̞ɯko̞ɯ
+下さい kɯdasai
+迷っ mɑjoʔ
+繰り返し kɯrikɑeʃi
+任せろ mɑkɑseɾo
+ゴミ go̞mi
+虚しく mɯnɑʃikɯ
+裸足 hɑdɑʃi
+審判 ʃinpɑn
+解 kɑi
+巷 tʃimɑtɑ
+取っ to̞ʔ
+メロメロ mero̞mero̞
+戦わ tatakaɯa
+無職 mɯʃo̞kɯ
+仁 hito̞ʃi
+守れる mɑmorerɯ
+ゆっくり jɯʔkɯri
+卓越 tɑkɯet͡sɯ
+対価 tɑikɑ
+社屋 ʃɑokɯ
+霊 re̞ː
+ラブラブ rɑbɯ rɑbɯ
+週 ʃɯː
+弓なり jɯminɑri
+正月 ʃoːgɑt͡sɯ
+麦茶 mɯgitʃɑ
+まるで mɑrɯde
+花粉 kɑɸɯn
+レジ re̞dʒi
+無事 bɯdʒi
+三住 sanʒɯw
+スピーカー sɯpiːkaː
+遊ぼ ɑsobo
+敷か ʃiki kɑ
+直江 nɑoe
+はた hɑtɑ
+使い分ける t͡sɯkaiɯakerɯ
+リベンジ ribe̞ndʒi
+断食 dɑndʒiki
+抗議 ko̞ɯgi
+銅 do̞ɯ
+県議会 ken gikɑi
+強力 kʲo̞ɯrʲo̞kɯ
+相互 so̞ːɡo̞
+ナウ nɑɯ
+とんでも to̞ndemo̞
+どういう do̞ɯ iɯ
+解雇 kɑiko
+ブーム bɯːmɯ
+望ま nozomɑ
+アーティスト aːtisɯto
+武生 tɑkeɸɯ
+吐き hɑki
+賠償 bɑiʃoː
+パラソル paɾasoɾɯ
+間取り mɑdori
+いかに ikɑni
+それにしても so̞reniʃitemo̞
+採集 saishɯː
+ねん ne̞n
+はさん hɑsɑn
+設定 se̞ʔte̞ː
+感じる kɑndʒirɯ
+ソロバン soɾobɑn
+中央 tʃɯːo̞ɯ
+言っ iʔ
+いただけれ itɑdɑkere
+社 ʃɑ
+惜しま oʃimɑ
+棄却 kikʲɑkɯ
+逆風 gʲɑkɯɸɯː
+不能 ɸɯno̞ɯ
+松山 mɑt͡sɯjɑmɑ
+火消し hike̞ʃi
+なごや nɑgojɑ
+初詣 hɑt͡sɯmoːde
+老若男女 roːɲɑkɯ dɑndʒo
+フラ ɸɯrɑ
+見直せ minɑose
+当たり ɑtɑri
+放題 hoːdɑi
+移ろい ɯt͡sɯro̞i
+アイドル ɑidorɯ
+問え to̞e
+友だち tomodɑtʃi
+青信号 ɑo ʃingoː
+三枚目 sɑmmɑime
+ブヨ bɯjo̞
+退官 tɑikɑn
+議会 gikɑi
+音信 o̞nʃin
+セラーテム seraːtemɯ
+圏 ke̞n
+卸 o̞ro̞ʃi
+芸能 geːno̞ː
+抗ヒスタミン剤 koːhisɯtaminzai
+放ったらかし hoʔtɑrɑkɑʃi
+巨大 kʲodɑi
+生ま ɯmɑ
+遊撃手 jɯːge̞kiʃɯ
+ちがう tʃigɑɯ
+か月 kɑget͡sɯ
+臼歯 kʲɯːʃi
+気っぷ kiʔpɯ
+築 tʃikɯ
+気さく kisakɯ
+助け tasɯke
+水中 sɯitʃɯː
+しょっぱけれ ʃoʔpɑkere
+呆れ ɑkire
+ソロ so̞ɾo̞
+よし jo̞ʃi
+シカ ʃikɑ
+長引い nɑgɑbii
+じゃっ dʒɑʔ
+ゆったり jɯʔtɑri
+ところで to̞ko̞ro̞ de
+括り kɯkɯri
+銘柄 meːgɑrɑ
+今秋 ko̞nʃɯː
+弊害 heːgɑi
+リンゴ ringo̞
+マッコーリー mɑʔkoːriː
+教授 kʲo̞ɯdʒɯ
+韓国 kɑnkokɯ
+ブレーン bɯre̞ːn
+成し遂げる nɑʃitogerɯ
+スーパースター sɯːpaːsɯtaː
+スペース sɯpeːsɯ
+震わせ fɯɾɯwase
+うれしい ɯre̞ʃiː
+希薄 kihɑkɯ
+さっと sɑʔto
+衝動 ʃo̞ɯdo̞ɯ
+国仲 kɯninɑkɑ
+講師 ko̞ɯʃi
+御飯 gohɑn
+無形 mɯke̞ː
+繊維状 sẽiʒo̞ː
+有する jɯːsɯɾɯ
+編み出し ɑmidɑʃi
+飛び交う tobikɑɯ
+取り返せ toɾikɑeze
+活 kɑt͡sɯ
+中京 tʃɯːkʲo̞ɯ
+わき ɯaki
+ほころばす hokorobasɯ
+リョウタ rʲoː tɑ
+門真 kɑdomɑ
+謙遜 kɛnsɔn
+帆 ho̞
+八卦 hɑʔke
+脈絡 mʲɑkɯrɑkɯ
+習っ nɑrɑʔ
+ペンクラブ pen kɯrɑbɯ
+役柄 jɑkɯgɑrɑ
+粘着 nentʃɑkɯ
+失礼 ʃit͡sɯre̞ː
+観測 kansokɯ
+書き kɑki
+もっと mo̞ʔto̞
+けん引 ke̞nin
+マヌケ mɑnɯke
+キラキラ kirɑkirɑ
+上機嫌 dʒo̞ɯkigen
+海江田 kɑiedɑ
+初恋 hɑt͡sɯkoi
+マーケティング mɑːketingɯ
+従わ ʃitagaɯa
+お薦め osɯsɯme
+実は dʒit͡sɯ ɯa
+アトランタ ɑtorɑntɑ
+得 to̞kɯ
+巧妙 ko̞ɯmʲo̞ɯ
+熊 kɯmɑ
+贈り物 o̞kɯrimo̞no̞
+ヤフー jɑɸɯː
+保守 ho̞ʃɯ
+パーティー pɑːtiː
+隻句 seʔkɯ
+県 ke̞n
+美味し o̞iʃi
+みよ mijo̞
+決済 kesɯɑi
+有名 jɯːme̞ː
+本命 ho̞nmeː
+カス kasɯ
+強 kʲo̞ɯ
+グループ gɯrɯːpɯ
+にぎわっ nigiɯaʔ
+じゃあ dʒɑː
+インテル inte̞rɯ
+何者 nɑnimono
+仮設 kɑsetsɯ
+苦悩 kɯno̞ɯ
+どころ do̞ko̞ro̞
+フンドシ ɸɯndo̞ʃi
+着心地 kigo̞ko̞tʃi
+加わり kɯɯaɯari
+高配 koːhɑi
+ファイリング ɸɑiringɯ
+鍛え kitɑe
+切れ味 kireɑdʒi
+こじれ ko̞dʒire
+悲しき kɑnɑʃiki
+闇 jɑmi
+緩み jɯrɯmi
+清める kijo̞merɯ
+過失 kɑʃit͡sɯ
+ベリー be̞riː
+創痍 so̞ːi
+四面楚歌 ʃimensokɑ
+含ま ɸɯkɯmɑ
+片言 kɑtɑkoto
+写そ ɯtsɯso
+生き残っ ikino̞ko̞ʔ
+フード ɸɯːdo̞
+コスト kosɯto
+ひょっとしたら çoʔto ʃitɑrɑ
+異色 iʃo̞kɯ
+落とし o̞to̞ʃi
+遠隔 enkɑkɯ
+隊 tɑi
+棒 bo̞ɯ
+オーディオ o̞ːdio̞
+誉め ho̞me
+返り咲き kɑerizɑki
+人柄 hitogɑrɑ
+会社 kɑiʃɑ
+清涼 seːɾjo̞ː
+造り酒屋 tsɯkɯrisakaja
+客員 kʲɑkɯin
+きわめて kiɯamete
+ラベル rɑberɯ
+アサガオ ɑsɑɣɑo
+徹底 te̞ʔte̞ː
+冷凍 reːto̞ɯ
+充実 dʒɯːdʒit͡sɯ
+武道 bɯdo̞ɯ
+取り出し toridɑʃi
+ひるがえって hirɯgɑeʔte
+絞り ʃibo̞ri
+英樹 hide̞ki
+テレビ朝日 terebiɑsɑhi
+やめる jɑmerɯ
+機械 kikɑi
+引き受ける hikiɯke̞rɯ
+魔法 mɑhoː
+空席 kɯvseki
+の no̞
+長老 tʃo̞ɯro̞ɯ
+いらっしゃっ iɾɑʔsʃə
+ほか hokɑ
+もしかして moʃi kɑ ʃite
+ブランド bɯrɑndo
+吐露 to̞ro̞
+嫌っ ijɑʔ
+部品 bɯhin
+冷 re̞ː
+そんなふうに sonnafɯɯni
+トー to̞ː
+安い jasɯi
+譲っ jɯzɯʔ
+福吉 ɸɯkɯjo̞ʃi
+防げ fɯsege
+実機 dʒiʔki
+打っ ɯʔ
+米 be̞ː
+本体 hontɑi
+ぶっつけ bɯʔt͡sɯke̞
+握り nigiri
+新宿 ʃindʒɯkɯ
+暴投 bo̞ɯto̞ɯ
+受け取っ ɯketo̞ʔ
+架空 kɑkɯː
+車検 ʃɑken
+持ち出す motʃidasɯ
+東京電力 to̞kʲo̞ denrʲo̞kɯ
+ゴレンジャー gorendʒɑː
+三塁手 sanrɯiʃɯ
+採光 sɑikoː
+羽織 hɑori
+密閉 miʔpe̞i
+釘付け kɯgizɯke̞
+鍋 nɑbe
+全 ze̞n
+喋る ʃɑberɯ
+くじか kɯdʒi kɑ
+仕入れ ʃiire̞
+フェリー ɸe̞riː
+付着 ɸɯtʃɑkɯ
+調べれ ʃirɑbere
+状態 dʒoːtɑi
+融資 jɯːʃi
+下り kɯdɑri
+習慣 ʃɯːkɑn
+石 iʃi
+探知 tɑntʃi
+懐疑 kɑigi
+社内 ʃɑnɑi
+作成 sakɯseː
+傾向 keːko̞ː
+堤 t͡sɯt͡sɯmi
+もうまい moː mɑi
+梶山 kɑdʒijɑmɑ
+寝転がり nekorogɑri
+使用 ʃijo̞ɯ
+返金 he̞nkin
+オファー oɸɑː
+捕食 ho̞ʃo̞kɯ
+一つひとつ hito̞t͡sɯ hito̞t͡sɯ
+義捐 gie̞n
+ストラテジスト sɯtoratexisɯto
+感じ kɑndʒi
+パーフェクト pɑːɸekɯto
+アクセサリ akɯsesaɾi
+セクター sekɯtaː
+にんにく ninnikɯ
+成分 seːbɯn
+体 kɑrɑdɑ
+阿部 ɑbe
+是正 ze̞se̞ː
+ハマる hɑmɑrɯ
+第 dɑi
+下っ端 ʃitɑʔpɑ
+働き hɑtɑrɑki
+たぶん tɑbɯn
+神戸製鋼 ko̞ːbeseiko̞ː
+クリスマスイブ kɯrisɯmasɯibɯ
+ソウルメイト sowɾɯmeito
+宮地 mijɑdokorono
+握る nigirɯ
+掘り下げ hoɾisɑʒe
+ホンダ hondɑ
+元気づける ge̞nki zɯke̞rɯ
+小型 kogɑtɑ
+ゼロ zero̞
+激震 ge̞kiʃin
+麻衣 mɑi
+級 kʲɯː
+分け ɯake
+時期 dʒiki
+津波 t͡sɯnɑmi
+凋落 tʃoːrɑkɯ
+英明 hideɑki
+公明党 ko̞ːmeːto̞ː
+甑 ko̞ʃiki
+幸福 ko̞ɯɸɯkɯ
+披露 hiro̞ɯ
+ハードル hɑːdorɯ
+惨め midʒime̞
+氣的 kite̞ki
+アーモンド ɑːmondo
+歴 re̞i
+続ける t͡sɯzɯke̞rɯ
+存分 zo̞nbɯn
+やや jɑjɑ
+後悔 koːkɑi
+ネガティブ negɑtibɯ
+ワザ ɯaza
+熱意 ne̞t͡sɯi
+根源 ko̞ngen
+触発 ʃokɯhɑt͡sɯ
+絞っ ʃibo̞ʔ
+たって tɑʔte
+げんか genkɑ
+改革 kɑikɑkɯ
+じっと dʒiʔto̞
+不妊 ɸɯnin
+水本 mizɯmo̞to̞
+応 o̞ɯ
+ふた ɸɯtɑ
+ィ i
+歴史 re̞kiʃi
+冴えわたっ saeɯataʔ
+思いつき o̞mo̞it͡sɯki
+踏み出す fɯmidasɯ
+硫酸 rjɯːsan
+やり方 jɑrikɑtɑ
+推し進め osisɯsɯme
+悩む nɑjɑmɯ
+華やか hɑnɑjɑkɑ
+地方 tʃiho̞ɯ
+杉内 sɯgiɯtʃi
+こう ko̞ɯ
+貸借 tɑiʃɑkɯ
+ベストテン besɯtoten
+尾崎 ozɑki
+搬送 hɑnsoː
+行き場 jɯkibɑ
+容貌 jo̞ɯbo̞ɯ
+営み itonɑmi
+見守る mimɑmorɯ
+風早 kɑzɑhɑjɑ
+師 ʃi
+郵便受け jɯːbin ɯke̞
+ありのまま ɑrinomɑmɑ
+低率 te̞ːrit͡sɯ
+叫び sɑkebi
+仮定 kɑteː
+永田町 nɑgɑtɑtʃoː
+宿る jɑdorɯ
+絶妙 zet͡sɯmʲo̞ɯ
+思い立っ omoitɑʔ
+最大 sɑidɑi
+ランク rɑnkɯ
+オルニチン o̞rɯnitʃin
+本編 ho̞npen
+恩 o̞n
+主張 ʃɯtʃo̞ɯ
+他力本願 tɑriki hongɑn
+涙 nɑmidɑ
+言い渡し iiɯataʃi
+リバウンド ribɑɯndo
+親日 ʃinnitʃi
+栄養 eːjo̞ː
+抹消 mɑʔʃoː
+茶道 sɑdoː
+紳士 ʃinʃi
+チラ tʃirɑ
+抜け道 nɯke̞mitʃi
+つまずく t͡sɯmɑzɯkɯ
+成ら nɑrɑ
+関東学院大学 kɑntoː gɑkɯin dɑigɑkɯ
+悲劇 hige̞ki
+アドバイザリーボード ɑdobɑizɑriː boːdo
+迫っ semɑʔ
+浩 hiro̞ʃi
+傲慢 goːmɑn
+ウルフ ɯrɯɸɯ
+障害 ʃoːgɑi
+毛 ke̞
+ティーザー tiːzɑː
+たび tɑbi
+あと ɑto
+文部 mo̞nbɯ
+許 mo̞to̞
+とく to̞kɯ
+比 hi
+呼び捨て jobisɯte
+おろか orokɑ
+泳ぐ o̞jo̞gɯ
+医師 iʃi
+戻し mo̞do̞ʃi
+栞 ʃio̞ri
+モスクワ mosɯkɯwa
+ランニング rɑnningɯ
+恵子 keːko̞
+ダンス dansɯ
+舟 ɸɯne̞
+忍耐 nintɑi
+オルタナ orɯ tɑnɑ
+辻井 t͡sɯdʒii
+再び ɸɯtɑtɑbi
+うごめく ɯgo̞mekɯ
+愕然 gɑkɯzen
+同社 doːʃɑ
+難易 nɑni
+黒髪 kɯrokɑmi
+脱い nɯi
+対策 taisakɯ
+共存 kʲo̞ɯzo̞n
+熊谷 kɯmɑgɑi
+何事 nɑnigoto
+誰 dɑre
+ブロウンディフューザー bɯroɯndiɸjɯːzɑː
+プリンター pɯrintɑː
+魚市場 sɑkɑnɑʃiʒoː
+気味 kimi
+人格 dʒinkɑkɯ
+ファッション fɑsɕɔn
+アルマ ɑrɯmɑ
+風邪 kɑze
+改め ɑrɑtɑme
+万博 bɑnpɑkɯ
+ミナト minɑto
+黒字 kɯro̞dʒi
+柳田 jɑnɑgidɑ
+亀戸 kɑmeido
+剃り so̞ɾi
+化粧 keʃo̞ɯ
+ランダム rɑndɑmɯ
+大通り o̞ːdo̞ːri
+後半 koːhɑn
+税収 ze̞ːʃɯː
+パーク pɑːkɯ
+持ち上がっ motʃiɑgɑʔ
+ウェブサイト ebɯzaito
+安否 ɑnpi
+シンカンセン ʃinˈkɑnsən
+共演 kʲo̞ɯen
+知識欲 tʃiʃiki jo̞kɯ
+観客 kɑnkʲɑkɯ
+買いだめ kɑidɑme
+平凡 heːbo̞n
+びっくり biʔkɯri
+産ま ɯmɑ
+タッグ tɑʔgɯ
+策 sakɯ
+早足 hɑjɑːʃi
+結婚 keʔko̞n
+快適 kɑiteki
+パワー paɯaː
+小 ʃo̞ɯ
+要 jo̞ɯ
+あせ ɑse
+贈答 zo̞ɯto̞ɯ
+野良 norɑ
+鈴川 sɯzɯkawa
+回答 kɑitoː
+感じ取っ kɑndʒitoʔ
+増強 zo̞ɯkʲo̞ɯ
+粋 iki
+和やか nɑgojɑkɑ
+準々 dʒɯndʒɯn
+書記 ʃo̞ki
+ミヤネ mijɑne
+瑞穂 mizɯho̞
+外せ hazɯze
+昔 mɯkɑʃi
+お披露目 o̞hiro̞me
+クル kɯrɯ
+方々 kɑtɑgɑtɑ
+騙し dɑmɑʃi
+行財政 gouzajseː
+資格 ʃikɑkɯ
+返答 hento̞ɯ
+マーク mɑːkɯ
+単独 tɑndokɯ
+繋い t͡sɯnɑi
+産む ɯmɯ
+側室 sokɯʃitsɯ
+必殺 hisatsɯsɯ
+憮然 bɯze̞n
+クリスチャン kɯɾisɯxan
+携帯 keːtɑi
+草花 kɯsabana
+番兵 bɑnpeː
+刈り gɑri
+社名 ʃɑmeː
+最愛 sɑjɑj
+しれ ʃire̞
+オフ o̞ɸɯ
+ジオスター dʒiosutaː
+大証 dɑiʃoː
+お金 okɑne
+それなら soɾenɑɾɑ
+恋 ko̞i
+タレ tɑre
+オーブン o̞ːbɯn
+ペレ pe̞re̞
+由里子 jɯriko̞
+厚い ɑt͡sɯi
+おしゃれ oʃɑre
+挙げよ ɑgejo
+採決 sɑiketsɯ
+騎馬 kibɑ
+疲れる t͡sɯkɑrerɯ
+アンカー ɑnkɑː
+つきもの t͡sɯkimo̞no̞
+フィッシング fɪʔseiŋɡɯ
+邪魔 dʒɑmɑ
+しぶとく ʃibɯto̞kɯ
+こっ ko̞ʔ
+意気揚々 iki jo̞ɯjo̞ɯ
+たたか tɑtɑ kɑ
+地理 tʃiri
+な nɑ
+再掲 sɑikeː
+激し ge̞kiʃi
+殊勝 ʃɯʃo̞ɯ
+完璧 kɑnpeki
+規模 kibo̞
+梅干 ɯmebo̞ʃi
+大方 oːkɑtɑ
+渡し ɯataʃi
+保ち tɑmotʃi
+湿っぽ ʃimeʔpo̞
+参加 sɑnkɑ
+だらけ dɑrɑke
+モジュール mo̞dʒɯːrɯ
+男の子 o̞to̞ko̞no̞ko̞
+對 tɑi
+景気 ke̞ːki
+無償 mɯʃo̞ɯ
+活性 kɑʔseː
+根こそぎ neko̞so̞gi
+経済学部 keːzɑigɑkɯ bɯ
+ゆえ jɯe̞
+伝わり t͡sɯtaɯari
+見覚え mio̞bo̞e
+売れ ɯre̞
+収集 ʃɯːʃɯː
+一部 itʃibɯ
+なんとか nɑn to kɑ
+持てる mo̞terɯ
+長袖 nɑgɑsodɛ
+通い kɑjoi
+非核 hikɑkɯ
+確保 kɑkɯho
+トオル to̞ːrɯ
+近 kin
+減り he̞ri
+天秤棒 tenbin bo̞ɯ
+プリテキスティング pɯritekisɯtiᵑgɯ
+逝雄 ??
+ボルト bo̞rɯto̞
+スポーツエグゾースト sɯpoːtsɯegɯzoːsɯto
+めまい memɑi
+エージェント eːdʒento̞
+揺るが jɯrɯgɑ
+半島 hɑntoː
+しみつい ʃimit͡sɯi
+自己流 dʒiko̞rʲɯː
+救わ sɯkɯwa
+張りつめ hɑrit͡sɯme
+コラボ korɑbo
+ひるま hirɯmɑ
+聞か kikɑ
+中国共産党 tʃɯːgokɯkjoːsantoː
+気味悪 kimi ɑkɯ
+同盟 do̞ːmeː
+贈呈 zo̞ːteː
+よりけり jo̞ri keri
+既報 kiho̞ɯ
+長生き nɑgɑiki
+決め kime̞
+二酸化炭素 nisɑnkɑtɑnso
+劣化 reʔkɑ
+現われる araɯarerɯ
+晴らす haɾasɯ
+真里 mɑri
+ビデオ bideo̞
+オーベクス oːbekɯsɯ
+アラフォー ɑrɑ ɸoː
+加え kɯɯae
+制度 seːdo̞
+左上 hidɑriɯe
+観れ mire̞
+ウワサ ɯwasa
+噛め kɑme
+ムシャラフ mɯʃɑrɑɸɯ
+勝つ kɑt͡sɯ
+力説 ɾikise̞tsɯ
+のっぺらぼう noʔperɑboː
+形式 ke̞ːʃiki
+語り口 kɑtɑrikɯtʃi
+高速 koːsokɯ
+妥当 dɑtoː
+住み込み sɯmikomi
+ミサ misɑ
+営む itonɑmɯ
+けい子 keːko̞
+下がり sɑgɑri
+家中 kɑtʃɯː
+東方 to̞ɯho̞ɯ
+絶望 zet͡sɯbo̞ɯ
+厚 ɑt͡sɯʃi
+男女 dɑndʒo
+顰蹙 hinʃɯkɯ
+ひねる hine̞rɯ
+干渉 kɑnʃoː
+決まれ kimɑre
+ろ ro̞
+面白く o̞mo̞ʃiro̞kɯ
+上げ幅 ɑgehɑbɑ
+ごみ箱 gomibɑko
+縛り ʃibɑri
+決して kɛʔʃite̞
+チーズフォンデュ tʃiːzɯ ɸo̞ndjɯ
+探せる sagaseɾɯ
+打ち出す ɯtʃidasɯ
+越年 e̞t͡sɯne̞n
+西澤 niʃizaɯa
+グラウンド gɯrɑɯndo
+期間 kikɑn
+好評 ko̞ɯço̞ɯ
+沖 o̞ki
+フェロー ɸero̞ː
+コピー ko̞piː
+お話 ohɑnɑʃi
+撤去 teʔkʲo̞
+洞爺湖 toːjɑko
+入れ墨 ire̞zɯmi
+おやじ ojɑdʒi
+謝れ ɑjɑmɑre
+のみ no̞mi
+バスレフ basɯrefɯ
+宝 tɑkɑrɑ
+集金 ʃɯːkin
+添付 te̞npɯ
+闇雲 jɑmikɯmo
+のし上がる noʃiɑgɑrɯ
+三省堂 sɑnseːdoː
+新居 ʃinkʲo̞
+アーバン ɑːbɑn
+除け jo̞ke
+脇 ɯaki
+サイト sɑito
+ケアレスミス kearesɯmisɯ
+こぼれ ko̞bo̞re
+切り離す kiɾihanasɯ
+モン mo̞n
+献立 kondɑte
+見逃す minoɰᵝasɯ
+砂鉄 sɑtetsɯ
+予断 jodɑn
+再建 sɑiken
+複雑 ɸɯkɯzɑt͡sɯ
+貶す kenasɯ
+吹き飛ばす fɯkitobasɯ
+ガイ gɑi
+ウェア eɑ
+テクノロジー tekɯno̞ro̞dʒiː
+反町 soɾimɑtʃi
+末 sɯe
+驚 o̞do̞ro̞ki
+農園 no̞ɯen
+湯川 jɯkaɯa
+つわり t͡sɯɯari
+はばたか hɑbɑtɑkɑ
+メリハリ merihɑri
+おこ o̞ko̞
+屋外 okɯgɑi
+醍醐味 dɑigomi
+利き手 kikite̞
+勤め先 tsɯtomesɑki
+血縁 ke̞t͡sɯe̞n
+稀有 ke̞ɯ
+球史 kʲɯːʃi
+行き先 ikisɑki
+疎開 sokɑi
+分かれる ɯakarerɯ
+土壇場 dotɑnbɑ
+何でもかんでも nɑn de mo kɑn de mo
+協力 kʲo̞ɯrʲo̞kɯ
+蛇 he̞bi
+掘り起こさ hoɾjokosɑ
+直ちに tɑdɑtʃini
+確認 kɑkɯnin
+頻繁 hinpɑn
+頂き itɑdɑki
+先達 sendɑtsɯ
+なぜなら nɑze nɑrɑ
+ふらつき ɸɯrɑt͡sɯki
+議論 giro̞n
+福 ɸɯkɯ
+住之江 sɯminoe
+おくり o̞kɯri
+注意 tʃɯːi
+おこし o̞ko̞ʃi
+警察庁 keisɑtsɯʃoː
+路面 ro̞men
+芸人 ge̞ːnin
+あっけらかん ɑʔkerɑkɑn
+やみつき jɑmit͡sɯki
+もらわ moraɯa
+当事者 toːdʒiʃɑ
+強豪 kʲo̞ɯgo̞ɯ
+進む sɯsɯmɯ
+買い越し kɑikoʃi
+最先端 sɑisentɑn
+リクルーター rikɯrɯːtɑː
+内科 nɑikɑ
+過ごせ sɯɣose
+新芽 ʃinme̞
+産油 saɲjɯ
+渡辺 ɯatanabe
+シャビー ʃɑbi
+南極 nɑnkʲokɯ
+検討 kento̞ɯ
+もったいない moʔtɑi nɑi
+小屋 kojɑ
+張り bɑri
+剥がす hagasɯ
+臼杵 ɯsɯki
+各々 o̞no̞ːno̞
+読み取れる jo̞mito̞rerɯ
+ビジネスチャンス bidʒinesutʃansu
+幹事 kɑndʒi
+塗り替える nɯrikɑerɯ
+荷主 ninɯʃi
+外為 gɑitɑme
+三笠 mikɑsɑ
+所有 ʃo̞jɯː
+謝る ɑjɑmɑrɯ
+起毛 kimo̞ɯ
+格 kɑkɯ
+今中 imɑnɑkɑ
+内山 ɯtʃijɑmɑ
+成田 nɑritɑ
+満 mɑn
+母親 hɑhɑojɑ
+導き mitʃibiki
+撫で nɑde
+誌 ʃi
+首相 ʃɯʃo̞ɯ
+発射 hɑsɕɑ
+テーマパーク teːmɑ pɑːkɯ
+ビジュアル bidʒɯɑrɯ
+常温 dʒo̞ɯo̞n
+強め t͡sɯjo̞me
+企ん tɑkɯrɑn
+保育園 ho̞ikɯen
+かい kɑi
+べし be̞ʃi
+建前 tɑtemɑe
+輸出 jɯʃɯt͡sɯ
+ローカル roːkɑrɯ
+浮き彫り ɯkibo̞ri
+にじむ nidʒimɯ
+優雅 jɯːgɑ
+恥辱 tʃidʒo̞kɯ
+そろ so̞ɾo̞
+取り出す toɾidasɯ
+一礼 itʃi re̞ː
+しゃべれ ʃɑbere
+資源 ʃige̞n
+手品 tedʒinɑ
+一節 ise̞tsɯ
+上流 dʒo̞ɯrʲɯː
+旋風 senpɯː
+最 sɑi
+見出し midɑʃi
+普天 ɸɯte̞n
+会誌 kɑiʃi
+略式 rʲɑkɯʃiki
+評価 çoːkɑ
+審査 ʃinsɑ
+送迎 so̞ːɡɛː
+はく hɑkɯ
+パンツ pɑnt͡sɯ
+暴行 bo̞ɯko̞ɯ
+ほんと ho̞nto̞
+不整脈 fɯseːmjakɯ
+ポニーテール po̞niːteːrɯ
+セシル seʃiɾɯ
+車両 ʃɑrʲoː
+ほしき ho̞ʃiki
+本業 ho̞ngʲo̞ɯ
+時には toki ni ɯa
+区 kɯ
+信越 ʃinetsɯ
+法政大学 hoːseːdaigakɯ
+リビング ribingɯ
+破綻 hɑtɑn
+刷新 sɑsɕin
+関係 kɑnkeː
+あたり ɑtɑri
+やぶさか jabɯsaka
+サービス saːbisɯ
+終戦 ʃɯːsen
+て te̞
+命じ me̞ːdʒi
+素晴らしかっ sɯbaɾaʃikaʔ
+全身 ze̞nʃin
+還元 kɑngen
+つくり t͡sɯkɯri
+公共 ko̞ɯkʲo̞ɯ
+三菱自動車工業 mit͡sɯbiʃi dʒidoːʃɑ koːgʲoː
+減らさ heɾɑsɑ
+クルクル kɯrɯkɯrɯ
+据え置い sɯeoi
+通貨 t͡sɯːkɑ
+縦 tɑte
+友好 jɯːko̞ɯ
+モンスーン monsɯːn
+避け sɑke
+畳 tɑtɑmi
+見た目 mitɑ me
+来場 rɑidʒoː
+処方 ʃo̞ho̞ɯ
+政治 se̞ːdʒi
+願望 gɑnboː
+至っ itɑʔ
+死命 ʃime̞ː
+めど medo̞
+淀み jo̞do̞mi
+昌孝 mɑsɑtɑkɑ
+胃がん igɑn
+噛ま kɑmɑ
+好意 ko̞ɯi
+失せる ɯseɾɯ
+座 zɑ
+被告 hiko̞kɯ
+素材 sozɑi
+溜まり tɑmɑri
+先 sɑki
+宣誓 se̞nse̞ː
+激戦 gəkizən
+アンヌ ɑnnɯ
+富豪 ɸɯgo̞ɯ
+少 sɯkɯna
+バイデン bɑiden
+っぱなし ʔpɑnɑʃi
+年始 ne̞nʃi
+似たりよったり nitɑri joʔ tɑri
+奪い取 ɯbɑi tori
+減反 gentɑn
+帰属 kizo̞kɯ
+最終 saishɯː
+ルックス ɾɯʔkɯsɯ
+エスプレッソ esɯpɯɾeso
+同然 do̞ɯzen
+仰い ɔshɑi
+じまう dʒimɑɯ
+見張る mihɑrɯ
+父兄 ɸɯke̞ː
+整合 seːɡo̞ː
+各所 kɑkɯʃo
+引き上げる hikiɑgerɯ
+かか kɑkɑ
+体力 tɑirʲokɯ
+家族 kɑzokɯ
+推奨 sɯiʃoː
+食べ tɑbe
+キス kisɯ
+通れ to̞ːre
+テレ te̞re̞
+いま imɑ
+おびえ o̞bie
+魁皇 kɑioː
+痛撃 t͡sɯːge̞ki
+指輪 jɯbiɯa
+遠慮なく enrʲo nɑkɯ
+定番 teːbɑn
+山々 jɑmɑjɑmɑ
+欠如 ket͡sɯdʒo̞
+われ ɯare
+配ら kɯbɑrɑ
+ツル t͡sɯrɯ
+共 to̞mo̞
+主宰 ʃɯzai
+前述 ze̞ndʒɯt͡sɯ
+太陽 tɑijoː
+故障 ko̞ʃo̞ɯ
+じみ dʒimi
+小分け koːake
+値段 nedɑn
+駅伝 e̞kide̞n
+含む ɸɯkɯmɯ
+断熱 dɑnnet͡sɯ
+暑気 ʃo̞ki
+パナソニック panasonikekɯ
+既婚 kiko̞n
+器量 kirʲo̞ɯ
+笑わせ vɑɾɑvɑze
+おり o̞ri
+郊外 koːgɑi
+錆び sɑbi
+権益 ke̞ne̞ki
+内訳 ɯtʃiɯake
+クン kɯn
+いよいよ ijo̞ijo̞
+ぎくしゃく gikɯʃɑkɯ
+初頭 ʃo̞to̞ɯ
+イスラム isɯɾamɯ
+ね ne̞
+球場 kʲɯːdʒo̞ɯ
+ジンガー dʒingɑː
+言い切る iikirɯ
+衛生 e̞ːse̞ː
+フォント ɸo̞nto̞
+儲から moːkɑrɑ
+追いつく o̞it͡sɯkɯ
+とどまっ todomɑʔ
+上昇 dʒo̞ɯʃo̞ɯ
+しぐれ ʃigɯre̞
+住民 dʒɯːmin
+やさし jɑsɑʃi
+要求 jo̞ɯkʲɯː
+課長 kɑtʃoː
+ミトコンドリア mitokondoriɑ
+身 mi
+どの do̞no̞
+よろしく jo̞ro̞ʃikɯ
+弟 o̞to̞ɯto̞
+聞こえ kiko̞e
+射撃 ʃɑgeki
+一段 itʃi dɑn
+休養 kʲɯːjo̞ɯ
+ひと hito̞
+怪物 kɑibɯt͡sɯ
+要ら irɑ
+とどめる to̞do̞merɯ
+太山 tɑizɑn
+スマート sɯmaːto
+道徳 do̞ɯto̞kɯ
+褒める ho̞merɯ
+ラオス ɾaozɯ
+選手権 sɛnshɯkɛn
+沿い so̞j
+差し入れ sɑʃiiɾe
+調達 tʃoːtɑt͡sɯ
+品格 hinkɑkɯ
+別名 be̞t͡sɯme̞ː
+欲しく ho̞ʃikɯ
+奨励 ʃo̞ːreː
+楽器 gɑʔki
+ジーデータ dʒiː deːtɑ
+ゴルバチョフ gorɯbɑtʃoɸɯ
+レギンス ɾeginsɯ
+窮状 kʲɯːdʒo̞ɯ
+によって ni jo̞ʔte
+おいしい o̞iʃiː
+続 t͡sɯzɯkɯ
+取り戻す toɾimodosɯ
+冒し okɑʃi
+談合 dɑngoː
+棚 tɑnɑ
+力演 rikie̞n
+胸 mɯne̞
+訴訟 so̞ʃo̞ː
+いやいや ijɑijɑ
+生き生き ikiiki
+免疫 me̞ne̞ki
+コンセプト konsepɯto
+追跡 tsɯise̞ki
+持ち味 motʃiɑdʒi
+番 bɑn
+固い kɑtɑi
+水俣病 minɑmɑtɑbʲoː
+助け合う tasɯkeaw
+下ろす oɾosɯ
+うそ ɯso
+使い方 t͡sɯkɑikɑtɑ
+ズーム zɯːmɯ
+共通 kʲo̞ɯt͡sɯː
+劣勢 ɾe̞ʔse̞ː
+本堂 ho̞ndo̞ɯ
+同車 doːʃɑ
+探索 tansakɯ
+塗り替え nɯrikɑe
+ニガテ nigɑte
+法定 ho̞ːteː
+側 gaɯa
+衝く t͡sɯkɯ
+類似 rɯidʒi
+記載 kisɑi
+越える ko̞erɯ
+扇動 sendo̞ː
+喜作 kisakɯ
+案の定 ɑnnodʒoː
+自首 dʒiʃɯ
+顔負け kɑomɑke
+難かっ gɑtɑkɑʔ
+核 kɑkɯ
+至福 ʃiɸɯkɯ
+ウェイト ɯeito
+ジェフリー dʒe̞ɸɯriː
+猪口 tʃo̞ko̞
+伝わっ t͡sɯtaɯaʔ
+移し ɯt͡sɯʃi
+正しかっ tɑdɑʃikɑʔ
+祝っ ʃɯkɯʔ
+身近 mizikɑ
+勧め sɯsɯme
+なあ nɑː
+ニア niɑ
+わんさ vɑnsɑ
+内房 ɯtʃibo̞ɯ
+ファースト faːsɯto
+給え tɑmɑe
+コスメ kosɯme
+構築 ko̞ɯtʃikɯ
+モテ mo̞te
+宮崎 mijɑzɑki
+こもっ ko̞mo̞ʔ
+良品 rʲo̞ɯhin
+とまれ tomɑre
+だり dɑri
+我々 ɯareɯare
+おお o̞ː
+畜 tʃikɯ
+鎖国 sakokɯ
+話しかける hɑnɑʃikɑkerɯ
+生まれ ɯmɑre
+富士山 ɸɯdʒi jɑmɑ
+ずれ zɯre̞
+助走 ʒo̞so̞ː
+わくわく ɯakɯɯakɯ
+ブレア bɯreɑ
+数える kɑzoerɯ
+強まっ t͡sɯjomɑʔ
+ギャランドゥ gʲɑrɑndɯ
+つかん t͡sɯkɑn
+次々 t͡sɯgit͡sɯgi
+やつ jɑt͡sɯ
+ポート po̞ːto̞
+訪れ o̞to̞zɯre
+料金 rʲo̞ɯkin
+頼ら tɑjorɑ
+茨城 ibɑrɑki
+入賞 ɲɯːʃo̞ɯ
+取り付け to̞rit͡sɯke
+義援金 gie̞nkin
+バー bɑː
+精密 se̞ːmitsɯ
+歌っ ɯtɑʔ
+三菱商事 mit͡sɯbiʃi ʃo̞ɯdʒi
+閥 bɑt͡sɯ
+下旬 ge̞dʒɯn
+あり方 ɑrikɑtɑ
+古い ɸɯrɯi
+長く nɑgɑkɯ
+許可 kʲokɑ
+だれ dɑre
+かき立て kɑkitɑte
+中島 nɑkɑdʒimɑ
+特に to̞kɯni
+けど kedo̞
+薬価 jɑʔkɑ
+晩年 bɑnnen
+答弁 to̞ɯben
+反射 hɑnʃɑ
+通報 t͡sɯːho̞ɯ
+脱稿 dɑʔkoː
+情勢 ʒoːseː
+重なり kɑsɑnɑɾi
+自制 dʒiseː
+押し流さ osinɑɣɑsɑ
+水虫 mizɯmɯʃi
+漏洩 ro̞ːeː
+悲しく kɑnɑʃikɯ
+金額 kingɑkɯ
+とおり to̞ːri
+陶器 to̞ɯki
+すっかり sɯʔkari
+について ni t͡sɯite̞
+ばん bɑn
+雪だるま jɯki dɑrɯmɑ
+徳 to̞kɯ
+かすか kasɯka
+妨害 boːgɑi
+症状 ʃo̞ɯdʒo̞ɯ
+ステイタス sɯteitasɯ
+横須賀 jokosɯka
+にらみ nirɑmi
+ひそか hisokɑ
+受注 dʒɯtʃɯː
+毛色 keiro̞
+価値 kɑtʃi
+ベルギー be̞rɯgiː
+選択肢 sentakɯʃi
+湘南 ʃoːnɑn
+電波 denpɑ
+毎晩 mɑibɑn
+過ごし sɯɡoʃi
+見直さ minɑosɑ
+渦中 kɑtʃɯː
+早けれ hɑjɑkere
+平日 he̞ːdʒit͡sɯ
+傷つけ kizɯt͡sɯke̞
+抜き取り nɯkito̞ri
+帰ら kɑerɑ
+芝居 ʃibɑi
+ヨード jo̞ːdo̞
+ぶつけ bɯt͡sɯke̞
+ごきげんよう go̞kigeɲo̞ɯ
+不安 ɸɯɑn
+康夫 jasɯo
+強行 kʲo̞ɯko̞ɯ
+追悼 t͡sɯito̞ɯ
+一生 iʔso̞ː
+間柄 ɑidɑgɑrɑ
+佐古 sɑko
+主人公 ʃɯdʒinko̞ɯ
+巧み tɑkɯmi
+アステラス asɯteɾasɯ
+申し訳 moːʃiɯake
+セメント semento̞
+居座っ isɯwaʔ
+そぐ soɰᵝabɯ
+映画 eːgɑ
+科目 kɑmokɯ
+真相 ʃinso̞ː
+救急 kʲɯːkʲɯː
+ポータブル poːtɑbɯrɯ
+知る ʃirɯ
+できあがり dekiɑgɑri
+タガログ tɑgɑrogɯ
+社交 ʃɑkoː
+とれる to̞rerɯ
+超え ko̞e
+成金 nɑrikin
+金賢姫 kin ke̞nki
+整理 se̞ːɾi
+潮 ʃio̞
+倒せ tɑoze
+現金 ge̞nkin
+栄え hɑe
+続く t͡sɯzɯkɯ
+那須 nasɯ
+車いす kɯrɯmaisɯ
+運営 ɯ̃eː
+九分九厘 kʲɯː ɸɯn kʲɯː rin
+ニオイ nio̞i
+偵察 teːsɑtsɯ
+内田 ɯtʃidɑ
+望ましい nozomɑʃiː
+傾き kɑtɑmɯki
+第三国 daisankokɯ
+ニーズ niːzɯ
+磨ける migɑkerɯ
+週末 ʃɯːmɑt͡sɯ
+セーフ seːfɯ
+ねぶた nebɯtɑ
+噛ん kɑn
+相性 ɑiʃoː
+半身不随 hɑnʃin ɸɯzɯi
+森永乳業 morinɑgɑ ɲɯːgʲoː
+駆っ kɑʔ
+パセリ pɑseɾi
+照明 ʃo̞ɯmeː
+小幅 kohɑbɑ
+わけ ɯake
+来季 rɑiki
+知り合い ʃiriɑi
+奇抜 kibɑt͡sɯ
+ミライース miraiːsɯ
+権利 ke̞nri
+風 kɑze
+大金持ち oːkɑnemotʃi
+発音 hɑt͡sɯon
+ことごとく ko̞to̞go̞to̞kɯ
+来年度 rɑinendo
+歓迎 kɑngeː
+取り外す toɾihazɯsɯ
+中尾 nɑkɑo
+やすい jasɯi
+組み立て kɯmitɑte
+デモンストレーション demonsɯtoɾeːʃon
+気持ち kimo̞tʃi
+家畜 kɑtʃikɯ
+騒い sɑvɑi
+ピザ pizɑ
+覇者 hɑʃɑ
+やばい jɑbɑi
+ナゼ nɑze
+変数 hensɯː
+代替 dɑitɑi
+経っ kʲo̞ɯʔ
+興味深 kʲo̞ɯmiʃin
+男性 dɑnseː
+レアル reɑrɯ
+合わ aɯa
+隠す kakɯsɯ
+ダン dɑn
+着目 tʃɑkɯmokɯ
+ざら zɑrɑ
+瓦礫 gɑreki
+執念 ʃɯːne̞n
+航空機 ko̞ɯkɯː ki
+部類 bɯrɯi
+富士急行 ɸɯdʒi kʲɯːko̞ɯ
+鍛え上げ kitɑeɑge
+證 ɑkɑʃi
+多摩大 tɑmɑdɑi
+計 ke̞ː
+横柄 o̞ːheː
+あや ɑjɑ
+離れ hɑnɑre
+こん ko̞n
+浩行 hiro̞jɯki
+フォワード ɸoːaːdo
+加盟 kɑmeː
+語りかけ kɑtɑrikɑke
+移設 ise̞tsɯ
+強調 kʲo̞ɯtʃo̞ɯ
+高める tɑkɑmerɯ
+償還 ʃoːkɑn
+満ち mitʃi
+体操 tɑizoː
+お父さん okitʃisɑn
+バレンタイン bɑrentɑin
+ピンチ pintʃi
+打ちのめさ ɯtʃinomesa
+ミニストップ minisɯtoʔpɯ
+滑らか nɑmerɑkɑ
+浄化 dʒoːkɑ
+ほど ho̞do̞
+追いつめ o̞it͡sɯme
+些細 sɑsɑi
+ポータルサイト poːtarɯsaito
+ほっぺ ho̞ʔpe
+シートベルト ʃiːto̞ berɯto̞
+賜物 tɑmɑmono
+まくし立てる mɑkɯʃitɑterɯ
+悪役 ɑkɯjɑkɯ
+湾岸 ɯangan
+けっぺき ke̞ʔpe̞ki
+休み jasɯmi
+チャージ tʃɑːdʒi
+ギア giɑ
+やがて jɑgɑte
+傾く kɑtɑmɯkɯ
+ジャーナリズム dʒɑːnɑrizɯmɯ
+にじま nidʒimɑ
+雅治 masahaɾɯ
+気に入ら ki ni hɑirɑ
+ゆえに jɯe̞ ni
+連続 renzo̞kɯ
+宗 so̞ː
+祝い iɯai
+利下げ ɾisɑʒə
+猶予 jɯːjo̞
+材料 zɑirʲoː
+困っ komɑʔ
+エクスプレス ekɯsɯpɯɾesɯ
+談義 dɑngi
+先進 se̞nʃin
+子役 kojɑkɯ
+惚れ ho̞re
+心憎い kokoronikɯi
+終始 ʃɯːʃi
+漬け t͡sɯke̞
+賄う mɑkɑnɑɯ
+なにか nɑni kɑ
+性質 se̞ːʃitsɯ
+美化 bikɑ
+いざ izɑ
+小豆 ɑzɯki
+取締役 toriʃimɑri jɑkɯ
+そん so̞n
+珍妙 tʃinmʲo̞ɯ
+小川 ogaɯa
+座る sɯwaɾɯ
+貴金属 kikinzo̞kɯ
+期する kisɯɾɯ
+重なっ kɑsɑnɑʔ
+正体 ʃoːtɑi
+悪性 akɯseː
+標識 ço̞ɯʃiki
+テコ teko̞
+国立 ko̞kɯrit͡sɯ
+ジョブズ dʒo̞bɯzɯ
+理恵 rie̞
+ピーク piːkɯ
+起因 kiin
+磐城 iɯaki
+もしか moʃi kɑ
+スポ sɯpo
+ほしかっ hoʃikɑʔ
+きっかけ kiʔkɑke
+権威 ke̞ni
+見据え misɯe
+磨け migɑke
+有識者 jɯːʃikiʃɑ
+バーター bɑːtɑː
+旧 kʲɯː
+ニュース ɲɯːs
+ベクトル bekɯto̞rɯ
+有 jɯː
+ベストセラー besɯtoseraː
+気持 kimo̞tʃi
+展開 tenkɑi
+気温 kio̞n
+揺れる jɯre̞rɯ
+軌跡 kise̞ki
+ミツカン mit͡sɯkɑn
+レンタカー rentɑkɑː
+ペーパー peːpɑː
+ダボッ dɑbo
+殉じる dʒɯndʒirɯ
+お手上げ oteɑge
+終われ oɯare
+カミングアウト kɑmingɯ ɑɯto
+信用 ʃiɲɯ
+衛星 e̞ːse̞ː
+境 sɑkɑi
+大脳 dɑinoː
+吸う sɯɯ
+いたわる itaɯarɯ
+ちょうだい tʃoːdɑi
+苦み nigɑmi
+ポテチ po̞tetʃi
+ツイン t͡sɯin
+グアルディオラ gɯɑrɯdiorɑ
+乾く kaɯakɯ
+興行 ko̞ɯgʲo̞ɯ
+アヤ ɑjɑ
+バブル bɑbɯrɯ
+付け方 t͡sɯkekɑtɑ
+解約 kɑijɑkɯ
+たしかに tɑʃikɑ ni
+終身 ʃɯːʃin
+仕上がり ʃiɑgɑri
+大量 tɑirʲoː
+稚拙 tʃise̞tsɯ
+もつれこむ mo̞t͡sɯreko̞mɯ
+苦しく kɯrɯʃikɯ
+部内 bɯnɑi
+中学生 tʃɯːgakɯseː
+チャンネル tʃɑnnerɯ
+和風 ɯaɸɯː
+土地 to̞tʃi
+ヒロイン hiro̞in
+ロッヂ ro̞ʔzi
+絶える tɑerɯ
+スペシャル sɯpeʃaɾɯ
+明かす akasɯ
+一瞬 iʔʃɯn
+形跡 ke̞ːse̞ki
+不振 ɸɯʃin
+直吉 nɑokitʃi
+マウンテン mɑɯnten
+ガセ gɑse
+一族 itʃizo̞kɯ
+同窓会 doːsoːkɑi
+くみ取っ kɯmito̞ʔ
+検査 kensɑ
+かっ kɑʔ
+各社 kɑkɯʃɑ
+西 niʃi
+派 hɑ
+繰り返す kɯɾikaesɯ
+動か ɯgokɑ
+欠かさ kɑkɑsɑ
+閉鎖 heːsɑ
+登米 to̞me
+疑り ɯtɑgɯri
+谷垣 tɑnigɑki
+景観 keːkɑn
+こじ開け kodʒiɑke
+収蔵 ʃɯːzo̞ɯ
+北 kitɑ
+豪雨 go̞ɯː
+松茸 mɑt͡sɯtɑke
+寂しかっ sɑbiʃikɑʔ
+廊下 roːkɑ
+撮り to̞ri
+港 minɑto
+つない t͡sɯnɑi
+重し o̞mo̞ʃi
+ブラックサースデー bɯraʔkɯsaːsɯdeː
+瀬川 sekaɯa
+アホ ɑho
+広がっ hirogɑ
+自暴自棄 dʒibo̞ɯ dʒiki
+女子 dʒo̞ʃi
+証 ɑkɑʃi
+ステレオ sɯteɾeo
+去勢 kjo̞seː
+請求 seːkjɯː
+カリブ kɑribɯ
+橋場 hɑʃibɑ
+無理 mɯri
+気仙沼 kezennɯma
+何ら nɑnrɑ
+未知 mitʃi
+真っ先 mɑʔsɑki
+しめる ʃime̞rɯ
+藏 kɯrɑ
+災 ɯazaɯai
+余勢 jo̞seː
+おまけ omɑke
+たどる tɑdorɯ
+ミソ miso̞
+やり手 jɑrite
+クセ kɯze
+軽い kɑrɯi
+徳力 to̞kɯriki
+小遣い kozɯkɑi
+おでん o̞den
+飯 me̞ʃi
+民放 minpo̞ɯ
+議席 gise̞ki
+梨 nɑʃi
+平野 hirɑno
+カニ kɑni
+源流 ge̞nrʲɯː
+単一 tanitsɯɯ
+撮影 sɑtsɯeː
+馴れ合い nɑreɑi
+商売 ʃoːbɑi
+優花 jɯkɑ
+背負っ seo̞ʔ
+選手 senʃɯ
+後継 ko̞ːkeː
+よぎり jo̞giri
+たて tɑte
+相撲 sɯmoː
+逆さ sɑkɑsɑ
+東燃 to̞ɯnen
+サッポロライオン sɑpororɑjɔn
+キュキュキュキュ kʲɯkʲɯ kʲɯkʲɯ
+アイコ ɑiko
+宿 jɑdo
+県警 ke̞nke̞ː
+カチン kɑtʃin
+減 ge̞n
+ナショナリスト nasonaɾisɯto
+鯨 kɯdʒirɑ
+福井 ɸɯkɯi
+アイツ ɑit͡sɯ
+政教 seːkjoː
+がり gɑri
+カーネギー kɑːnegiː
+インターフェース intaːfeːsɯ
+共働き tomobɑtɑrɑki
+馴染み nɑdʒimi
+やめよ jɑmejo
+ついてまわる t͡sɯite maɯarɯ
+晩婚 bɑnkon
+親族 ʃinzo̞kɯ
+技研 gike̞n
+悪意 ɑkɯi
+小倉 ogɯrɑ
+さわやか saɯajaka
+借り手 kɑrite
+快癒 kɑijɯ
+加熱 kɑnet͡sɯ
+反米 hɑnbeː
+財布 saifɯ
+中村 nɑkɑmɯrɑ
+遊ん ɑson
+駆け込み kɑkekomi
+殴り込み nɑgɯrikomi
+取り組め to̞rikɯme
+めざし mezɑʃi
+否め inɑme
+品目 hinmo̞kɯ
+酸素 sɑnso
+種目 ʃɯmo̞kɯ
+報奨 ho̞ɯʃo̞ɯ
+戻す modosɯ
+執着 ʃɯːtʃɑkɯ
+代表 dɑiçoː
+ネイマール nei mɑːrɯ
+診療 ʃinrʲo̞ɯ
+成年 sɯɪnən
+自家 dʒikɑ
+ベッキー be̞ʔkiː
+薬 kɯsɯɾi
+に対して ni tɑiʃite
+苗字 mʲo̞ɯdʒi
+未だに imɑdɑ ni
+震災 ʃinsɑi
+アタマ ɑtɑmɑ
+夏 nɑt͡sɯ
+機構 kiko̞ɯ
+つられ t͡sɯrɑre
+ページ pe̞ːdʒi
+やり過ごす jaɾisɯɣosɯ
+リニューアル riɲɯːɑrɯ
+嘘 ɯso
+同月 do̞ɯget͡sɯ
+山縣 jɑmɑgɑtɑ
+ヒロ hiro̞
+控え室 hikɑeʃit͡sɯ
+残ら nokorɑ
+虹 nidʒi
+地点 tʃite̞n
+しぐさ ʃiɰᵝɑzɑ
+やっぱり jɑʔpɑri
+手拍子 te ço̞ɯʃi
+霞が関 kasɯmiɠaseki
+並み nɑmi
+チーズ tʃiːzɯ
+ドタキャン dotɑkʲɑn
+委員 iin
+話し方 hɑnɑʃikɑtɑ
+早朝 so̞ːtʃo̞ː
+竹中 tɑkenɑkɑ
+課 kɑ
+細かく komɑkɑkɯ
+消え kie̞
+ひとみ hito̞mi
+所在地 ʃozɑi tʃi
+まんざら mɑnzɑrɑ
+疑問 gimo̞n
+調節 tʃo̞ːsetsɯ
+飛び越え to̞biko̞e
+近づけれ tʃikɑzɯkere
+念願 nengɑn
+著名 tʃo̞meː
+分量 bɯnrʲo̞ɯ
+増えよ ɸɯejo̞
+予算 josɑn
+用 jo̞ɯ
+興味 kʲo̞ɯmi
+処置 ʃo̞tʃi
+サマータイム samaːtaimɯ
+西沢 niʃisaɯa
+術 dʒɯt͡sɯ
+ダッシュ daʔʃɯ
+スローライフスローカメラ sɯroːraifɯsɯroːkamera
+保障 ho̞ʃo̞ɯ
+滅亡 met͡sɯbo̞ɯ
+地滑り dʒisubeɾi
+計画 keːkɑkɯ
+円満 enmɑn
+後戻り ɑtomodori
+明朝体 mʲntʃoː tai
+だまさ dɑmɑsɑ
+朴 bo̞kɯ
+格付け kɑkɯzɯke
+半値 hɑnne
+頭脳 zɯno̞ɯ
+カイロ kɑiro
+戦犯 senpɑn
+痴話 tʃiɯa
+ひじ hidʒi
+進行 ʃinko̞ɯ
+長距離 tʃo̞ɯkʲo̞ri
+撃っ ɯʔ
+琉球 rʲɯːkʲɯː
+市議 ʃigi
+上げ ɑge
+乗っかる noʔkɑrɯ
+優しく jasaʃikɯ
+添加 tenkɑ
+費 hi
+トリミング to̞rimingɯ
+滅入る me̞irɯ
+旅先 tɑbisɑki
+そこ so̞ko̞
+アンアン ɑnɑn
+ロク ro̞kɯ
+経済 keːzɑi
+政経学部 seːkeːgakɯbɯ
+消息筋 ʃoːsokusudʒi
+未来永劫 mirɑi eːgoː
+向き合う mɯkiɑɯ
+ううん ɯːn
+トリウム to̞riɯmɯ
+斡旋 ɑsen
+台所 dɑidokoro
+パク pɑkɯ
+行え okonɑe
+漁師 rʲo̞ɯʃi
+ムカッ mɯkɑ
+回し maɯaʃi
+激化 gekikɑ
+大熊 oːgɯmɑ
+蛇足 dasokɯ
+件 ke̞n
+無限 mɯge̞n
+アイス aisɯ
+仕手 ʃite̞
+振り回さ fɯɾimawasa
+憂慮 jɯːrʲo̞
+フリー ɸɯriː
+トレイ to̞rei
+マクドナルド mɑkɯdonɑrɯdo
+襟 e̞ri
+外国 gɑikokɯ
+込める ko̞merɯ
+騙さ dɑmɑsɑ
+上っ no̞bo̞ʔ
+器楽 kigɑkɯ
+屋敷 jɑʃiki
+デタラメ detɑrɑme
+強気 t͡sɯjo̞ki
+値幅 nehɑbɑ
+弾ける hike̞rɯ
+恥ずかしい hɑzɯkɑʃiː
+十分 dʒɯː ɸɯn
+昌弘 mɑsɑiɾo
+感慨深 kɑngɑiʃin
+作り方 t͡sɯkɯrikɑtɑ
+ガード gɑːdo
+値下がり nesɑgɑri
+びく bikɯ
+期 ki
+交わし kaɯaʃi
+クラ kɯrɑ
+ひっそり hiʔso̞ɾi
+お爺さん odʒiːsɑn
+スズキ sɯzɯki
+必見 hiʔke̞n
+公社 koːʃɑ
+ぜ ze̞
+丸ビル mɑrɯ birɯ
+受け止め ɯketo̞me
+陸軍 rikɯgɯn
+間欠 kɑnket͡sɯ
+制約 seːjakɯ
+チョウン tʃo̞ɯ n
+孝則 tɑkɑnori
+タイムス taimɯsɯ
+きつく kit͡sɯkɯ
+荒れ ɑre
+みるみる mirɯmirɯ
+消さ kesɑ
+漱石 so̞ːseki
+イマドキビジネスマン imadokibiʒinesɯman
+鹿 ʃikɑ
+入団 ɲɯːdɑn
+強欲 go̞ɯjo̞kɯ
+踏襲 to̞ɯʃɯː
+がる gɑrɯ
+パス pasɯ
+ハッカビー hɑʔkɑ biː
+恐慌 kʲo̞ɯko̞ɯ
+むこ mɯko̞
+芽生え mebɑe
+書類 ʃo̞rɯi
+脇の下 ɯakinoʃita
+私情 ʃidʒo̞ɯ
+無党派 mɯtoːhɑ
+受講 dʒɯko̞ɯ
+可視 kɑʃi
+イニシアティブ iniʃiɑtibɯ
+チラシ tʃirɑʃi
+車椅子 kɯrɯmaisɯ
+保安 hoɑn
+可 kɑ
+ベトナム betonɑmɯ
+売れ筋 uɾesudʒi
+後発 koːhɑt͡sɯ
+強い t͡sɯjo̞i
+後ろ向き ɯʃiro̞mɯki
+口火 kɯtʃibi
+思い出す omoidasɯ
+心強い ko̞ko̞ro̞zɯjo̞i
+シュワシュワ ʃɯɯaʃɯɯa
+虎 torɑ
+香港 ho̞nko̞n
+シャキッ ʃɑki
+純正 dʑɯnˈze̞ː
+やめ jɑme
+寝言 nego̞to̞
+映像 eːzo̞ː
+ごめんなさい gomennɑsɑi
+毒気 do̞kɯke
+バグ bɑgɯ
+ある ɑrɯ
+出前 demɑe
+ライブラリ rɑibɯrɑri
+無給 mɯkʲɯː
+当日 to̞ɯdʒit͡sɯ
+ウェバー ebɑː
+悩ましい nɑjɑmɑʃii
+あの手この手 ɑno te kono te
+ザッポス zaposɯ
+上陸 dʒo̞ɯrikɯ
+オフライン oɸɯrɑin
+葬る ho̞ɯmɯrɯ
+無心 mɯʃin
+鎖骨 sakotsɯɯ
+英国 eːko̞kɯ
+ブース bɯːsɯ
+重 dʒɯː
+応える kotɑerɯ
+伸びる no̞birɯ
+すむ sɯmɯ
+不倫 ɸɯrin
+判決 hɑnket͡sɯ
+殺到 sɑʔtoː
+優勝 jɯːʃo̞ɯ
+広東 kɑnton
+同乗 do̞ɯdʒo̞ɯ
+鬼 o̞ni
+得る ɯrɯ
+定期 te̞ːki
+クルー kɯrɯː
+盛り上げる moriɑgerɯ
+シ ʃi
+自動的 dʒido̞ɯteki
+横目 jo̞ko̞me
+弘明 hiroɑki
+老朽 ro̞ɯkʲɯː
+独りよがり hitorijogɑri
+けれども keredo̞ mo̞
+右端 migihɑʃi
+下さ kɯdasa
+稀 mɑre
+雲隠れ kɯmogɑkɯre
+すね sɯne
+火力 kɑrʲokɯ
+言い回し iimaɯaʃi
+っぷり ʔpɯri
+色合い iroɑi
+最適 sɑiteki
+スワイプ sɯwaipɯ
+時給 dʒikʲɯː
+めっちゃ meʔtʃɑ
+いきなり ikinɑri
+優待 jɯːtɑi
+張り詰め hɑrit͡sɯme
+ぬれる nɯre̞rɯ
+ボタ botɑ
+好ましい konomɑʃiː
+狭間 hɑzɑmɑ
+申し上げ moːʃiɑge
+自前 dʒimɑe
+復習 ɸɯkɯʃɯː
+大田 oːtɑ
+大きかっ oːkikɑʔ
+ギリギリ girigiri
+まるく mɑrɯkɯ
+くりかえす kɯɾikaesɯ
+日産 nɪsɑn
+慎み t͡sɯt͡sɯʃimi
+考えもの kɑngɑemono
+古今 ko̞ko̞n
+楊 jo̞ɯ
+演技 e̞ngi
+マイク mɑikɯ
+レース reːsɯ
+陳腐 tʃinpɯ
+転落 tenrɑkɯ
+遠征 e̞nse̞ː
+聞き kiki
+脚本 kʲɑkɯhon
+いる irɯ
+猛暑 mo̞ɯʃo̞
+目先 mesɑki
+軍人 gɯndʒin
+違っ tʃigɑʔ
+つまらない t͡sɯmɑrɑnɑi
+模様 mo̞jo̞ɯ
+自費 dʒihi
+漏らし morɑʃi
+きつかっ kit͡sɯkɑʔ
+事務 dʒimɯ
+ヒートアップ hiːtoɑʔpɯ
+ひたひた hitɑhitɑ
+カメルーン kɑmerɯːn
+不測 fɯsokɯ
+なり nɑri
+オヤジ ojɑdʒi
+もう一度 mo̞ɯ itʃi do̞
+制定 se̞ːte̞ː
+忍び寄っ ʃino̞bijo̞ʔ
+実際 dʒisai
+端的 tɑnteki
+ぶつかり bɯt͡sɯkɑri
+よかっ jokɑʔ
+ネパール nepɑːrɯ
+微笑ま hohoemɑ
+泣く nɑkɯ
+鉄則 tesokɯ
+近時 kindʒi
+はしゃい hɑʃɑi
+開示 kɑidʒi
+突き進ん tsɯkisɯsɯn
+グレード gɯreːdo̞
+日替わり higaɯari
+気がかり kigɑkɑri
+四方八方 ʃikɑtɑ hɑʔpoː
+もま mo mɑ
+読める jo̞merɯ
+斗 to̞
+通じる t͡sɯːdʒirɯ
+隠さ kakɯsa
+税目 zeːmo̞kɯ
+誰か dɑre kɑ
+西濃運輸 seːnoːɯɲjɯ
+武装 bɯsoː
+後味 ɑtoɑdʒi
+小さい tʃiːsɑi
+温泉 ɔnsən
+同じく onɑdʒikɯ
+印鑑 inkɑn
+パ pɑ
+憲章 kenʃo̞ɯ
+峰 mine̞
+直立 tʃo̞kɯrit͡sɯ
+命がけ inotʃigɑke
+ウェブ e̞bɯ
+山下 jɑmɑʃitɑ
+柳井 jɑnɑi
+減らし herɑʃi
+引い hii
+現す aɾawasɯ
+ディズニー dizɯniː
+流入 rʲɯːɲɯː
+趙 tʃo̞ɯ
+連載 ɾensɑi
+呼吸 ko̞kʲɯː
+ハンカチ hɑnkɑtʃi
+本 ho̞n
+下放 kɑhoː
+知事 tʃidʒi
+達し tɑsʃi
+ないしは naiʃi ɯa
+論文 ro̞nbɯn
+神経質 ʃinke̞ːʃit͡sɯ
+市議会 ʃi gikɑi
+数多く kɑzɯoːkɯ
+夢にも jɯme ni mo̞
+鮮明 semmɑi
+めったに meʔtɑ ni
+三条 sɑɲʒoː
+摂 setsɯzɯ
+見習い minɑrɑi
+仕組み ʃikɯmi
+マスタード masɯtaːdo
+いっつも iʔt͡sɯ mo̞
+主役 ʃɯjɑkɯ
+ペンギン pe̞ngin
+かぜ kɑze
+弁解 benkɑi
+すらすら sɯɾasɯɾa
+つまま t͡sɯ mɑmɑ
+お正月 oʃoːgɑt͡sɯ
+ままならなく mɑmɑ nɑrɑ nɑkɯ
+遂げる to̞gerɯ
+押さ osɑ
+あいつ ɑit͡sɯ
+いいっ iːʔ
+高貴 ko̞ɯki
+お茶の水女子大 otʃɑnomizɯ dʒoʃidɑi
+もん mo̞n
+主将 ʃɯʃo̞ɯ
+詰めかける t͡sɯmekɑkerɯ
+マケイン mɑkein
+選定 se̞nte̞ː
+ヘラヘラ herɑherɑ
+半数 hansɯː
+フライト ɸɯrɑito
+あそこ ɑsoko
+作家 sɑkɑ
+大き o̞ːki
+開幕 kɑimɑkɯ
+ワン ɯan
+補足 hosokɯ
+まして mɑʃite
+英知 e̞ːtʃi
+カズオ kɑzɯo
+上り no̞bo̞ri
+低地 te̞ːtʃi
+成り立つ nɑritɑt͡sɯ
+臆 ɑː
+整形 se̞ːke̞ː
+朗報 ro̞ɯho̞ɯ
+市況 ʃikʲo̞ɯ
+凄まじい susamadʒiː
+田坂 tɑsɑkɑ
+でき de̞ki
+実需 dʒit͡sɯdʒɯ
+際立っ kiɯadaʔ
+透き通る sɯkitooɾɯ
+議長 gitʃo̞ɯ
+里中 sɑtonɑkɑ
+次第 ʃidɑi
+桃 mo̞mo̞
+悩ま nɑjɑmɑ
+鉱石 ko̞ːseki
+アレルギー ɑrerɯgiː
+開講 kɑikoː
+共に to̞mo̞ ni
+パタゴニア pɑtɑgoniɑ
+キリスト kiɾisɯto
+続けれ t͡sɯzɯke̞re̞
+必携 hiʔke̞ː
+閉じる to̞dʒirɯ
+党内 toːnɑi
+駆け足 kɑkeɑʃi
+移転 ite̞n
+瀬 se̞
+ある程度 ɑrɯ teːdo
+ナノ nɑno
+人達 hitotɑtʃi
+アトレティコ ɑtoretiko
+木村 kimɯrɑ
+迷惑 meːɯakɯ
+網膜 moːmɑkɯ
+鷹山 joːzɑn
+バルセロナ baɾɯseɾona
+わし ɯaʃi
+ロールモデル ro̞ːrɯ mo̞derɯ
+ゴメス gomesɯ
+大好き daisɯki
+逃し nogɑʃi
+混迷 ko̞nmeː
+社宅 ʃɑtɑkɯ
+パレード pɑreːdo
+巨人 kʲo̞dʒin
+プランナー pɯrɑnnɑː
+きれ kire̞
+見える mie̞rɯ
+動い ɯgo̞i
+ルネサス rɯnesasɯ
+ギスギス gisɯgisɯ
+学力 gɑkɯrʲokɯ
+立ち直ら tɑtʃinɑorɑ
+原則 gensokɯ
+パスミス pasɯmisɯ
+富士急 ɸɯdʒikʲɯː
+度重なる tabɪkasanaɾɯ
+乗り切る no̞rikirɯ
+そんな sonnɑ
+傑作 keʔsakɯ
+カレー kɑreː
+ほっと ho̞ʔto̞
+巻き mɑki
+サイズ saizɯ
+近づける tʃikɑzɯkerɯ
+外車 gɑiʃɑ
+極細 gokɯboso
+たるみ tɑrɯmi
+吉 kitʃi
+予定 jo̞teː
+家業 kɑgʲoː
+至って itɑʔte
+年下 toʃiʃitɑ
+池辺 ike̞be̞
+良心 rʲo̞ɯʃin
+決勝 kɛʔʃo̞ː
+デジカメ dedʒikɑme
+ハズレ hɑzɯre
+なくなら nɑkɯnɑrɑ
+トーマ toːmɑ
+レポーター repoːtɑː
+真冬 mɑɸɯjɯ
+マエストロ maesɯtoɾo
+お礼 o̞reː
+月々 t͡sɯkizɯki
+要件 jo̞ɯken
+表す aɾawasɯ
+面持ち o̞mo̞mo̞tʃi
+ラマーリョ rɑmɑːrʲo
+抜ける nɯke̞rɯ
+号外 goːgɑi
+起こし o̞ko̞ʃi
+セリフ seɾifɯ
+預金 jo̞kin
+未定 mite̞ː
+外さ hazɯsa
+たまたま tɑmɑtɑmɑ
+定着 teːtʃɑkɯ
+きっちり kiʔtʃiri
+進み sɯsɯmi
+暖か ɑtɑtɑkɑ
+バンド bɑndo
+まぎらわしい magiraɯaʃiː
+最下位 sɑikɑi
+助成 dʒo̞seː
+体重 tɑidʒɯː
+働きかけ hɑtɑrɑkikɑke
+怠っ okotɑʔ
+回転 kɑiten
+つじつま t͡sɯdʒit͡sɯmɑ
+横浜国立大学 jokohɑmɑ kokɯrit͡sɯ dɑigɑkɯ
+リフレ riɸɯre̞
+差し込む saʃikomɯ
+赤身 ɑkɑmi
+外資 gɑiʃi
+学内 gɑkɯnɑi
+飾る kɑzɑrɯ
+調整 tʃo̞ːseː
+っと ʔto̞
+置き o̞ki
+パキスタン pakisɯtan
+トレーニング to̞reːningɯ
+友和 tomokɑzɯ
+わん ɯan
+効率 ko̞ɯrit͡sɯ
+安上がり jasɯaɣaɾi
+薬物 jɑkɯbɯt͡sɯ
+統合 to̞ɯgo̞ɯ
+主流 ʃɯrʲɯː
+政策 seːsakɯ
+ともども to̞mo̞do̞mo̞
+号 go̞ɯ
+実践 dʑisən
+代名詞 dɑimeːʃi
+世界一 sekɑiitʃi
+昇格 ʃoːkɑkɯ
+ヤジ jɑdʒi
+収まら osɑmɑɾɑ
+匡 tɑdɑʃi
+タグ tɑgɯ
+思い立ち omoitɑtʃi
+省 ʃo̞ɯ
+強いる ʃiirɯ
+悪びれる ɯarɯbirerɯ
+負けず嫌い mɑkezɯ kirɑi
+気兼ね kigɑne
+産 sɑn
+違え tʃigɑe
+マス masɯ
+飛び散っ to̞bitʃiʔ
+本当 ho̞nto̞ɯ
+ラピーヌ rɑpiːnɯ
+波及 hɑkʲɯː
+三唱 sɑnsoː
+けれど keredo̞
+調理場 tʃo̞ɯridʒo̞ɯ
+松阪 mɑt͡sɯzɑkɑ
+引き下げ hikisɑge
+運ぶ hɑkobɯ
+匠 tɑkɯmi
+ウルキオラ ɯrɯkiorɑ
+ダサい dɑsɑi
+オランダ orɑndɑ
+邦銀 ho̞ɯgin
+法令 ho̞ːreː
+図っ zɯʔ
+構文 ko̞ɯbɯn
+酸味 sɑnmi
+モール mo̞ːrɯ
+ルビー rɯbiː
+有望 jɯːbo̞ɯ
+旅行 rʲo̞ko̞ɯ
+キッ ki
+三脚 sankjakɯ
+カワイイ kaɯaiː
+運気 ɯnki
+精子 se̞iʃi
+費目 himo̞kɯ
+経過 keːkɑ
+削除 sakɯxo
+クリア kɯriɑ
+壮一 so̞ːitʃi
+狩猟 ʃɯrʲo̞ɯ
+売切れ ɯrikire̞
+案 ɑn
+クリスマス kɯɾisɯmasɯ
+執ら torɑ
+会計 kɑikeː
+ほんとう ho̞nto̞ɯ
+のぼり no̞bo̞ri
+改まっ ɑrɑtɑmɑʔ
+現物 ge̞nbɯt͡sɯ
+子会社 ko kɑiʃɑ
+飛び込め to̞biko̞me
+有権者 jɯːkenʃɑ
+松戸 mɑt͡sɯdo
+近畿 kinki
+不透明 ɸɯto̞ːmeː
+サマースクール samaːsɯkɯːrɯ
+テクスト tekɯsɯto
+枠 ɯakɯ
+きみ kimi
+公開 koːkɑi
+リーク riːk
+賛同 sɑndoː
+藤原 ɸɯdʒiɯara
+親和 ʃinɯa
+ウグイス ɯɡisɯ
+み mi
+研ぎ澄ませ toxisɯmase
+無表情 mɯço̞ɯdʒo̞ɯ
+大差 tɑisɑ
+聞こ kiko̞
+亡くす nakɯsɯ
+デスク desɯkɯ
+ヒッグス higɯsɯ
+見当 kento̞ɯ
+制 se̞ː
+施し ho̞do̞ko̞ʃi
+神保 dʒinbo̞
+積み残さ tsɯɯminokosa
+大小 dɑiʃoː
+しょうが ʃoː gɑ
+横文字 jo̞ko̞ mo̞dʒi
+遠い to̞ːi
+調える to̞to̞no̞erɯ
+なん nɑn
+先入観 seɲɲɯːkan
+倉橋 kɯrɑhɑʃi
+これ ko̞re
+あるいは ɑrɯihɑ
+あげ ɑge
+省庁 ʃo̞ɯtʃo̞ɯ
+色気 iro̞ke
+フェルプス feɾɯpɯsɯ
+連れ出し t͡sɯredɑʃi
+啖呵 tɑnkɑ
+ゲリラ gerirɑ
+金 kin
+一新 iʔʃin
+念入り ne̞niɾi
+通じ t͡sɯːdʒi
+今春 ko̞nʃɯn
+単に tɑnni
+自動 dʒido̞ɯ
+幼稚 jo̞ɯtʃi
+打てる ɯte̞rɯ
+追い越し o̞iko̞ʃi
+暴い ɑbɑi
+アドバイザー ɑdobɑizɑː
+エンド endo̞
+学者 gɑkɯʃɑ
+長官 tʃoːkɑn
+意地 idʒi
+本部 ho̞nbɯ
+音 o̞to̞
+道義 do̞ɯgi
+夜景 jɑkeː
+そっけ so̞ʔkə
+転倒 tento̞ɯ
+基礎 kiso̞
+しまお ʃimɑo
+万端 bɑntɑn
+ユズ jɯzɯ
+破 hɑ
+ランチタイム rɑntʃitɑimɯ
+文章 bɯnʃo̞ɯ
+試食 ʃiʃo̞kɯ
+津島 t͡sɯʃimɑ
+現在地 genzɑitʃi
+次いで t͡sɯide̞
+飛び入り to̞biiri
+ウチ ɯtʃi
+不法 ɸɯho̞ɯ
+叫ん sɯːkən
+春本 ʃɯnpo̞n
+ホリ ho̞ri
+陸連 rikɯre̞n
+祐一郎 jɯːitʃiro̞ɯ
+図書館 toʃokɑn
+井 i
+飢餓 kigɑ
+厳格 genkɑkɯ
+ヤミ jɑmi
+ジョージ dʒo̞ːdʒi
+認め mito̞me
+常時 dʒo̞ɯdʒi
+大まか oːmɑkɑ
+ズガッ zɯgɑ
+かける kɑkerɯ
+ゆく jɯkɯ
+きのこ kino̞ko̞
+清掃 seːso̞v
+ラッキー rɑʔkiː
+隣り合わせ tonaɾjaɯase
+円盤 enbɑn
+カメラマン kɑmerɑmɑn
+分り ɯakari
+戦死 se̞nʃi
+恐る恐る osoɾɯosoɾɯ
+盛りだくさん moɾidakɯsan
+空 soɾɑ
+暖かく ɑtɑtɑkɑkɯ
+ヤカン jɑkɑn
+要因 jo̞ɯin
+場外 dʒoːgɑi
+無精 mɯseː
+ムラタ mɯrɑtɑ
+でき上がり dekiɑgɑri
+精悍 seːkɑn
+ねだら nedɑrɑ
+やく jɑkɯ
+必ずしも kɑnɑrɑzɯ ʃimo
+イボンヌ i bo̞nnɯ
+いえ ie̞
+及ば ojobɑ
+役割 jakɯɯari
+銀行株 ginkoː kɑbɯ
+あぜん ɑzen
+姉妹 ʃimɑi
+投げ nɑge
+三角 sankakɯ
+表沙汰 omotezɑtɑ
+大島 oːʃimɑ
+しんこう ʃin ko̞ɯ
+疲れ t͡sɯkɑre
+名 nɑ
+粒子 rʲɯːʃi
+かなり kɑnɑri
+医者 iʃɑ
+くじ kɯdʒi
+潰れ t͡sɯbɯre̞
+かけつけ kɑket͡sɯke
+ごとく go̞to̞kɯ
+奇 ki
+債権 sɑiken
+申し出 mo̞ɯʃide
+危うく ɑjɑɯkɯ
+シリウス ʃiɾiɯsɯ
+猿之助 ɛnosɯkɛ
+集合 ʃɯːgo̞ɯ
+感触 kɑnʃokɯ
+示し ʃime̞ʃi
+臣民 ʃinmin
+気が付い ki gɑ t͡sɯi
+本格 honkɑkɯ
+悪評 ɑkɯçoː
+受動 dʒɯdo̞ɯ
+トヨタホームミサワ tojotahoːmɯmisawa
+格好いい kɑʔkoː iː
+アラサー ɑrɑsɑɯ
+信号 ʃingo̞ɯ
+草食 soːʃokɯ
+祭神 saidʒin
+実母 dʒit͡sɯbo̞
+トウ to̞ɯ
+追い越そ o̞iko̞so̞
+後楽園 koːrɑkɯen
+重宝 tʃo̞ɯho̞ɯ
+欠かす kakasɯ
+よく jo̞kɯ
+賢い kɑʃikoi
+なか nɑkɑ
+あらゆる ɑrɑjɯrɯ
+番号 bɑngoː
+余力 jo̞rʲo̞kɯ
+接点 se̞te̞n
+リバプール ribɑpɯːrɯ
+足下 ɑʃimoto
+台無し dɑinɑʃi
+丁寧 te̞ːne̞ː
+いかほど ikɑ hodo
+クラス kɯɾasɯ
+バランス baɾansɯ
+待ち伏せ matʃiβɯse
+長丁場 nɑgɑ tʃoːbɑ
+不眠症 ɸɯminʃo̞ɯ
+売ら ɯrɑ
+じわじわ dʒiɯadʒiɯa
+概念 gɑinen
+実り mino̞ri
+セラピー serɑpiː
+余り ɑmɑri
+松嶋 mɑt͡sɯʃimɑ
+錯綜 sakɯsoː
+法制 ho̞ːseː
+砲撃 ho̞ɯgeki
+足元 ɑʃimoto
+超 tʃo̞ɯ
+幼稚園 jo̞ɯtʃien
+カテゴリ kɑtegori
+赤ちゃん ɑkɑtʃɑn
+自由 dʒijɯː
+サイレン sɯiɾe̞n
+落ち着い o̞tʃit͡sɯi
+秀治 ʃɯːdʒi
+同名 do̞ːmeː
+拉致 rɑtʃi
+頼っ tɑjoʔ
+婚約 koɲakɯ
+南口 minɑmigɯtʃi
+登山 tozɑn
+信頼 ʃinrɑi
+世界 sekɑi
+ようやく joːjɑkɯ
+寄り jo̞ri
+亀裂 kire̞t͡sɯ
+炊く tɑkɯ
+貸金 kɑʃikin
+フォード ɸo̞ːdo̞
+ジュゴン dʒɯgo̞n
+遅 tʃi
+ポジティブ po̞dʒitibɯ
+フォーリンラブ ɸoːrin rɑbɯ
+川面 kaɯamo
+及ぼす ojobosɯ
+モノ mo̞no̞
+市原 itʃihɑrɑ
+面食らっ menkɯrɑʔ
+恵まれ megɯmɑre
+乳液 ɲɯːe̞ki
+認める mito̞merɯ
+左足 hidɑriɑʃi
+金沢 kanazaɯa
+焼肉 jɑkinikɯ
+従来 dʒɯːrɑi
+阪急 hɑnkʲɯː
+不利益 ɸɯrie̞ki
+ロゴ ro̞go̞
+次 t͡sɯgi
+禍根 kɑkon
+病 jɑmɑi
+本誌 ho̞nʃi
+離島 rito̞ɯ
+這い hɑi
+漂っ tɑdɑjoʔ
+なぞり nɑzori
+管 kɑn
+山梨 jɑmɑnɑʃi
+開き hirɑki
+すがすがしい sɯɡasɯɡaʃiː
+無難 bɯnɑn
+品種 hinʃɯ
+新旧 ʃinkʲɯː
+新潟 niːgɑtɑ
+窃盗 seʔto̞ː
+最たる sajtaɾɯ
+用途 jo̞ɯto̞
+まとめ買い mɑtomegɑi
+出かける dekɑkerɯ
+もう mo̞ɯ
+ベージュ be̞ːdʒɯ
+夜店 jo̞mise
+捜す sagasɯ
+約 jɑkɯ
+落ち込む o̞tʃiko̞mɯ
+危なっかしい ɑbɯnɑʔkɑʃii
+だし dɑʃi
+追っ o̞ʔ
+淡泊 tɑnpɑkɯ
+消耗 ʃo̞ɯmo̞ɯ
+気高い kedɑkɑi
+神 kɑmi
+太極拳 tɑikʲokɯken
+社説 ʃɑsetsɯ
+同人 do̞ɯnin
+水質 sɯiʃitsɯ
+隊員 tɑiin
+出さ dɑsɑ
+法人 ho̞ɯdʒin
+変える kɑerɯ
+終わり oɯari
+敷地 ʃikitʃi
+中枢 tʃɯːsɯː
+中産 tʃɯːsan
+ですが desɯɣa
+棟 mɯne̞
+後続 ko̞ɯzo̞kɯ
+研究 ke̞nkʲɯː
+イラク irɑkɯ
+換言 kɑngen
+強打 kʲoːdɑ
+着想 tʃakɯsoː
+抑 so̞mo̞so̞mo̞
+あなどっ ɑnɑdo
+自主トレ dʒiʃɯ to̞re
+スクエア sɯkwea
+合う ɑɯ
+小畑 obɑtɑ
+打ちどころ ɯtʃido̞ko̞ro̞
+浮き沈み ɯkiʃizɯmi
+鳴り響い nɑrihibii
+絵師 e̞ʃi
+つまずい t͡sɯmɑzɯi
+敗退 hɑitɑi
+旭山 ɑsɑhijɑmɑ
+数式 sɯːʃiki
+野田 nodɑ
+下げる sagerɯ
+ブラジャー bɯrɑdʒɑː
+マヨネーズ mɑjoneːzɯ
+再来 sɑiɾɑi
+全開 zenkɑi
+早 hɑjɑ
+ぐるみ gɯrɯmi
+頑丈 gɑndʒoː
+近い tʃikɑi
+けん制 ke̞nse̞ː
+問いかけ toikɑke
+仏像 bɯt͡sɯzo̞ɯ
+付 ɸɯ
+将来 ʃoːrɑi
+居酒屋 izɑkɑjɑ
+競技 kʲo̞ɯgi
+四 jo̞n
+野党 jɑtoː
+引っ掛け hiʔkɑke
+なにせ nɑnise
+美味 bimi
+大騒ぎ oːsaɯagi
+私大 ʃidɑi
+囲む kɑkomɯ
+クライメートゲート kɯrɑimeːto geːto
+穏便 o̞nbin
+上目遣い ɯɯamet͡sɯkai
+あおい ɑoi
+空っぽ kɑrɑʔpo
+蟹 kɑni
+野久保 no̞kɯbo̞
+クルド kɯrɯdo̞
+あちこち ɑtʃikotʃi
+ほっとけ ho̞ʔto̞ke
+持ち主 mo̞tʃinɯʃi
+参院 sɑnin
+愛娘 manamɯsɯme
+カメコ kɑme ko
+染まら somɑɾɑ
+ガス gasɯ
+釣っ t͡sɯʔ
+踏む ɸɯmɯ
+存亡 so̞nbo̞ː
+つぎつぎ t͡sɯgit͡sɯgi
+子 ko̞
+東部 to̞ɯbɯ
+語り kɑtɑri
+知り ʃiri
+のく no̞kɯ
+機種 kiʃɯ
+特異 to̞kɯi
+心掛け kokorogɑke
+ライアン rɑiɑn
+出鼻 debɑnɑ
+リポーター ripoːtɑː
+味覚 mikɑkɯ
+相談 so:dɑn
+回数 kaisɯː
+奈辺 nɑhen
+勝た kɑtɑ
+これら korerɑ
+栄誉 eːjo̞
+滞っ to̞do̞ko̞o̞ʔ
+管弦楽 kɑngengɑkɯ
+真っ白 mɑʔʃiɾoʔ
+国外 kokɯgɑi
+痛ま itɑmɑ
+それどころか soredokorokɑ
+ミュージカル mʲɯːdʒikɑrɯ
+試す tamesɯ
+アドピ ɑdopi
+アイコン ɑikon
+スキル sɯkiɾɯ
+切り返し kirikɑeʃi
+ギャップレスマイクロレンズ gɒʔpɯresumaikurorenzu
+次第に ʃidɑi ni
+肩 kɑtɑ
+ソリスト soɾisɯto
+深み ɸɯkɑmi
+トヨタ tojotɑ
+設け mo̞ɯke
+ラストスパート rasɯtosɯpaːto
+農林 no̞ɯrin
+立 tɑte
+釜 kɑmɑ
+焦り ɑseɾi
+困る komɑrɯ
+鍼灸 ʃinkʲɯː
+近寄る tʃikɑjorɯ
+軟骨 nɑnkot͡sɯ
+たくっ tɑkɯʔ
+借りる kɑrirɯ
+クイズ kɯizɯ
+イエロー iero̞ː
+阿吽 ɑɯn
+壇上 dɑndʒoː
+各市 kɑkɯʃi
+人口 dʒinko̞ɯ
+肩入れ kɑtɑire
+暴落 boːrɑkɯ
+ソフトテニス sofɯtotenisɯ
+明日 asɯ
+称え tonɑe
+ありゃ ɑrʲɑ
+折り合わ oriaɯa
+こり ko̞ri
+毎月 mɑit͡sɯki
+使える t͡sɯkɑerɯ
+力強い tʃikɑrɑzɯjoi
+巨額 kʲogɑkɯ
+通告 t͡sɯːko̞kɯ
+制御 seːgo
+もたらさ motɑɾɑsɑ
+怖かっ koɯakaʔ
+国防省 ko̞kɯbo̞ɯʃo̞ɯ
+畜産 tʃikɯsan
+行なう okonɑɯ
+准 dʒɯn
+イケメンシェフ ike̞me̞n ʃe̞ɸɯ
+ひょっと ço̞ʔto̞
+通学 t͡sɯːgɑkɯ
+走り込む hɑʃirikomɯ
+機体 kitɑi
+廃屋 hɑiokɯ
+いつの間にか it͡sɯnomɑnikɑ
+一彦 kɑzɯhiko
+背後 hɑigo
+度外視 dogɑiʃi
+ざっと zɑʔ to
+付い t͡sɯi
+使い t͡sɯkɑi
+名簿 meːbo̞
+突然 to̞t͡sɯzen
+予備校 jo̞biko̞ɯ
+千差万別 sensɑbɑnbetsɯ
+アーティー ɑːtiː
+見やす mijasɯ
+溜まっ tɑmɑʔ
+代議 dɑigi
+気に入っ ki ni hɑiʔ
+注ぎ so̞so̞gi
+かかり kɑkɑri
+受け付け ɯke̞t͡sɯke̞
+コタツ kotɑt͡sɯ
+任期 ninki
+お知らせ oʃiɾɑse
+一筋 hitosudʒi
+鳥肌 torihɑdɑ
+未婚 miko̞n
+祝福 ʃɯkɯɸɯkɯ
+同士 do̞ɯʃi
+潰さ tsɯbɯsa
+てよ te jo̞
+不全 ɸɯze̞n
+目新しい meɑtɑrɑʃiː
+人民 dʒinmin
+専業 sengjo̞ː
+浩章 hiroɑki
+槍 jɑri
+起動 kido̞ɯ
+進出 ʃinʃɯt͡sɯ
+以後 igo̞
+毎日新聞 mɑinitʃi ʃinbɯn
+ビラ birɑ
+河合 kaɯai
+グワッ gɯɯa
+立て tɑte
+富士大 ɸɯdʒidɑi
+転用 tenjo̞ː
+舞い mɑi
+明治学院大 meːdʒi gɑkɯindɑi
+輝く kɑgɑjɑkɯ
+繊維 se̞ni
+巻き込み mɑkikomi
+声援 sɯjɯn
+起 ki
+移動 ido̞ɯ
+とら torɑ
+チー tʃiː
+正当 seːto̞ː
+頑張り gɑnbɑri
+こぼし ko̞bo̞ʃi
+血 tʃi
+お好み焼き okonomijɑki
+リーグ riːgɯ
+取り寄せ to̞ɾijo̞se
+転換期 tenkɑn ki
+視力 ʃirʲo̞kɯ
+遠慮なし enrʲo nɑʃi
+サーバ sɑːbɑ
+実情 dʒit͡sɯdʒo̞ɯ
+共感 kʲoːkɑn
+り ri
+完ぺき kɑnpeki
+混ぜ合わ maze aɯa
+大橋 oːhɑʃi
+ドリーム do̞riːmɯ
+下地 ʃitɑdʒi
+きめ kime̞
+ソウル sowɾɯ
+和彦 kɑzɯhiko
+機 ki
+ペース peːsɯ
+鉱 ko̞ɯ
+唾 t͡sɯbɑ
+むしばん mɯʃibɑn
+選曲 senkjokɯ
+品質 hinʃit͡sɯ
+売りこま ɯrikomɑ
+通院 t͡sɯːin
+内蔵 nɑizoː
+納税 no̞ɯzeː
+ミスターミニスター misɯtaːminisɯtaː
+もってこい mo̞ʔte ko̞i
+風間 kɑzɑmɑ
+優勢 jɯːseː
+堪能 tɑnnoː
+裁き sɑbɑki
+店頭 tento̞ɯ
+格闘 kɑkɯtoː
+装備 so̞ːbi
+歌詞 kɑʃi
+投げ出す nagedasɯ
+異性 ise̞ː
+東大 toːdɑi
+キミ kimi
+循環 dʒɯnkɑn
+ゲンキ ge̞nki
+寝具 ʃingɯ
+つづける t͡sɯzɯke̞rɯ
+頼む tɑnomɯ
+せ se̞
+向かっ mɯkɑʔ
+真っ mɑʔ
+正式 se̞ːʃiki
+おしまい oʃimɑi
+ペンタックス pentakekɯsɯ
+師走 ʃiwasɯ
+声優 seːjɯː
+当人 to̞ɯnin
+古着 ɸɯrɯgi
+マンガ mɑngɑ
+コミュ ko̞mʲɯ
+共産 kjoːsɑn
+りっぷく riʔ pɯ kɯ
+生き残る ikino̞ko̞rɯ
+めった meʔtɑ
+転じ te̞ndʒi
+無くなっ nɑkɯnɑʔ
+暴れん ɑbɑren
+下関 ʃimo̞no̞seki
+ほっとい ho̞ʔto̞i
+誇張 ko̞tʃo̞ɯ
+不足 fɯsokɯ
+果たせる hataseɾɯ
+数値 sɯːtʃi
+過ぎ sɯgi
+飲食 inʃo̞kɯ
+ハンズマン hɑnzɯ mɑn
+火 hi
+不具合 ɸɯgɯɑi
+コ ko̞
+当時 to̞ɯdʒi
+ほぐす hogɯsɯ
+服役 ɸɯkɯe̞ki
+ルート rɯːto̞
+消化 ʃoːkɑ
+借入金 kɑriirekin
+添え so̞e
+手間取る temɑdorɯ
+直撃 tʃo̞kɯgeki
+支部 ʃibɯ
+様変わり samaɡaɯari
+直輝 nɑoki
+下半期 kɑhɑnki
+時折 to̞kio̞ri
+満点 mɑnten
+よろしい jo̞ro̞ʃiː
+焙 ?
+見逃せ minoɰᵝɑse
+申し mo̞ɯʃi
+ただ tɑdɑ
+新語 ʃingo̞
+大々的 dɑidɑiteki
+浮浪 ɸɯro̞ɯ
+アンダー ɑndɑː
+混じっ mɑdʒiʔ
+真二 ʃindʒi
+お菓子 okɑʃi
+新手 ɑrɑte
+あがり ɑgɑri
+ビザ bizɑ
+起源 kige̞n
+小玉 kodɑmɑ
+魅せる miseɾɯ
+剛 t͡sɯjo̞ʃi
+正面 ʃo̞ɯmen
+遠かっ toːkɑʔ
+キリン kirin
+野口 no̞gɯtʃi
+あんな ɑnnɑ
+華やかさ hɑnɑjɑkɑsɑ
+うなっ ɯnɑʔ
+ざる zɑrɯ
+だが dɑgɑ
+忘れる vasɯɾeɾɯ
+否認 hinin
+何 nɑn
+前作 zensakɯ
+上がり ɑgɑri
+出会う deɑɯ
+局長 kʲo̞kɯtʃo̞ɯ
+官房 kɑnboː
+語学 gogɑkɯ
+体内 tɑinɑi
+御所 go̞ʃo̞
+税金 ze̞ːkin
+頑張れ gɑnbɑre
+骨 ho̞ne
+ムカデ mɯkɑde
+おとなしく otonɑʃikɯ
+一応 itʃio̞ɯ
+結成 ke̞ʔse̞ː
+前のめり mɑenomeri
+京極 kʲo̞ɯgo̞kɯ
+採っ to̞ʔ
+ほころぶ ho̞ko̞ro̞bɯ
+いちいち itʃiitʃi
+付き合っ t͡sɯkiɑʔ
+サリ sɑɾi
+あなた ɑnɑtɑ
+初戦 ʃo̞zɯn
+親父 ojɑdʒi
+大陸 tɑirikɯ
+軽症 keːʃo̞ː
+紙 kɑmi
+坊 bo̞ɯ
+最短 sɑitɑn
+余儀なく jogi nɑkɯ
+硬い kɑtɑi
+捕手 ho̞ʃɯ
+盛り上がっ moriɑgɑʔ
+奏し so̞ːʃi
+教える o̞ʃierɯ
+女優 dʒo̞jɯː
+ババ bɑbɑ
+新党 ʃinto̞ɯ
+レンズ re̞nzɯ
+池田 ikedɑ
+ミリ miri
+翻訳 hoɲakɯ
+王国 o̞ɯko̞kɯ
+死にかけ ʃi ni kɑke
+レストラン ɾesɯtoɾan
+譲り合い jɯzɯriɑi
+学徒 gɑkɯto
+持て mo̞te
+天国 tengo̞kɯ
+捉え torɑe
+単元 tɑngen
+妃 hi
+域 iki
+磨く migɑkɯ
+イナゴ inɑgo
+プディング pɯdingɯ
+白壁 ʃirɑkɑbe
+寒天 kɑnten
+ヘア heɑ
+むしろ mɯʃiro̞
+少ない sɯkɯnai
+船橋 ɸɯnɑbɑʃi
+手伝い tet͡sɯdɑi
+中退 tʃɯːtɑi
+もて mo̞te
+メール me̞ːrɯ
+保存 ho̞zo̞n
+やっかい jɑʔkɑi
+おしゃべり oʃɑberi
+何とも nɑn to mo
+ごった返し goʔtɑgɑeʃi
+新体操 ʃintɑisoː
+控訴 ko̞ːso̞
+統計 to̞ːkeː
+軍縮 gɯnʃɯkɯ
+質流れ ʃitʃinɑgɑre
+スプレー sɯpɯɾeː
+見よ mijo̞
+配線 hɑisen
+おぼえ o̞bo̞e
+宿し jɑdoʃi
+こちょのことじゃねいか kotʃjonokotodʒɑneːkɑ
+比べれ kɯrɑbere
+メロドラマ merodorɑmɑ
+キーボード kiːbo̞ːdo̞
+弱い joɯai
+トライ torɑi
+口出し kɯtʃidɑʃi
+収納 ʃɯːno̞ɯ
+ご馳走 go̞tʃizo̞ː
+みれ mire̞
+土台 dodɑi
+ふわふわ ɸɯɯaɸɯɯa
+アタシ ɑtɑʃi
+つけ t͡sɯke̞
+じっくり dʒiʔkɯri
+ベジタリアン bedʒitɑriɑn
+鳴い nɑi
+伴っ tomonɑʔ
+貧血 hinke̞t͡sɯ
+村長 so̞ntʃo̞ː
+拾う hiro̞ɯ
+両 rʲo̞ɯ
+整髪 seːhɑtsɯ
+殺人 satsɯʒin
+メダル medɑrɯ
+公約 koːjɑkɯ
+優遇 jɯːgɯː
+三 sɑn
+概略 gɑirʲɑkɯ
+ブカン bɯkɑn
+此処 ko̞ko̞
+打開 dɑkɑi
+理解 rikɑi
+貝津 kɑi t͡sɯ
+付せん fɯzen
+ヴァラナシ vɑɾɑnɑʃi
+不得意 ɸɯto̞kɯi
+脱線 dɑsən
+対照 tɑiʃoː
+異母 ibo̞
+引き下がら hikisɑɣɑɾɑ
+どうにか doː ni kɑ
+これぞ ko̞re zo̞
+つかう t͡sɯkɑɯ
+後編 ko̞ɯhen
+仕掛け ʃikɑke
+糖 to̞ɯ
+オクサワリョウコ okɯsawarjoːko
+苦しみ kɯrɯʃimi
+携わる tazɯsavaɾɯ
+こぼす kobosɯ
+中絶 tʃɯːze̞t͡sɯ
+まるっきり mɑrɯʔkiri
+峨山 ??
+癒し ijɑʃi
+レッスン ɾe̞sɯn
+カンファレンス kanfaɾensɯ
+最寄り mo̞jo̞ri
+不幸 ɸɯko̞ɯ
+スキャナー sɯkjanaː
+惑わさ madoɯasa
+作曲 saʔkjokɯ
+飛び出し tobidɑʃi
+農具 no̞ɯgɯ
+卑怯 hikʲo̞ɯ
+言葉 kotobɑ
+辞去 dʒikʲo̞
+変電 he̞nde̞n
+エトセトラ etosetoɾɑ
+べから bekɑrɑ
+老け ɸɯke̞
+隣人 rindʒin
+当たる ɑtɑrɯ
+講習 ko̞ɯʃɯː
+沙 sɯna
+掛川 kakegaɯa
+配信 hɑiʃin
+高調 ko̞ɯtʃo̞ɯ
+工程 ko̞ːteː
+申告 ʃinko̞kɯ
+信条 ʃindʒo̞ɯ
+停 te̞ː
+賄わ makanaɯa
+伝わる t͡sɯtaɯarɯ
+目 me̞
+休戦 kjɯːsen
+金魚 kingʲo̞
+ネス nezɯ
+つつ t͡sɯt͡sɯ
+為真 tɑme mɑkoto
+まさに mɑsɑni
+ツートンカラー t͡sɯː tonkɑ rɑː
+崩れ落ち kɯzɯreo̞tʃi
+旅館 rʲokɑn
+生物 seːbɯtsɯ
+情 dʒo̞ɯ
+上回る ɯɯamaɯarɯ
+得票 to̞kɯço̞ɯ
+重心 dʒɯːʃin
+支配 ʃihɑi
+思いやり omoijɑri
+軍事 gɯndʒi
+しょう ʃo̞ɯ
+本音 ho̞nne
+馬 ɯmɑ
+撥 bɑtʃi
+多かっ oːkɑʔ
+出生 ʃɯʔsoː
+サイ sɑi
+トヨタホンダ tojotɑ hondɑ
+歌舞伎 kɑbɯki
+珠 tɑmɑ
+貫い t͡sɯrɑnɯi
+援護 engo̞
+学問 gɑkɯmon
+フジ ɸɯdʒi
+闘い tɑtɑkɑi
+ドジッ do̞dʒi
+日立製作所 hitatʃiseːsakɯʃo
+恥ずかしく hɑzɯkɑʃikɯ
+ピタリ pitɑri
+全額 zengɑkɯ
+度目 do̞me
+丸い marɯi
+異彩 izɑi
+重圧 dʒɯːɑt͡sɯ
+側近 so̞ʔkin
+お盆 o̞bo̞n
+ふい ɸɯi
+既に sɯdeni
+こなす konasɯ
+なので nɑ no de
+やかましく jɑkɑmɑʃikɯ
+掴ま t͡sɯkɑmɑ
+めぐる me̞gɯrɯ
+ドラッグ dorɑʔgɯ
+市民 ʃimin
+一堂 itʃido̞ɯ
+浸す hitasɯ
+細か komɑkɑ
+勢いづい ikio̞izɯi
+愛顧 ɑiko
+具体 gɯtɑi
+燃焼 nenʃo̞ɯ
+作用 sɑjoː
+ソナ sonɑ
+見つめ mit͡sɯme̞
+畑 hɑtɑke
+いいかげん ii kɑgen
+正しく tɑdɑʃikɯ
+ハンド hɑndo
+よって jo̞ʔte
+富山 tojɑmɑ
+しゃべれる ʃɑbererɯ
+わい ɯa i
+着信 tʃɑkɯʃin
+宮内庁 kɯnɑitʃoː
+狙い nerɑi
+はじける hɑdʒikerɯ
+移す ɯtsɯsɯ
+横浜市立大学 jokohɑmɑ ʃirit͡sɯ dɑigɑkɯ
+響く hibikɯ
+芸術 ge̞ːdʒɯt͡sɯ
+残さ zɑnsɑ
+温めよ ɑtɑtɑmejo
+オンリー o̞nriː
+つらかっ t͡sɯrɑkɑʔ
+補佐 hosɑ
+四肢 ʃiʃi
+迫る semaɾɯ
+呼ぼ jo̞bo̞
+遊ば ɑsobɑ
+タイミング tɑimingɯ
+ぬいぐるみ nɯigɯrɯmi
+食用 ʃo̞kɯjo̞ɯ
+バイブ bɑibɯ
+花王 kɑoː
+奪わ ɯbaɯa
+情熱 dʒo̞ɯnet͡sɯ
+秀雄 hideo̞
+ユリ jɯri
+ケネディ ke̞ne̞di
+浮い ɯi
+対談 tɑidɑn
+足腰 ɑʃikoʃi
+皆 minɑ
+見所 mido̞ko̞ro̞
+国産 kokɯsan
+晋 sɯsɯmɯ
+蔓延 mɑnən
+双方 so̞ːho̞ː
+処理 ʃo̞ri
+スペック sɯpeʔkɯ
+反動 hɑndoː
+ま mɑ
+習い事 nɑrɑigoto
+日成 niʔse̞
+ポイント po̞into̞
+しかし ʃikɑʃi
+借り入れ kɑriire
+江戸 edo̞
+セロトニン seɾo̞to̞nɪn
+あわせ ɑvɑze
+塩 ʃio̞
+テイクアウト teikɯɑɯto
+フォルクスワーゲン forɯkɯsɯwaaɡən
+救援 kʲɯːe̞n
+新型 ʃingɑtɑ
+んで nde̞
+好感 koːkɑn
+期末 kimɑt͡sɯ
+来年 rɑinen
+支払い ʃihɑrɑi
+引用 ĩjo̞ː
+起こら okorɑ
+少なかっ sɯkɯnakaʔ
+争い ɑɾɑsoj
+岩井 iɯai
+なさ nɑsɑ
+偏っ pe̞nʔ
+東京大学 tokʲo dɑigɑkɯ
+吹っ切れ ɸɯʔkire̞
+文言 mo̞ngo̞n
+大林組 oːbɑjɑʃi kɯmi
+STA stɑ
+魚釣り sakanatsɯɾi
+照れ te̞re̞
+原発 genpɑt͡sɯ
+正念場 ʃo̞ɯnendʒo̞ɯ
+入力 ɲɯːrʲo̞kɯ
+だに dɑ ni
+通常 t͡sɯːdʒo̞ɯ
+ウィーク iːkɯ
+コレクト ko̞rekɯto̞
+くるまれ kɯrɯmɑre
+キムチ kimɯtʃi
+差 sɑ
+貪欲 doɲokɯ
+輸入 jɯɲɯː
+内陸 nɑirikɯ
+偽装 giso̞ː
+まっ mɑʔ
+併用 heːjo̞ː
+工学 koːgɑkɯ
+NON no̞n
+当座 toːzɑ
+漁 rʲo̞ɯ
+生地 kidʒi
+工 ko̞ɯ
+ラオックスソディック raoʔkɯsɯsodʲiʔkɯ
+苛烈 kɑret͡sɯ
+雪舟 seʔshɯ
+でないと de nɑi to
+胆石 tɑnseki
+ともかく tomokɑkɯ
+減量 genrʲo̞ɯ
+思いやる omoijɑrɯ
+引っ張り hiʔpɑri
+線量 senrjo̞ː
+転換 tenkɑn
+恐怖 kʲo̞ɯɸɯ
+路 mitʃi
+排 hɑi
+無名 mɯme̞ː
+とらわれ toraɯare
+コンサート konsɑːto
+交渉 ko̞ɯʃo̞ɯ
+淋しい sɑbiʃiː
+放射線 hɑʊshɑːsən
+あんこ ɑnko
+退職 tɑiʃokɯ
+全土 zendo̞
+割っ ɯariʔ
+つぶれ t͡sɯbɯre̞
+東電 to̞ɯden
+商法 ʃo̞ɯho̞ɯ
+探検 tɑnken
+モード mo̞ːdo̞
+むき mɯki
+アクセス akɯsesɯ
+それから soɾekɑɾɑ
+特例 to̞kɯreː
+走る hɑʃirɯ
+晴也 hɑrɯjɑ
+一品 itʃihin
+啓之 ke̞ːdʒi
+次官 dʒikɑn
+リサーチ risɑːtʃi
+村越 mɯrɑkoʃi
+着け t͡sɯke̞
+最悪 saiakɯ
+引っ越し hiʔko̞ʃi
+狙っ nerɑʔ
+立てれ tɑtere
+サッカー sɑʔkɑː
+野犬 jɑken
+ひつ hit͡sɯ
+独自 do̞kɯdʒi
+明朝 mʲo̞ɯtʃo̞ɯ
+鳳 o̞ːto̞ri
+微妙 bimʲo̞ɯ
+未満 mimɑn
+漢 kɑn
+交流 ko̞ɯrʲɯː
+燃やし mojɑʃi
+隣国 ringo̞kɯ
+ムッ mɯ
+年末 nenmɑt͡sɯ
+歩く ɑrɯkɯ
+落馬 rɑkɯbɑ
+方位 ho̞ɯi
+トヨタキヤノン tojotɑ kijɑnon
+マザーズ mɑzɑːzɯ
+就く zɯkɯ
+安全 ɑnzen
+まっ先に mɑʔsɑkini
+リンダ rindɑ
+御堂 mido̞ɯ
+揺れ jɯre̞
+助かり tasɯkaɾi
+ご飯 gohɑn
+剤 zɑi
+元 mo̞to̞
+クオリティー kɯo̞ritiː
+焦ら ɑseɾɑ
+村民 so̞nmin
+襲わ osovɑ
+測ろ hɑkɑro
+食い止める kɯito̞merɯ
+動かす ɯɰᵝakasɯ
+現実味 ge̞ndʒit͡sɯmi
+こまめ komɑme
+あっち ɑʔtʃi
+公的 ko̞ɯteki
+役立ち jɑkɯdɑtʃi
+名車 meːʃɑ
+刃物 hɑmono
+目標 mo̞kɯço̞ɯ
+知識 tʃiʃiki
+弾け hɑdʒike
+学 gɑkɯ
+スパルタ sɯpaɾɯta
+遅れ o̞kɯre
+汲々 kʲɯːkʲɯː
+忠 tɑdɑʃi
+年輪 ne̞nrin
+送還 soːkan
+みっともない miʔtomonɑi
+ベール be̞ːrɯ
+尚更 nɑosɑɾɑ
+看護 kɑngo
+虫歯 mɯʃibɑ
+クレーム kɯre̞ːmɯ
+時間切れ dʒikɑn kire
+鈴 sɯzɯɯ
+貼れる hɑrerɯ
+交際 koːsɑi
+市営 ʃie̞ː
+溶け to̞ke
+ズバリ zɯbɑri
+動ける ɯgo̞kerɯ
+カラム kɑrɑmɯ
+もしくは moʃikɯhɑ
+望ん no̞zo̞n
+雑談 zɑt͡sɯdɑn
+申し込め mo̞ɯʃiko̞me
+わかる ɯakarɯ
+うけ ɯke̞
+詰まら t͡sɯmɑrɑ
+フロア ɸɯroɑ
+飛び to̞bi
+書く kɑkɯ
+原材料 genzɑirʲoː
+まじき mɑdʒiki
+つらく t͡sɯrɑkɯ
+アウェイ aɯei
+アニメ ɑnime
+河野 ko̞ɯno̞
+予価 jokɑ
+戦線 se̞nse̞n
+しも ʃimo̞
+女性 dʑo̞seː
+図 zɯ
+智之 to̞ʃijɯki
+抜群 bɑt͡sɯgɯn
+金物 kɑnɑmono
+芽衣 me̞i
+用い mo̞tʃii
+先頭 sento̞ː
+変え kɑe
+愛読 ɑidokɯ
+考え kɑngɑe
+登っ no̞bo̞ʔ
+ヌキ nɯki
+ベナン benɑn
+以下 ikɑ
+すい sɯi
+言霊 kotodɑmɑ
+始める hɑdʒimerɯ
+貸し手 kɑʃite
+レンタル rentɑrɯ
+落っ o̞ʔ
+アサヒ ɑsɑhi
+伸び no̞bi
+すっきり sɯʔkiɾi
+兼ね備え kɑnesonɑe
+連鎖 ɾɛnsɑ
+智 sɑtoʃi
+連戦 ɾɛnsən
+水準 suidʒun
+くぐり kɯgɯri
+呼ばわり jobaɯari
+三沢 misaɯa
+遊び心 ɑsobiɡokoro
+半面 hɑnmen
+看板 kɑnbɑn
+うなじ ɯnɑdʒi
+食べ頃 tɑbegoro
+原田 hɑrɑdɑ
+追撃 t͡sɯige̞ki
+行方 jɯkɯe̞
+振り ɸɯri
+いつか it͡sɯ kɑ
+万事休す bandʒikjɯːsɯ
+挙げ ɑge
+基盤 kibɑn
+隔世 kakɯseː
+サンダル sandaɾɯ
+松井 mɑt͡sɯi
+広がり hirogɑri
+点 te̞n
+札 satsɯsɯ
+気性 kiʃo̞ɯ
+ゴール go̞ːrɯ
+図る hɑkɑrɯ
+ちゃり tʃɑri
+決意 ke̞t͡sɯi
+鮮やか ɑzɑjɑkɑ
+群衆 gɯnʃɯː
+ガルフ gɑrɯɸɯ
+村松 mɯrɑmɑt͡sɯ
+今回 konkɑi
+穢れ kegɑre
+謝ら ɑjɑmɑrɑ
+熱心 ne̞ʔʃin
+師匠 ʃiʃo̞ɯ
+複写 ɸɯkɯʃɑ
+急ぐ isoɰᵝɑzo
+盛り上がり moriɑgɑri
+総じて so̞ːdʒite
+食事 ʃo̞kɯdʒi
+まさか mɑsɑkɑ
+乾燥 kɑnsoː
+間 mɑ
+ばい bɑi
+本心 ho̞nʃin
+ぴったり piʔtɑri
+察知 sɑtʃi
+入会 ɲɯːkɑi
+労組 ɾo̞ːso̞
+可決 kɑket͡sɯ
+ヘリ he̞ri
+見送る mio̞kɯrɯ
+人さま hitosɑmɑ
+死刑 ʃike̞ː
+提案 teːɑn
+ときめく to̞kimekɯ
+学園 gɑkɯen
+腹 hɑrɑ
+舞踊 bɯjo̞ɯ
+見下し mikɯdɑʃi
+苦難 kɯnɑn
+親しみ ʃitɑʃimi
+もっぱら moʔpɑrɑ
+土産物 mijɑgemono
+を ɯo
+依頼 irɑi
+パフォーマンス pafoːmansɯ
+ジウ dʒiɯ
+おおらか oːrɑkɑ
+つのる t͡sɯno̞rɯ
+施策 ʃisakɯ
+アルマジロ ɑrɯmɑdʒiro
+中東 tʃɯːto̞ɯ
+ファーガソン fɑːgɑson
+カルチャー kɑrɯtʃɑː
+ヒラ hirɑ
+コア koɑ
+日野自動車 hino dʒidoːʃɑ
+因果 ingɑ
+世相 seso̞ː
+なおさら nɑosɑɾɑ
+冒険 bo̞ɯken
+越し ko̞ʃi
+混み ko̞mi
+製材 seːzɑi
+鉛 nɑmɑri
+物心 mo̞no̞go̞ko̞ro̞
+がち gɑtʃi
+ファミコン ɸɑmikon
+ノーギャラ noːgʲɑrɑ
+陥っ o̞tʃiʔ
+紫外線 ʃiɡɑi̯sən
+不通 ɸɯt͡sɯː
+断た tɑtɑ
+伴う tomonɑɯ
+スープレックス sɯːpɯreʔkɯsɯ
+座れ sɯwaɾe
+待ち構え mɑtʃikɑmɑe
+真一 ʃinitʃi
+立ち入れ tɑtʃiire
+挽肉 hikinikɯ
+難局 nɑnkʲokɯ
+運動 ɯndo̞ɯ
+諦める ɑkirɑmerɯ
+失神 ʃɪʔʃɪn
+数え切れ kɑzoe kire
+焼き jɑki
+バーナンキ bɑːnɑnki
+喪主 mo̞ʃɯ
+机 t͡sɯkɯe̞
+格好 kɑʔkoː
+労働党 ro̞ɯdo̞ɯto̞ɯ
+的外れ mɑtohɑzɯre
+思いつい o̞mo̞it͡sɯi
+点字 te̞ndʒi
+将兵 ʃo̞ːheː
+平沼 hirɑnɯmɑ
+取り計らっ torihɑkɑrɑʔ
+ほくろ ho̞kɯro̞
+兼任 ke̞nnin
+スムーズ sɯmɯːzɯ
+秀で hiide̞
+スキン sɯkin
+ビール biːrɯ
+民意 mini
+樽井 tɑrɯi
+義理 giri
+富める to̞merɯ
+ませ mɑse
+相対 soːtɑi
+渡部 ɯatanabe
+打ち込ま ɯtʃikomɑ
+注力 tʃɯːrʲo̞kɯ
+いやらしい ijɑrɑʃiː
+有料 jɯːrʲo̞ɯ
+飲める no̞merɯ
+ヶ月 kɑget͡sɯ
+勝ち名乗り kɑtʃinɑnori
+デモ demo̞
+出会え deɑe
+お済み osɯmi
+列 re̞t͡sɯ
+長期間 tʃoːkikɑn
+イタチ itɑtʃi
+体質 tɑiʃit͡sɯ
+長蛇 tʃoːdɑ
+愛し ɑiʃi
+摩擦 masatsɯsɯ
+もと mo̞to̞
+激増 gekizo̞ɯ
+模型 mo̞keː
+理化学研究所 rikɑgɑkɯ kenkʲɯːʃo
+正統 seːto̞ː
+大統領 dɑitoːrʲoː
+施術 ʃidʒɯt͡sɯ
+運ん hɑkon
+すれ sɯɾe
+素顔 sɯɰᵝao
+介し kɑiʃi
+務まる t͡sɯtomɑrɯ
+広志 hiro̞ʃi
+ガバナンス gabanansɯ
+なら nɑrɑ
+嗜好 ʃiko̞ɯ
+酒 sɑke
+跳ぶ to̞bɯ
+いまや imɑ jɑ
+振るわ ɸɯrɯɯa
+おなら onɑrɑ
+西郷 sɑiɣoː
+憎しみ nikɯʃimi
+グレー gɯre̞ː
+ツアー t͡sɯɑː
+はねとばす hanetobasɯ
+戦略 senrjakɯ
+吸引 kʲɯːin
+東南アジア toːnɑn ɑdʒiɑ
+動きだす ɯɡokidasɯ
+感謝 kɑnʃɑ
+申し出る mo̞ɯʃiderɯ
+わらじ ɯaradʒi
+ソース soːsɯ
+越え ko̞e
+マン mɑn
+真っ盛り mɑʔsɑkɑri
+憂うつ jɯːɯt͡sɯ
+失っ ɯʃinɑʔ
+スタンス sɯtansɯ
+今新 imɑ ʃin
+悩まさ nɑjɑmɑsɑ
+済む sɯmɯ
+あたりまえ ɑtɑrimɑe
+切り捨てる kiɾisɯteɾɯ
+西側 niʃigaɯa
+コトバ kotobɑ
+包み隠さ tsɯtsɯmikakɯsa
+ていう te̞ iɯ
+流動的 rʲɯːdo̞ɯteki
+日曜日 nitʃijo̞ɯ hi
+改める ɑrɑtɑmerɯ
+おう o̞ɯ
+代えれ kɑere
+著書 tʃo̞ʃo̞
+三和銀行 sanɯaginkoː
+尽き t͡sɯki
+権力 kenrʲo̞kɯ
+ハエ hɑe
+そうしたら sɯʃitara
+印刷物 insatsɯbɯtsɯ
+騎手 kiʃɯ
+犬 inɯ
+汁 ʃirɯ
+ぺん pe̞n
+治 osamɯ
+秦 hɑtɑ
+凄い sɯɡoj
+適宜 te̞kigi
+捕まえ t͡sɯkɑmɑe
+迅速 jɪnsokɯ
+電球 de̞nkʲɯː
+偶数 gɯːsɯː
+補強 ho̞kʲo̞ɯ
+遅き o̞so̞ki
+一生懸命 iʔʃo̞ːkɛnmeː
+本店 ho̞nten
+備える sonaeɾɯ
+薄れ ɯsɯɾe
+届き to̞do̞ki
+マグロ mɑgɯro
+ごめん go̞men
+絵 e̞
+こだわっ kodaɯaʔ
+分裂 bɯnre̞t͡sɯ
+グッズ gɯʔzɯ
+洗剤 senzɑi
+住宅 dʒɯːtɑkɯ
+ゲッターズ get tɑːzɯ
+シカゴ ʃikɑgo
+クリーム kɯriːmɯ
+もの凄く monosɯɡokɯ
+進めよ sɯsɯmejo
+秩序 tʃit͡sɯdʒo̞
+ロースクール roːsɯkɯːrɯ
+ブリ bɯri
+美術 bidʒɯt͡sɯ
+無精髭 bɯʃoːhige
+善 ze̞n
+キャメロン kʲɑmeron
+ぼかし bokɑʃi
+翌月 jo̞kɯget͡sɯ
+励まさ hɑgemɑsɑ
+金箔 kinpɑkɯ
+点い t͡sɯi
+長谷川 haseɡaɯa
+鈍化 donkɑ
+バイノーラルマイク bɑinoːrɑrɯ mɑikɯ
+悲観 hikɑn
+順守 dʒɯnʃɯ
+基づく mo̞to̞zɯkɯ
+後払い gobɑrɑi
+のま no mɑ
+玉子 tɑmɑgo
+みか mikɑ
+近年 kinne̞n
+ナナ nɑnɑ
+ましょ mɑʃo
+ジャンル dʒɑnrɯ
+輸出入 jɯʃɯt͡sɯɲɯː
+協同 kʲo̞ɯdo̞ɯ
+光学 koːgɑkɯ
+オーディション o̞ːdiʃo̞n
+追い o̞i
+川田 kaɯata
+ナシ nɑʃi
+づらく zɯrɑkɯ
+実力 dʒit͡sɯrʲo̞kɯ
+おさまる osamaɾɯ
+わ ɯa
+過不足 kafɯsokɯ
+逸材 it͡sɯzɑi
+意外と igɑi to
+家電 kɑden
+もらい morɑi
+プロデューサー pɯrodjɯːsaː
+損益 so̞neki
+外人 gɑidʒin
+北川 kitagaɯa
+野村 nomɯrɑ
+女々しい me̞me̞ʃiː
+きょろきょろ kʲo̞ro̞kʲo̞ro̞
+積算 sekisɑn
+シンガーソングライター ʃiŋgaːsoŋgɯraitaː
+消防署 ʃo̞ɯbo̞ɯʃo̞
+指先 jɯbisaki
+見かけ mikɑke
+うた ɯtɑ
+須田 sɯda
+考案 koːɑn
+旗 hɑtɑ
+言明 ge̞nme̞ː
+通用 t͡sɯːjo̞ɯ
+消し ke̞ʃi
+急伸 kʲɯːʃin
+見直し minɑoʃi
+投手 to̞ɯʃɯ
+似 ni
+物真似 monomɑne
+測れ hɑkɑre
+少し sɯkoʃi
+負う o̞ɯ
+モラン morɑn
+すこぶる sɯkobɯɾɯ
+年金 ne̞nkin
+むくみ mɯkɯmi
+素足 sɯaʃi
+郵便 jɯːbin
+半年 hɑntoʃi
+売れ行き ɯre̞jɯki
+必要 çit͡sɯjo̞ɯ
+セックス seʔkɯsɯ
+岸田 kiʃidɑ
+みる mirɯ
+久しぶり hisaʃibɯɾi
+麺類 me̞nrɯi
+極 kʲo̞kɯ
+雄勝 ogɑt͡sɯ
+寛容 kɑɲoː
+コンサートホール konsaːtohoːrɯ
+始まっ hɑdʒimɑʔ
+メチャメチャ metʃɑmetʃɑ
+浮上 ɸɯdʒo̞ɯ
+奥さん okɯsan
+焦る aseɾɯ
+揺るぎない jɯrɯginɑi
+恐縮 kʲo̞ɯʃɯkɯ
+言える ie̞rɯ
+みえる mie̞rɯ
+円 e̞n
+してやる ʃite jɑrɯ
+フル ɸɯrɯ
+ヴルフ vɯɾɯfɯ
+重々 dʒɯːdʒɯː
+頃 ko̞ro̞
+樹 ki
+特定 to̞kɯteː
+高卒 ko̞ːso̞tsɯ
+ウーロン茶 ɯːron tʃɑ
+装着 soːtʃakɯ
+達人 tɑt͡sɯdʒin
+投融資 to̞ɯjɯːʃi
+満載 mɑnsɑi
+非国民 hiko̞kɯmin
+アイスクリーム aisɯkɯriːmɯ
+覆す kɯtsɯgaesɯ
+悔し kɯjɑʃi
+立ち向かう tɑtʃimɯkɑɯ
+見解 kenkɑi
+依然として izen to̞ ʃite
+両方 rʲo̞ɯho̞ɯ
+役に立ち jɑkɯ ni tɑtʃi
+ばり bɑri
+研一 ke̞nitʃi
+直訳 tʃokɯjɑkɯ
+経 kʲo̞ɯ
+採択 saitakɯ
+寝かせる nekaseɾɯ
+實 mino̞rɯ
+生涯 ʃoːgɑi
+パンダ pɑndɑ
+辛 ʃin
+単発 tɑnpɑt͡sɯ
+働か hɑtɑrɑkɑ
+高校生 koːkoːsɛː
+目覚める mezɑmerɯ
+駐屯 tʃɯːto̞n
+当て ɑte
+塞がり fɯsaɣaɾi
+刷り込ま sɯɾikoma
+歌唱 kɑʃoː
+偏見 he̞nke̞n
+惜しみ o̞ʃimi
+身震い mibɯrɯi
+保管 hokɑn
+顔ぶれ kɑobɯre
+王子 o̞ɯdʒi
+所持 ʃo̞dʒi
+高熱 ko̞ɯnet͡sɯ
+刈っ kɑʔ
+三昧 sɑnmɑi
+長時間 tʃoːdʒikɑn
+大きく o̞ːkikɯ
+少な sɯkɯna
+印象 inʃo̞ɯ
+不治の病 ɸɯdʒi no jɑmɑi
+ヒント hinto̞
+白 ʃiro̞
+ゾウ zo̞ɯ
+蒔く mɑkɯ
+泣け nɑke
+貴重 kitʃo̞ɯ
+悶着 mondʒɑkɯ
+がら gɑrɑ
+生殺与奪 seːsɑtsɯjodɑtsɯ
+ユニフォーム jɯniɸo̞ːmɯ
+懇談 kondɑn
+文節 bɯnsetsɯ
+我が家 ɯagaja
+ヤンニョムジャン jɑnɲomɯdʒɑn
+晴美 hɑrɯmi
+きゃ kʲɑ
+起こす okosɯ
+腎臓 dʒinzo̞ɯ
+ちゃあ tʃɑː
+悪循環 ɑkɯdʒɯnkɑn
+変わり kaɯari
+エラー erɑː
+青龍 seːɾjɯː
+取れる to̞rerɯ
+懸命 ke̞nme̞ː
+例える tɑtoerɯ
+まさしく masaʃikɯ
+矢作川 jahagi kaɯa
+広い hiro̞i
+吸収 kʲɯːʃɯː
+憤慨 ɸɯngɑi
+アウトソーシング aɯtosoːshiᵑgɯ
+日曜 nitʃijo̞ɯ
+論 ro̞n
+伝染 de̞nse̞n
+頭皮 to̞ɯhi
+ナイフ nɑiɸɯ
+根性 ko̞ndʒo̞ɯ
+財務省 zɑimɯʃoː
+話しかけ hɑnɑʃikɑke
+幸い sɑivɑi
+空く ɑkɯ
+城西国際大学 dʒoːsaikokɯsaidaigakɯ
+ばか bɑkɑ
+客船 kjakɯsen
+基 mo̞to̞
+配置 hɑitʃi
+引き渡し hikiɯataʃi
+合否 go̞ɯhi
+取り扱う toriɑt͡sɯkɑɯ
+軽減 ke̞ːge̞n
+手話 ʃɯɯa
+とどめ to̞do̞me
+おら orɑ
+務 t͡sɯto̞mɯ
+直樹 nɑoki
+故郷 ko̞kʲo̞ɯ
+激突 gekito̞t͡sɯ
+かしら kɑʃirɑ
+限ら kɑgirɑ
+森田 moritɑ
+見る mirɯ
+ポロリ po̞ro̞ri
+山野目 jɑmɑnome
+家臣 kɑʃin
+戸惑う tomɑdoɯ
+黒澤 kɯɾosawa
+抱負 ho̞ɯɸɯ
+きく kikɯ
+そっち so̞ʔtʃi
+届か todoke kɑ
+理論 riro̞n
+勝て kɑte
+スウェーデン sɯeːden
+金星 kinse̞ː
+改造 kɑizoː
+家計 kɑkeː
+ありがた ɑrigɑtɑ
+売却 bɑikʲɑkɯ
+鋳物 imo̞no̞
+対応 tɑioː
+喜ぶ jo̞ro̞ko̞bɯ
+ひな壇 çinɑdɑn
+戦局 senkjokɯ
+柱 hɑʃirɑ
+早め hɑjɑme
+進め sɯsɯme
+支え sɑsɑe
+延ばし nobɑʃi
+そそのかす sosonokasɯ
+糊 no̞ri
+挙句 ɑgekɯ
+先走り sɑkibɑʃiɾi
+装置 so̞ːtʃi
+奈良尾 nɑrɑo
+ピーチズ piː tʃizɯ
+藁 ɯara
+鉛筆 e̞npit͡sɯ
+今年度 ko̞nnendo̞
+辞め jɑme
+いたん itɑn
+反対 hɑntɑi
+準備 dʒɯnbi
+打ち明け ɯtʃiɑke
+当たっ ɑtɑʔ
+武力 bɯrʲo̞kɯ
+作る t͡sɯkɯrɯ
+哲郎 tet͡sɯro̞ɯ
+髪 kɑmi
+万里 bɑnri
+ローヴ roːvɯ
+別れ ɯakare
+部活 bɯkɑt͡sɯ
+あくまで ɑkɯmɑde
+史子 ɸɯmiko̞
+付き t͡sɯki
+湖 mizɯːmi
+進学 ʃingɑkɯ
+也 nɑ
+分岐 bɯnki
+寿江 to̞ʃie
+栗色 kɯriiro̞
+小平 ʃo̞ɯheː
+マフラー mɑɸɯrɑː
+マシン mɑʃin
+ゼリコウ zeriko̞ɯ
+公称 ko̞ɯʃo̞ɯ
+騒ぎ sɑvɑgi
+気風 kiɸɯː
+サポーター sɑpoːtɑː
+就活中 ʃɯːkɑt͡sɯtʃɯː
+島 ʃimɑ
+丸の内 mɑrɯnoɯtʃi
+リアル riɑrɯ
+市村 itʃimɯrɑ
+辻 t͡sɯdʒi
+豚児 to̞ndʒi
+立川談志 tatʃikaɯa danʃi
+飛型 higɑtɑ
+原動力 gendo̞ɯrʲo̞kɯ
+急病 kʲɯːbʲo̞ɯ
+テロリスト teɾoɾisɯto
+消 ʃo̞ɯ
+慣れれ nɑrere
+宣伝 se̞nde̞n
+スタジオ sɯtaxjo
+舞い戻っ mɑimodoʔ
+半蔵門 hɑnzoː mon
+特筆 to̞kɯhit͡sɯ
+調理 tʃo̞ɯri
+任意 nini
+うーん ɯːn
+サザンテラス sazanterasɯ
+福中 ɸɯkɯnɑkɑ
+太陽誘電 tɑijoː jɯːden
+ピーター piːtɑː
+遠吠え to̞ːbo̞e
+ジュース dʒɯːsɯ
+出会っ deɑʔ
+やり直せ jɑɾinɑose
+かわいらしい kaɯairaʃiː
+分の bɯn no̞
+やり遂げ jɑritoge
+バロテッリ bɑroterri
+綺麗 kire̞ː
+映っ ɯt͡sɯʔ
+益 e̞ki
+イタズラ itɑzɯrɑ
+帚木蓬 hɑhɑkigi jomogi
+観念 kɑnnen
+翌日 jo̞kɯdʒit͡sɯ
+摘み取り t͡sɯmito̞ri
+お子様 okosɑmɑ
+課程 kɑteː
+位置 itʃi
+振り返る ɸɯrikɑerɯ
+プリンス pɯɾinsɯ
+和敏 kɑzɯtoʃi
+ヨタ jotɑ
+割増 ɯarimaʃi
+儲かる moːkɑrɯ
+ワガママ ɯagamama
+言い渡さ iiɯatasa
+イス isɯ
+大関 o̞ːzeki
+止まっ tomɑʔ
+渡れ ɯatare
+交差点 koːsɑtɛn
+大手 o̞ːte
+蛭子 ebisɯ
+出揃っ deso̞ɾo̞ʔ
+ドル do̞rɯ
+涌井 ɯakɯi
+贈り o̞kɯri
+古館 ɸɯrɯdɑte
+他紙 tɑʃi
+オプション o̞pɯʃo̞n
+受任 dʒɯnin
+ほほ ho̞ho̞
+連敗 renpɑi
+食糧 ʃo̞kɯrʲo̞ɯ
+陣営 dʒinɑi
+募金 bo̞kin
+一芸 itʃige̞ː
+ねえ ne̞ː
+注ぐ sosoɰᵝagɯ
+美術家 bidʒɯt͡sɯkɑ
+ぱみゅぱみゅ pɑ mi pɑ mi
+バント bɑnto
+ニクソン nikɯson
+アロマ ɑromɑ
+見落とし mio̞to̞ʃi
+持た motɑ
+ごらん gorɑn
+勧誘 kanjɯː
+出番 debɑn
+要領 jo̞ɯrʲo̞ɯ
+救い sɯkɯi
+崩落 hoːrɑkɯ
+わきまえ ɯakimae
+西野 niʃino̞
+自社 dʒiʃɑ
+乗る no̞rɯ
+包丁 ho̞ɯtʃo̞ɯ
+美味しく o̞iʃikɯ
+彼氏 kɑreʃi
+満島 mit͡sɯʃimɑ
+改憲 kɑiken
+すたれる sɯtaɾeɾɯ
+照 terɑʃi
+尊氏 tɑkɑɯdʒi
+おうか oɯ kɑ
+満員 mɑnin
+およそ o̞jo̞so̞
+新婚 ʃinko̞n
+省略 ʃoːrʲɑkɯ
+義務 gimɯ
+高騰 ko̞ɯto̞ɯ
+ビブス bibɯsɯ
+テニス tenisɯ
+触手 ʃo̞kɯʃɯ
+停滞 teːtɑi
+軍部 gɯnbɯ
+転向 tenko̞ɯ
+来意 rɑii
+十河 to̞ɯgo̞ɯ
+セレブ seɾebɯ
+遅延 tʃie̞n
+凍死 to̞ɯʃi
+賑わす niɠiwasɯ
+シーン ʃiːn
+医 i
+始めれ hɑdʒimere
+直接的 tʃokɯsetsɯteki
+カレンダー kɑrendɑː
+む mɯ
+恵比寿 ebisɯ
+宮 mijɑ
+も mo̞
+ドキドキ do̞kido̞ki
+季節 kise̞tsɯ
+足かせ ɑʃikɑze
+結論 ket͡sɯro̞n
+くるん kɯrɯ n
+イップス iʔpɯsɯ
+忘却 boːkʲɑkɯ
+訂正 te̞ːse̞ː
+イビキ ibiki
+河口湖 kaɯagɯtʃiko
+取り返す toɾikaesɯ
+お嬢さん oʒoːsɑn
+怪しむ ɑjɑʃimɯ
+プライベート pɯrɑibeːto
+洞察 doːsɑtsɯ
+子ども ko̞do̞mo̞
+報復 ho̞ɯɸɯkɯ
+じ dʒi
+上矢 ɯɯaja
+受け入れ ɯke̞ire̞
+倹約 keɲakɯ
+族 zo̞kɯ
+肥田 hidɑ
+除外 dʒogɑi
+自給自足 dʒikjuːdʒisoku
+産科 sɑnkɑ
+敗北 hɑibokɯ
+なけれ nɑkere
+日電 nitʃide̞n
+最後 sɑiɣo
+オフレコ o̞ɸɯreko̞
+真人 mɑsɑto
+ライブ rɑibɯ
+独裁 dokɯsai
+遺伝 ide̞n
+書店 ʃo̞ten
+夕食 jɯːʃo̞kɯ
+嫌がらせ ijɑgɑrɑse
+色直し ironɑoʃi
+体格 tɑikɑkɯ
+会費 kɑihi
+手仕事 teʃigo̞to̞
+繋がる t͡sɯnɑgɑrɯ
+作法 sɑoː
+スリー sɯriː
+皆無 kɑimɯ
+田舎 inɑkɑ
+美しい ɯt͡sɯkɯʃiː
+簪 kɑnzɑʃi
+久しい hisɑʃiː
+っす ʔsɯ
+肥やし kojɑʃi
+宇佐見 ɯsami
+ニッタ nit tɑ
+化 bɑke
+インク inkɯ
+置い o̞i
+商店 ʃo̞ɯten
+締めつけ ʃime̞t͡sɯke̞
+名目 meːmo̞kɯ
+生む ɯmɯ
+下がっ sɑgɑʔ
+命日 me̞ːnitʃi
+流儀 rʲɯːgi
+テキスト tekisɯto
+まかせ mɑkɑse
+抱く idɑkɯ
+配ん kɯbɑn
+良かっ jokɑʔ
+アフィリエイト ɑɸirieito
+談話 danɯa
+ごっこ go̞ʔko̞
+暴論 bo̞ɯro̞n
+厄 jɑkɯ
+広場 hirobɑ
+嘘っぱち ɯsoʔpatʃi
+いじっ idʒiʔ
+細谷 hosojɑ
+読ん jo̞n
+トロ to̞ro̞
+チーター tʃiːtɑː
+引きずっ hikizɯʔ
+正確 seːkakɯ
+給食 kʲɯːʃo̞kɯ
+イケ ike̞
+カタギ kɑtɑgi
+つげ t͡sɯge̞
+ただただ tɑdɑtɑdɑ
+語録 go̞ro̞kɯ
+出場 ʃɯt͡sɯdʒo̞ɯ
+台頭 tɑitoː
+地位 tʃii
+時おり to̞kio̞ri
+ひかる çikɑrɯ
+しがみつい ʃigɑmit͡sɯi
+獲 e̞
+おもねら omonerɑ
+与え ɑtɑe
+パース paːsɯ
+玉砕 gokusai
+庶民 ʃo̞min
+源泉 gənsən
+清 kijo̞ʃi
+ベンチェ be̞ntʃe̞
+ティーン tiːn
+事故 dʒiko̞
+軽 ke̞ː
+通知 t͡sɯːtʃi
+押し目 o̞ʃime
+九州 kʲɯːʃɯː
+真っ最中 maʔsaitʃɯɯ
+監修 kɑnʃɯː
+幸一 ko̞ɯitʃi
+ふく ɸɯkɯ
+取り囲む torikɑkomɯ
+治療 tʃirʲo̞ɯ
+時人 dʒidʒin
+見つける mit͡sɯke̞rɯ
+豚 bɯtɑ
+逆行 gʲɑʔkoː
+極度 kʲo̞kɯdo̞
+俊夫 to̞ʃio̞
+スライディング sɯɾaidiŋgɯ
+山崎 jɑmɑzɑki
+放置 ho̞ɯtʃi
+ささやか sɑsɑjɑkɑ
+スポーツ sɯpoːtsɯɯ
+駒野 komɑno
+浪費 ro̞ɯhi
+言論 genro̞n
+痛 itɑ
+傷つけん kizɯt͡sɯke̞n
+サイクル saikɯɾɯ
+波打ち際 namiɯtʃi kiɯa
+メリーランド meriːrɑndo
+娶っ meto̞ʔ
+くせ kɯze
+法務大臣 hoːmɯ dɑidʒin
+例 re̞ː
+立ち tɑtʃi
+ひどい çido̞i
+追わ oɯa
+登 to̞ɯ
+諸氏 ʃo̞ʃi
+合計 go̞ːkeː
+法務大臣 hoːmɯ dɑidʒin
+着用 tʃɑkɯjoː
+余震 jo̞ʃin
+では de ɯa
+好機 ko̞ɯki
+払っ hɑrɑʔ
+ネクソン nekɯson
+かかっ kɑkɑʔ
+貫き通す tsɯranɯkitoosɯ
+汚し jo̞go̞ʃi
+時間 dʒikɑn
+心 ko̞ko̞ro̞
+仕上げ ʃiɑge
+誘導 jɯːdo̞ɯ
+授業 dʒɯgʲo̞ɯ
+出し dɑʃi
+数カ所 sɯːkaʃo
+抗 ko̞ɯ
+成立 se̞ːɾitsɯ
+起こる o̞ko̞rɯ
+涼子 rʲo̞ɯko̞
+経路 keːro̞
+何かと nɑn kɑ to
+守っ mɑmoʔ
+海底 kɑiteː
+スローガン sɯɾoːgan
+ソッコーメタボ soʔkoːmetɑbo
+訪朝 ho̞ɯtʃo̞ɯ
+翌年 jo̞kɯnen
+内閣 nɑikɑkɯ
+問わ toɯa
+削減 sakɯgen
+学び mɑnɑbi
+翁 o̞ɯ
+ジョー dʒo̞ː
+入社 ɲɯːʃɑ
+告別 ko̞kɯbet͡sɯ
+流用 rʲɯːjo̞ɯ
+なさい nɑsɑi
+センター sentɑː
+コン ko̞n
+ナニモノ nɑnimono
+スースー sɯːsɯː
+動物 do̞ɯbɯt͡sɯ
+澄まし sɯmaʃi
+口座 koːzɑ
+苦り切る nigɑri kirɯ
+肉汁 nikɯdʒɯː
+アルミニウム ɑrɯminiɯmɯ
+エルピーダ erɯpiːdɑ
+販管費 hɑnkɑnhi
+放送 ho̞ːso̞ː
+必死 hɪʔʃi
+命令 me̞ːre̞ː
+局 kʲo̞kɯ
+つながっ t͡sɯnɑgɑʔ
+れよ rejo̞
+等 to̞ɯ
+速報 sokɯhoː
+届ける to̞do̞kerɯ
+逃がさ niɰᵝɑsɑ
+ブラックホワイトグリーンピンク bɯraʔkɯ hoɯaito gɯriːn pinkɯ
+廃油 hɑijɯ
+待つ mɑt͡sɯ
+事務所 dʒimɯʃo̞
+さておき sɑteoki
+性格 seːkakɯ
+だろ dɑro
+梅津 ɯme̞zɯ
+そうして so̞ːʃite
+めくり me̞kɯri
+建造 kenzo̞ɯ
+川口 kaɯagɯtʃi
+武勇 bɯjɯː
+安倍 ɑbe
+あっ ɑʔ
+無残 mɯzɑn
+メッシ me̞sʃi
+聞い kii
+大型 oːgɑtɑ
+減る he̞rɯ
+ライフスタイル raifɯsɯtaiɾɯ
+堅物 kɑtɑbɯt͡sɯ
+合せ ɑvɑze
+清水寺 ʃimizɯ terɑ
+設置 se̞tʃi
+売り出さ ɯɾiðasa
+内面 nɑimen
+難い gɑtɑi
+刑務所 keːmɯʃo̞
+どんな donnɑ
+息づく ikizɯkɯ
+食品 ʃo̞kɯhin
+支える sasaeɾɯ
+切り上げる kiriɑgerɯ
+こむ ko̞mɯ
+四季 ʃiki
+教訓 kʲo̞ɯkɯn
+賄賂 ɯairo
+大雪 o̞ːjɯki
+意思 iʃi
+本望 ho̞nmo̞ɯ
+愛称 ɑiʃoː
+刷毛 hɑke
+軟化 nɑnkɑ
+相次ぎ ɑit͡sɯgi
+またもや mɑtɑmojɑ
+冷房 reːbo̞ː
+絵描き ekɑki
+体罰 tɑibɑt͡sɯ
+秘宝 hiho̞ɯ
+囲炉裏 iro̞ri
+個々人 ko̞ko̞nin
+いよ ijo̞
+一郎 itʃiro̞ɯ
+ふりかけ ɸɯrikɑke
+先輩 senpɑi
+新沼 niːnɯmɑ
+一抹 itʃimɑt͡sɯ
+最低限 sɑiteːgen
+圧倒 ɑʔtoː
+同志 do̞ːʃi
+入金 ɲɯːkin
+日差し hizɑʃi
+値札 neɸɯdɑ
+国母 ko̞kɯbo̞
+水辺 mizɯbe̞
+つつき t͡sɯt͡sɯki
+段 dɑn
+適格 tekikɑkɯ
+由来 jɯrɑi
+券 ke̞n
+不急 ɸɯkʲɯː
+もう少し moːsɯkoʃi
+自炊 dʒisui
+副作用 fɯkɯsajoː
+五穀豊穣 go̞ko̞kɯ ho̞ɯdʒo̞ɯ
+おかしく okɑʃikɯ
+税引き ze̞ːbiki
+うーむ ɯːmɯ
+投入 to̞ɯɲɯː
+アバウト ɑbɑɯto
+砦 to̞ride
+こと ko̞to̞
+反 hɑn
+信じよ ʃindʒijo̞
+激変 ge̞kihe̞n
+次元 dʒige̞n
+同姓 do̞ːseː
+単 tɑn
+二 ni
+弾む hɑzɯmɯ
+筋 sɯʒi
+丘 okɑ
+付き合う t͡sɯkiɑɯ
+羽 hɑne
+乳酸 ɲɯːsan
+うん ɯn
+解体 kɑitɑi
+塗る nɯrɯ
+積極 seʔkjokɯ
+おさまら osɑmɑɾɑ
+浜岡原発 hɑmɑokɑgenpɑt͡sɯ
+流動 rʲɯːdo̞ɯ
+富谷 tomijɑ
+糖尿 to̞ɯɲo̞ɯ
+ゲンカ genkɑ
+ノーベル no̞ːberɯ
+学歴 gɑkɯreki
+呼びかけ jobikɑke
+考慮 ko̞ɯrʲo̞
+先駆け sɑkigɑke
+気合 kiɑi
+記念 kine̞n
+島本 ʃimɑmoto
+模範 mohɑn
+やり直し jɑrinɑoʃi
+べく be̞kɯ
+振舞う ɸɯrɯmɑɯ
+みろ miro̞
+追いつか oit͡sɯkɑ
+テンション tenʃo̞n
+アド ɑdo
+目安 mejasɯ
+感熱 kɑnnet͡sɯ
+古臭い fɯɾɯkɯsai
+頼め tɑnome
+械 ?
+中止 tʃɯːʃi
+ギター gitɑː
+っぽい ʔpo̞i
+ゲームセンター geːmɯsentaː
+リウマチ riɯmɑtʃi
+快楽 kɑirɑkɯ
+ウド ɯdo̞
+固有 ko̞jɯː
+膜 mɑkɯ
+止め to̞me
+運 ɯn
+懸賞 kenʃo̞ɯ
+ヒバリ hibɑri
+チョトボ tʃo̞to̞bo̞
+ついでに t͡sɯide̞ ni
+楽し tɑnoʃi
+単身 tɑnʃin
+最貧 sɑihin
+仲村 nɑkɑmɯrɑ
+魅力 mirʲo̞kɯ
+おみやげ omijɑge
+益夫 masɯo
+黙々と mo̞kɯmo̞kɯ to̞
+ミニ mini
+ポリシー po̞riʃiː
+勝負 ʃo̞ɯbɯ
+払え hɑrɑe
+セミナー seminɑː
+しまう ʃimɑɯ
+絞首刑 ko̞ːʃɯ keː
+指示 ʃidʒi
+いちばん itʃibɑn
+めざす mezasɯ
+計測 keːsokɯ
+食い込み kɯiko̞mi
+カメレオン kɑmereon
+かつ kɑt͡sɯ
+非 hi
+線 se̞n
+既 sɯnde
+割り切っ ɯarikiʔ
+アリ ɑri
+くらい kɯrɑi
+ずらし zɯrɑʃi
+育た sodɑtɑ
+一員 itʃiin
+気持ちいい kimo̞tʃi iː
+お預け oɑzɯke
+自意識 dʒi iʃiki
+示そ ʃimeso̞
+癒 ie̞
+ダメ dɑme
+実業 dʒit͡sɯgʲo̞ɯ
+一杯 iʔbɑi
+ノリ no̞ri
+秋冬 ɑkiɸɯjɯ
+施錠 seʒo̞ː
+竹林 tʃikɯrin
+分断 bɯndɑn
+タカ tɑkɑ
+財 zɑi
+オリーリー o̞riːriː
+夫君 ɸɯkɯn
+大麦 o̞ːmɯgi
+マッドネス madonesɯ
+在京 zɑikʲoː
+まじめ mɑdʒime
+後尾 ko̞ɯbi
+止める to̞merɯ
+通話 t͡sɯːɯa
+高台 tɑkɑdɑi
+踏め ɸɯme̞
+状 dʒo̞ɯ
+大曲 tɑikʲokɯ
+サーモスタット saːmosɯtaʔto
+局員 kʲo̞kɯin
+有力 jɯːrʲo̞kɯ
+ボックス boʔkɯsɯ
+ナカ nɑkɑ
+またし mɑtɑ ʃi
+そもそも so̞mo̞so̞mo̞
+仕方 ʃikɑtɑ
+脅し o̞do̞ʃi
+呪文 dʒɯmo̞n
+交錯 koːsakɯ
+調査 tʃoːsɑ
+私立 ʃirit͡sɯ
+日経 niʔke̞ː
+役に立つ jɑkɯ ni tɑt͡sɯ
+暗号 ɑngoː
+高い tɑkɑi
+動的 do̞ɯteki
+危なっかしく ɑbɯnɑʔkɑʃikɯ
+本選 ho̞nsen
+あきまへん ɑkimɑhen
+ぬるり nɯrɯri
+盛り上がる moriɑgɑrɯ
+重症 dʒɯːʃo̞ɯ
+受け入れろ ɯkeirero̞
+絡み合う kɑrɑmiɑɯ
+出せ dɑze
+パワフル paɯaɸɯrɯ
+引く hikɯ
+健嗣 ke̞ndʒi
+揃っ so̞ɾo̞ʔ
+財務 zɑimɯ
+お手本 o̞teho̞n
+とうとう to̞ɯto̞ɯ
+逓減 te̞ːge̞n
+として to̞ ʃite
+もはや mohɑjɑ
+改めて ɑrɑtɑmete
+目立つ medɑt͡sɯ
+煮 ni
+くだろ kɯdɑro
+える e̞rɯ
+凍ら koːrɑ
+揚句 ɑgekɯ
+取り外し torihɑzɯʃi
+エイリアンフェチ eiriɑn ɸetʃi
+めんどう mendo̞ɯ
+陣 dʒin
+はじめて hɑdʒimete
+甲子園 ko̞ɯʃien
+ダンナ dɑnnɑ
+切り返す kiɾikaesɯ
+毎朝 mɑjɑsɑ
+乗船 dʒɑʊ̯sən
+転送 tenso̞ː
+筆者 hɪʔʃɑ
+醸し kɑmoʃi
+吸汗速 kjɯːkansokɯ
+チャンス tʃansɯ
+恵 me̞gɯmi
+指定 ʃite̞ː
+秘め hime̞
+滅ん ho̞ro̞n
+協業 kʲo̞ɯgʲo̞ɯ
+レベル re̞be̞rɯ
+周り maɯari
+黄 ki
+著作 tʃosakɯ
+休日 kʲɯːdʒit͡sɯ
+すっごい sɯʔgoi
+梅原 ɯmehɑrɑ
+グルメ gɯrɯme̞
+口説く kɯdo̞kɯ
+入念 ɲɯːne̞n
+呼び出さ jobidɑsɑ
+短期間 tɑnkikɑn
+シン ʃin
+ついつい t͡sɯit͡sɯi
+カルビー kɑrɯbiː
+クルトガ kɯrɯ togɑ
+ハードカバー hɑːdokɑbɑː
+険悪 kenakɯ
+貸 kɑʃi
+気持ちよかっ kimotʃi jokɑʔ
+加重 kɑdʒɯː
+いっそ iʔso̞
+持参 xisɑn
+転作 tensakɯ
+トドメ to̞do̞me
+厳粛 ge̞nʃɯkɯ
+電機株 denki kɑbɯ
+しゃべり ʃɑberi
+につれて ni t͡sɯre̞te̞
+徳間書店 tokɯmɑ ʃoten
+堅く kɑtɑkɯ
+車内 ʃɑnɑi
+すばらしさ sɯbaɾaʃiza
+餌 ezɑ
+ディスク disɯkɯ
+年度内 nendonɑi
+自身 dʒiʃin
+スタンド sɯtando
+駆除 kɯdʒo̞
+骨盤 kot͡sɯbɑn
+業界 gʲoːkɑi
+はう hɑɯ
+モンスタークライアント monsɯtaːkɯrajanto
+青木 ɑoki
+無意味 mɯimi
+顧問 ko̞mo̞n
+すばらしかっ sɯbaɾaʃikaʔ
+実装 dʒiʔsoː
+森山 morijɑmɑ
+喧嘩 kenkɑ
+教えよ o̞ʃiejo̞
+長年 nɑgɑnen
+初めて hɑdʒimete
+苛立ち irɑdɑtʃi
+お客 okʲɑkɯ
+黒板 kokɯbɑn
+頑張っ gɑnbɑʔ
+来賓 rɑihin
+他 hokɑ
+師範 ʃihɑn
+タイ tɑi
+気迫 kihɑkɯ
+萌 mo̞e
+旧制 kjɯːseː
+戦闘 sento̞ː
+ゴボウ go̞bo̞ɯ
+下山 gezɑn
+砧 kinɯtɑ
+スケール sɯkeːɾɯ
+いくつ ikɯt͡sɯ
+玉石混交 gjokusekikonkou
+問い合わせ tojaɯase
+呼び戻す jobimodosɯ
+タングステンツクイ taŋgɯsɯtentsɯkɯi
+染 so̞me
+高僧 ko̞ːso̞ː
+滑り落ち sɯberioki
+持ち込む mo̞tʃiko̞mɯ
+ジャニーズタレント dʒɑniːzɯ tɑrento
+氾濫 hɑnrɑn
+それと so̞ɾeto̞
+福山 ɸɯkɯjɑmɑ
+だんだん dɑndɑn
+初心者 ʃoʃinʃɑ
+過ぎる sɯgiɾɯ
+申し込み mo̞ɯʃiko̞mi
+用法 jo̞ɯho̞ɯ
+死去 ʃikʲo̞
+リポート ripo̞ːto̞
+運勢 ɯnsej
+快 kɑi
+下回る ʃitamaɯarɯ
+購入 ko̞ɯɲɯː
+教会 kʲoːkɑi
+終息 shɯːsokɯ
+気軽 kigɑrɯ
+大腸 dɑitʃoː
+役 jɑkɯ
+待っ mɑʔ
+つる t͡sɯrɯ
+スッキリ sɯʔkiɾi
+っていう ʔte̞ iɯ
+深セン ʃinsən
+キケン kike̞n
+誤報 go̞ho̞ɯ
+真希 mɑki
+負担 ɸɯtɑn
+ハンター hɑntɑː
+拭き ɸɯki
+ばっかり bɑʔkɑri
+記憶 kio̞kɯ
+喋っ ʃɑbeʔ
+粗 ɑrɑ
+言い出し ii dɑʃi
+あまりに ɑmɑri ni
+湖畔 kohɑn
+ちびちび tʃibitʃibi
+羊 hit͡sɯdʒi
+交わさ kaɯasa
+寒 kɑn
+身じろぎ midʒiro̞gi
+用品 jo̞ɯhin
+一人暮らし hitorigɯrɑʃi
+店舗 tenpo̞
+三河安城 mikaɯa andʒoː
+考えよ kɑngɑejo
+鬼頭 onigɑʃirɑ
+でも de mo̞
+たり tɑri
+集計 ʃɯːke̞ː
+信念 ʃinne̞n
+コンサルタント konsaɾɯtanto
+全校 zenko̞ɯ
+湧き出 ɯakide
+迷い mɑjoi
+ダンディー dɑndiː
+クルマ kɯrɯmɑ
+遼 rʲo̞ɯ
+活躍 kɑt͡sɯjɑkɯ
+記す ʃiɾɯsɯ
+憤る ikido̞ːrɯ
+勝敗 ʃoːhɑi
+明暗 meːɑn
+ひとたび hitotɑbi
+遊戯 jɯːgi
+新井 ɑrɑi
+お座敷 ozɑʃiki
+スーツ sɯːtsɯ
+質感 ʃit͡sɯkɑn
+うるさくっ ɯɾɯsakwʔ
+十字 dʒɯːdʒi
+鉱山 koːzɑn
+生じる ʃo̞ɯdʒirɯ
+民間 minkɑn
+兵庫 ço̞ɯgo̞
+やん jɑn
+歌声 ɯtɑgoe
+漁港 gʲo̞ko̞ɯ
+色濃く iro̞ ko̞kɯ
+有益 jɯːe̞ki
+葛藤 kɑʔtoː
+歩美 ɑjɯmi
+俺 o̞re
+スケジュール sɯkejɯːrɯ
+ホフマン hoɸɯmɑn
+招き mɑneki
+られる rɑrerɯ
+秩父宮 tʃitʃibɯnomijɑ
+外交 gɑikoː
+引き締まる hikiʃimɑrɯ
+快挙 kɑikʲo
+食害 ʃokɯgɑi
+言 ge̞n
+含め ɸɯkɯme̞
+つきよ t͡sɯkijo̞
+ロチェスター rokesɯtaː
+しまわ ʃimaɯa
+冒頭 bo̞ɯto̞ɯ
+暮らせる kɯɾaseɾɯ
+舌 ʃitɑ
+環 kɑn
+公用 ko̞ɯjo̞ɯ
+後天的 ko̞ɯtenteki
+バスケットボール basɯkeʔtoboːrɯ
+励む hɑgemɯ
+生命 se̞ːme̞ː
+レバノン rebɑnon
+軽く kɑrɯkɯ
+ディズニーランド dizɯniː rɑndo
+働きがい hɑtɑrɑkigɑi
+承り ɯketamaɯari
+補正 ho̞seː
+切ら kirɑ
+口笛 kɯtʃibɯe̞
+誠司 se̞idʒi
+無印 mɯdʒirɯʃi
+性教育 seːkjoːikɯ
+浅村 asamɯra
+音大 ondɑi
+思い出し omoidɑʃi
+切り出し kiridɑʃi
+走り書き hɑʃirigɑki
+射る irɯ
+原子力 genʃirʲo̞kɯ
+発売 hɑt͡sɯbɑi
+アシアン ɑʃi ɑn
+天気 te̞nki
+財閥 zɑibɑt͡sɯ
+ルーズ rɯːzɯ
+美和 miɯa
+口外 koːgɑi
+俊太郎 ʃɯntɑroː
+ロープ ro̞ːpɯ
+示唆 ʃisɑ
+キュート kʲɯːto̞
+甘える ɑmɑerɯ
+室 mɯro̞
+和む nɑgomɯ
+ふだん ɸɯdɑn
+息 iki
+豊 jɯtɑkɑ
+教科書 kʲoːkɑʃo
+ひとえに hito̞eni
+美貌 bibo̞ɯ
+拍車 hɑkɯʃɑ
+試算 ʃisɑn
+天津 te̞nʃin
+持論 dʒiro̞n
+メン me̞n
+伏兵 ɸɯkɯhe̞ː
+なんと nɑn to
+サザエさん sɑsɑesɑn
+女子高 dʒo̞ʃiko̞ɯ
+王座 oːzɑ
+敷い ʃii
+死ん ʃin
+うなずける ɯnɑzɯkerɯ
+なに nɑni
+鍵 kɑgi
+ガマン gɑmɑn
+かゆし kɑjɯ ʃi
+飯野 iːno̞
+飲まさ nomɑsɑ
+唯一 jɯːit͡sɯ
+皿洗い sɑɾɑɑɾɑi
+音声 o̞nseː
+下條 ʃimo̞dʒo̞ɯ
+ワケ ɯake
+職務 ʃo̞kɯmɯ
+場所 bɑʃo
+ナビゲーター nɑbigeːtɑː
+うろこ ɯro̞ko̞
+摂ら torɑ
+うかがえ ɯkɑgɑe
+快眠 kɑimin
+容疑 jo̞ɯgi
+英紙 e̞ːʃi
+何故 nɑze
+昨年 sakɯnen
+バリュー bɑrʲɯː
+建て直せ tɑtenɑose
+戦後 seŋgo̞
+盗聴 to̞ɯtʃo̞ɯ
+誤り ɑjɑmɑri
+法 ho̞ɯ
+基板 kibɑn
+年齢 ne̞nre̞ː
+来週 rɑiʃɯː
+裸 hɑdɑkɑ
+争う aɾasoɯ
+雪景色 jɯki ke̞ʃiki
+背面 hɑimen
+逆 gʲɑkɯ
+五月晴れ satsɯkibaɾe
+しがらみ ʃigɑrɑmi
+通産 tsɯːsan
+色 iro̞
+抜け nɯke̞
+居る irɯ
+癇癪 kɑnʃɑkɯ
+態度 tɑido
+暗唱 ɑnʃoː
+売れる ɯre̞rɯ
+ともに to̞mo̞ ni
+合格 goːkɑkɯ
+新生 ʃinse̞ː
+投げれ nɑgere
+見せつけ mise̞tsɯke̞
+関脇 sekiɯake
+墜落 t͡sɯirɑkɯ
+覚める sameɾɯ
+着替え kigɑe
+ナスダック nasɯdaʔkɯ
+結びつい mɯsɯbitsɯi
+支給 ʃikʲɯː
+冤罪 enzɑi
+戦況 seŋkjo̞ː
+担当 tɑntoː
+ドゥラララー dɯrɑrɑrɑː
+禁煙 kinən
+トラウマ torɑɯmɑ
+変わる kaɯarɯ
+杯 sakazɯki
+アイディア ɑidiɑ
+件数 kensɯː
+いろんな ironnɑ
+感情 kɑndʒoː
+議定 gite̞ː
+バター bɑtɑː
+滝 tɑki
+釣り鐘 t͡sɯrigɑne
+代々 dɑidɑi
+完治 kɑntʃi
+カノジョ kɑnodʒo
+見込ん miko̞n
+安らぎ jasɯɾagi
+老後 ro̞ɯgo̞
+製造 seːzo̞ː
+農村 no̞ːso̞n
+バラバラ bɑrɑbɑrɑ
+見せ物 misemo̞no̞
+気迷い kimɑjoi
+万全 bɑnzen
+大切 tɑisetsɯ
+機密 kimit͡sɯ
+決まる kimɑrɯ
+伺う ɯkɑgɑɯ
+スナック sɯnakekɯ
+酔っ払う joʔpɑrɑɯ
+カタツムリ kɑtɑt͡sɯmɯri
+でかい dekɑi
+松木 mɑt͡sɯgi
+成人 se̞ːjin
+襲っ o̞so̞ʔ
+吸汗性 kjɯːkɐnseː
+ルリ rɯri
+案内 ɑnnɑi
+祝う iɯaɯ
+菅 kɑn
+面接 me̞nse̞tsɯ
+繰上げ kɯriɑge
+手本 teho̞n
+根深い nebɯkɑi
+ギラギラ girɑgirɑ
+深い bɯkɑi
+フィルター ɸirɯtɑː
+断然 dɑnzen
+違反 ihɑn
+読ま jomɑ
+必然 hit͡sɯze̞n
+操る ɑjɑt͡sɯrɯ
+三浦 miɯrɑ
+真っ向 mɑʔkoː
+負債 fɯzai
+生み ɯmi
+有限 jɯːge̞n
+王室 o̞ɯʃit͡sɯ
+幸吉 ko̞ɯkitʃi
+掘る ho̞rɯ
+ヤン jɑn
+楢葉 nɑrɑhɑ
+生産 seːsɑn
+松永 mɑt͡sɯnɑgɑ
+下記 kɑki
+郎 ro̞ɯ
+好ましく konomɑʃikɯ
+親戚 ʃinse̞ki
+オバア obɑː
+第一歩 dɑiiʔ po
+市町村 ʃiːcɯsɔnː
+演繹 e̞ne̞ki
+怠ら okotɑrɑ
+観賞 kɑnʃoː
+蓋 ɸɯtɑ
+濃く ko̞kɯ
+評し ço̞ɯʃi
+受信 dʒɯʃin
+絶滅 ze̞t͡sɯme̞t͡sɯ
+延長 entʃo̞ɯ
+デジ de̞dʒi
+作り t͡sɯkɯri
+面 me̞n
+離れる hɑnɑrerɯ
+赤 ɑkɑ
+公算 koːsɑn
+達成 tɑʔseː
+ミクロ mikɯro̞
+過敏 kɑbin
+肝 kimo̞
+相馬 soːmɑ
+節 ɸɯʃi
+浜 hɑmɑ
+情報 dʒo̞ɯho̞ɯ
+逆立ち sɑkɑdɑtʃi
+自陣 dʒidʒin
+直後 tʃo̞kɯgo̞
+連日 re̞ndʒit͡sɯ
+句 kɯ
+何かしら nɑn kɑʃirɑ
+貸し kɑʃi
+世間 se̞ke̞n
+欲し ho̞ʃi
+東側 higaʃigaɯa
+ディーエヌエー diːe̞nɯe̞ː
+前向き mɑemɯki
+危機 kiki
+宇津木 ɯt͡sɯki
+クレイ kɯre̞ː
+ソニー so̞niː
+臭く kɯsakɯ
+年上 to̞ʃiɯe
+文化 bɯnkɑ
+中里 nɑkɑsɑto
+粒々 t͡sɯbɯt͡sɯbɯ
+一つ hito̞t͡sɯ
+系 ke̞ː
+形状 keːdʒo̞ː
+聴き kiki
+税関 zeːkɑn
+言え ie̞
+経理 ke̞ːri
+太田 oːtɑ
+樹里 dʒɯri
+ゲテモノ getemo̞no̞
+協議 kʲo̞ɯgi
+宮古 mijɑko
+育つ sodatsɯɯ
+走り hɑʃiri
+でっ de̞ʔ
+着手 tʃɑkɯʃɯ
+抗生 ko̞ːseː
+亀田 kɑmedɑ
+大人気 dɑininki
+ゲーミフィケーション geːmiɸikeːʃo̞n
+ガンガン gɑngɑn
+崩れ kɯzɯre̞
+入国 ɲɯːko̞kɯ
+間もなく mɑ mo nɑkɯ
+大和 jɑmɑto
+今年 ko̞to̞ʃi
+学ば mɑnɑbɑ
+大阪大学 osakadaigakɯ
+異常 idʒo̞ɯ
+グチ gɯtʃi
+審議 ʃingi
+改定 kɑiteː
+癒す ijasɯ
+心的 ʃinte̞ki
+水源 søyxən
+江 ko̞ɯ
+雅子 mɑsɑko
+恥 hɑdʒi
+遭遇 soːɡɯː
+ここ ko̞ko̞
+召集令状 ʃo̞ːʃɯː reːdʒo̞ɯ
+民主党 minʃɯto̞ɯ
+乗用車 dʒoːjoːʃɑ
+伴 to̞mo̞
+属性 zokɯseː
+無関係 mɯkɑnkeː
+さておい sɑteoji
+社会 ʃɑkɑi
+立ち直る tɑtʃinɑorɯ
+返す kaesɯ
+売り手 ɯrite̞
+歌い ɯtɑi
+読み取っ jo̞mito̞ʔ
+疑問符 gimo̞nɸɯ
+橋渡し haʃiɯataʃi
+ありがたい ɑrigɑtɑi
+こちら kotʃirɑ
+右足 migiɑʃi
+岡崎 okɑzɑki
+しつこい ʃit͡sɯko̞i
+腸 tʃo̞ɯ
+指揮 ʃiki
+人ごみ hito̞go̞mi
+渡る ɯatarɯ
+手伝う tet͡sɯdɑɯ
+状況 dʒo̞ɯkʲo̞ɯ
+升 masɯ
+起訴 kiso̞
+顧慮 ko̞rʲo̞
+パチンコ pɑtʃinko
+モフモフ mo̞ɸɯmo̞ɸɯ
+挑戦 tʃɑʊ̯sən
+本島 ho̞nto̞ɯ
+伸び伸び no̞bino̞bi
+パワハラ paɯahara
+固体 kotɑi
+今月 ko̞nget͡sɯ
+世界中 sekaitʃɯː
+聴取 tʃo̞ɯʃɯ
+液体 ekitɑi
+運用 ɯnjo̞ː
+少年 ʃo̞ɯnen
+切り抜け kirinɯke̞
+厚かっ ɑt͡sɯkɑʔ
+湯船 jɯbɯne̞
+ネギ ne̞gi
+つきっきり t͡sɯkiʔkiri
+オーバー oːbɑː
+ハブ hɑbɯ
+カーボン kɑːbon
+サイタマニア sɑitɑmɑniɑ
+立っ tɑʔ
+気配 kehɑi
+領内 rʲoːnɑi
+け ke̞
+菜月 nɑt͡sɯki
+大使館 tɑiʃikɑn
+キョン kʲo̞n
+待て mɑte
+多め o̞ːme
+ゴマ gomɑ
+取り to̞ri
+コーチング ko̞ːtʃingɯ
+宿題 ʃɯkɯdɑi
+そっくり soʔkɯri
+いら irɑ
+乗り継ぐ no̞rit͡sɯgɯ
+哲 ɑkirɑ
+歌謡 kɑjoː
+順調 dʒɯntʃo̞ɯ
+タクト tɑkɯto
+洋服 jo̞ɯɸɯkɯ
+イライラ irɑirɑ
+見返し mikɑeʃi
+意味 imi
+匂い nio̞i
+ちゃっ tʃɑʔ
+ブログ bɯro̞gɯ
+代わっ kaɯaʔ
+近ごろ tʃikɑgoro
+朝方 ɑsɑɣɑtɑ
+基準 kidʒɯn
+友 to̞mo̞
+かっこう kɑʔkoː
+利く kikɯ
+ファクト ɸɑkɯto
+またとない mɑtɑ to nɑi
+賢 ke̞n
+ポンタ pon tɑ
+ニューヨーク ɲɯːjo̞ːkɯ
+意義 igi
+削っ ke̞zɯʔ
+大丈夫 dɑidʒoːbɯ
+腐敗 ɸɯhɑi
+麻痺 mɑhi
+置換 tʃikɑn
+パトロール pɑtoroːrɯ
+心地 ko̞ko̞tʃi
+平澤 hiɾasaɯa
+市長 ʃitʃo̞ɯ
+妊娠 ninʃin
+ソファ sofɑ
+鼻くそ hanakɯso
+出尽くし de̞ t͡sɯkɯʃi
+お年玉 otoʃidɑmɑ
+欠か ket͡sɯ kɑ
+顔面 gɑnmen
+迫ら sɑkorɑ
+取り戻せる toɾimodoseɾɯ
+流出 rʲɯːʃɯt͡sɯ
+さよなら sɑjonɑɾɑ
+浮 ɯki
+航空 ko̞ɯkɯː
+渡御 to̞gʲo̞
+東証 to̞ɯʃo̞ɯ
+入 iri
+いくら ikɯrɑ
+謀略 boːrʲɑkɯ
+診断 ʃindɑn
+ピル pirɯ
+人類 dʒinrɯi
+ハーフタイム hɑːɸɯ tɑimɯ
+放さ hɑnɑsɑ
+関連 kɑnren
+込め ko̞me
+顔色 kɑoiro
+他方 tɑhoː
+待ち遠しい mɑtʃidoːʃiː
+起き上がっ okiɑgɑʔ
+寸法 sɯnpoː
+入札 ɲɯːsatsɯ
+文 bɯn
+着任 tʃɑkɯnin
+ばば bɑbɑ
+特捜 tokɯsoː
+作 sakɯ
+アキュラシステム akjɯraʃisɯtemɯ
+タイヤ tɑijɑ
+採算 sɑisɑn
+富裕 ɸɯjɯː
+全て sɯbete
+ヤマハ jɑmɑhɑ
+合法 go̞ɯho̞ɯ
+届く to̞do̞kɯ
+おすすめ osɯsɯme
+折しも o̞ri ʃimo̞
+慎重 ʃintʃo̞ɯ
+松田 mɑt͡sɯdɑ
+公立大 koːrit͡sɯdɑi
+情緒 dʒo̞ɯtʃo̞
+せっかち seʔkɑtʃi
+挽き hiki
+中神 nɑkɑgɑmi
+娯楽 gorɑkɯ
+見世物 misemo̞no̞
+御託 gotɑkɯ
+食らえ kɯrɑe
+エンクロージャ enkɯroːdʒɑ
+複数 fɯkɯsɯː
+薄氷 hɑkɯçoː
+マニフェスト manifesɯto
+耐久 tɑikʲɯː
+焼却 ʃoːkʲɑkɯ
+体育館 tɑiikɯkɑn
+脆く mo̞ro̞kɯ
+苦慮 kɯrʲo̞
+進路 ʃinro̞
+向上 ko̞ɯdʒo̞ɯ
+問題 mondɑi
+バズマーケティング bɑzɯ mɑːketingɯ
+争議 so̞ːɡi
+年貢 ne̞ngɯ
+ガラ gɑrɑ
+遥 hɑrɯkɑ
+ジャーナリスト dʒaːnarisɯto
+引き上げ hikiɑge
+スロベニア sɯɾobenia
+易々 jasɯjasɯ
+国連 ko̞kɯren
+追い込ま oikomɑ
+やたら jɑtɑrɑ
+十中八九 dʒɯːtʃɯː hɑtʃi kʲɯː
+ガラマンテス gaɾamantesɯ
+敵陣 te̞kidʒin
+載せ no̞ze
+洗い出し ɑrɑidɑʃi
+奪う ɯbɑɯ
+アプリ ɑpɯri
+航続 ko̞ɯzo̞kɯ
+たま tɑmɑ
+浜田 hɑmɑdɑ
+競い kio̞i
+さあ sɯɑ
+中心 tʃɯːʃin
+取り戻し to̞rimo̞do̞ʃi
+有効 jɯːko̞ɯ
+閉まっ ʃimɑʔ
+小正月 ʃoːʃoːgɑt͡sɯ
+河田 kaɯada
+外 so̞to̞
+大家 oːjɑ
+帯状疱疹 tɑidʒoː hoːʃin
+制作 seːsakɯ
+司会 ʃikɑi
+本拠地 ho̞nkʲo̞ tʃi
+弾 tɑmɑ
+苦手 nigɑte
+願う negɑɯ
+あたたか ɑtɑtɑkɑ
+自民 dʒimin
+なんて nɑnte
+未 hit͡sɯdʒi
+創出 soːshɯt͡sɯ
+契約 keːjɑkɯ
+アチッ ɑ tʃi
+怒ら okorɑ
+ピアノ piɑno
+現代 gendɑi
+問う to̞ɯ
+テレビ局 terebi kʲo̞kɯ
+でん de̞ n
+中間 tʃɯːkɑn
+打た ɯtɑ
+快感 kɑikɑn
+興味深い kʲoːmi bɯkɑi
+それで so̞ɾede
+エキス ekisɯ
+先送り sakiokɯrʲi
+激甚 ge̞kidʒin
+影 kɑge
+ホール ho̞ːrɯ
+炭酸 tɑnsɑn
+テイラー teirɑː
+とまり tomɑri
+伊藤 ito̞ɯ
+製薬 seːjakɯ
+潰す tsɯbɯsɯ
+ヤマト運輸 jamatoːɲjɯ
+重層 dʒɯːsoː
+よっぽど jo̞ʔpo̞do̞
+突い t͡sɯi
+理髪 rihɑt͡sɯ
+組立 kɯmitɑte
+それだけに soredɑkeni
+傾い kɑtɑmɯi
+小名浜 onɑhɑmɑ
+豆 mɑme
+明子 ɑkiko
+スリリング sɯɾiɾiŋgɯ
+繰り返さ kɯɾikaesa
+プログラミング pɯrogɯrɑmingɯ
+撮ら torɑ
+乗り換え norikɑe
+身柄 migɑrɑ
+低調 teːtʃo̞ɯ
+震える ɸɯrɯe̞rɯ
+はね ɯa ne
+体形 tɑːkeː
+則 no̞ri
+晩餐 bɑnsɑn
+維持 idʒi
+増え ɸɯe̞
+耳 mimi
+止まり tomɑri
+思い入れ o̞mo̞iire
+色々 iro̞iro̞
+合同 go̞ɯdo̞ɯ
+いか ikɑ
+ほぼ ho̞bo̞
+や jɑ
+抑制 jokɯseː
+味噌汁 misoʃiɾɯ
+楽しん tɑnoʃin
+同じ onɑdʒi
+マジメ mɑdʒime
+豹変 ço̞ɯhen
+足どり ɑʃidori
+伝 t͡sɯte̞
+蒸し暑い mɯʃiɑt͡sɯi
+インゲン inge̞n
+支局 ʃikʲo̞kɯ
+突き放す tsɯkihanasɯ
+開き直る hirɑkinɑorɯ
+空回り karamaɯari
+束ねる tɑbɑnerɯ
+ソフトバンク sofɯtobankɯ
+日立 hitɑtʃi
+窮 kʲɯː
+仲良し nɑkɑjoʃi
+演説 e̞nze̞t͡sɯ
+行き着く ikit͡sɯkɯ
+古川 ɸɯrɯkaɯa
+康 jasɯʃi
+遍在 henzɑi
+用語 jo̞ɯgo̞
+原理 ge̞nri
+毛皮 kegaɯa
+慰労 iro̞ɯ
+諸君 ʃo̞kɯn
+描く egɑkɯ
+ストレス sɯtoɾesɯ
+原油 dʒenjɯ
+軽傷 keːʃo̞ɯ
+絵柄 egɑrɑ
+症 ʃo̞ɯ
+為 tɑme
+コレクション ko̞rekɯʃo̞n
+追う o̞ɯ
+金田 kɑnedɑ
+菌 kin
+退院 tɑiin
+割烹 kɑʔpoː
+等地 to̞ɯtʃi
+たずね tɑzɯne
+砂糖 sɑtoː
+公 ko̞ɯ
+声明 se̞ːme̞ː
+着く t͡sɯkɯ
+なんとも nɑn to mo
+武吉 tɑkejoʃi
+置き忘れ okiwasɯre
+釜石 kɑmɑiʃi
+なるほど nɑrɯhodo
+見つけ mit͡sɯke̞
+イルカ irɯkɑ
+グリー gɯriː
+粛々と ʃɯkɯʃɯkɯ to̞
+ごはん gohɑn
+マント mɑnto
+匡人 mɑsɑto
+ウォーター oːtɑː
+若い ɯakai
+経て he̞te̞
+徴兵 tʃo̞ːheː
+合い ɑi
+一足 isokɯ
+プッシュ pɯsʃɯ
+タクシー tɑkɯʃiː
+由々しき jɯjɯʃiki
+広告 ko̞ɯko̞kɯ
+全員 ze̞nin
+腕 ɯde̞
+両氏 rʲo̞ɯʃi
+大輪 tɑirin
+木嶋 kidʒimɑ
+靴 kɯt͡sɯ
+うつ ɯt͡sɯ
+呼べ jo̞be
+ウジ ɯdʒi
+近衛 ko̞no̞e
+前夜 zeɲɑ
+よほど jo̞ho̞do̞
+費やす tsɯijasɯ
+いじら idʒirɑ
+五輪 go̞rin
+年収 ne̞nʃɯː
+こなし konɑʃi
+軽水炉 keːsɯiɾo
+灸 kʲɯː
+集まら ɑt͡sɯmɑrɑ
+かきまわし kakimaɯaʃi
+ヒョンジュン ço̞ndʒɯn
+村人 mɯrɑbito
+逃亡 to̞ɯbo̞ɯ
+コジマ kodʒimɑ
+こめ ko̞me
+地域 tʃiiki
+被害 higɑi
+ママ mɑmɑ
+乗り no̞ri
+ファイル ɸɑirɯ
+裁量 sɑiɾjoː
+嘉 jo̞ʃimi
+感傷 kɑnʃoː
+新鋭 ʃinɑi
+福島 ɸɯkɯʃimɑ
+転がっ korogɑʔ
+卒 sotsɯɯ
+巻き起こす makiokosɯ
+憎ん nikɯn
+ニューズ ɲɯːzɯ
+認識 ninʃiki
+乱立 rɑnrit͡sɯ
+おごり o̞go̞ri
+所作 ʃo̞zɯ
+風貌 ɸɯːbo̞ɯ
+最期 sɑiɣo
+教員 kʲo̞ɯin
+伝説 de̞nse̞tsɯ
+ジビエ dʒibie̞
+浴び ɑbi
+ボーカル boːkɑrɯ
+香山 kɑjɑmɑ
+締まり ʃimɑri
+さぞ sɑzo
+あまりの ɑmɑri no
+掃除 so̞ːʒi
+売り込み ɯriko̞mi
+ごとき go̞to̞ki
+シチュエーション ʃitʃɯeːʃo̞n
+北部 ho̞kɯbɯ
+防衛 bo̞ːeː
+フライ ɸɯrɑi
+倒引当 toːhikiɑte
+歯車 hɑgɯrɯmɑ
+傷つく kizɯt͡sɯkɯ
+凶 kʲo̞ɯ
+せろ seɾo̞
+顔見知り kɑo miʃiri
+金利 kinri
+神主 kɑnnɯʃi
+手助け tedasɯke
+説明 se̞tsɯme̞ː
+さまよっ sɑmɑjoʔ
+先立ち sɑkidɑtʃi
+磁力 dʒirʲo̞kɯ
+太古 tɑiko
+きか ki kɑ
+考え直し kɑngɑenɑoʃi
+満面 mɑnmen
+添える soeɾɯ
+質問 ʃit͡sɯmo̞n
+自販機 dʒihɑn ki
+挙がっ ɑgɑʔ
+目立っ medɑʔ
+他人 tɑnin
+大森南 oːmori minɑmi
+占め ʃime̞
+ヘン he̞n
+熾烈 ʃire̞t͡sɯ
+極めて kiɯamete
+開い hirɑi
+ドルトムント do̞rɯto̞mɯnto̞
+盛衰 seːsɯi
+置く o̞kɯ
+皇后 ko̞ɯgo̞ɯ
+キーワード kiː ɯaːdo
+多く o̞ːkɯ
+役場 jɑkɯbɑ
+カーネマン kɑːne mɑn
+手腕 ʃɯɯan
+頑強 gɑnkʲoː
+リリース ririːsɯ
+マネージャー mɑneːdʒɑː
+ひそめ hiso̞me
+コメディ ko̞medi
+ペダル pedɑrɯ
+ずいぶん zɯibɯn
+盗ま nɯsɯma
+ソニーシャープ soniːʃaːpɯ
+冊 satsɯsɯ
+混ぜる mɑzerɯ
+過言 kɑgon
+タケ tɑke
+品位 hini
+我流 gɑrʲɯː
+傷跡 kizɯɑto
+斉藤 sɑitoː
+物言い mo̞no̞iː
+イチ itʃi
+愛媛 e̞hime̞
+国体 kokɯtɑi
+ぼとぼと bo̞to̞bo̞to̞
+ワタ ɯata
+尊重 so̞ntʃo̞ː
+テスト tesɯto
+瞳 hito̞mi
+役目 jɑkɯme
+なおのこと nɑo no koto
+小柄 kogɑrɑ
+半端 hɑnpɑ
+客足 kʲɑkɯɑʃi
+分離 bɯnri
+瀬名 senɑ
+ギザギザ gizɑgizɑ
+粘り nebɑri
+郷土 kʲo̞ɯdo̞
+レイアウト reiɑɯto
+外務省 gɑimɯʃoː
+波田 nɑmitɑ
+回文 kɑibɯn
+リサ risɑ
+褒め ho̞me
+どうせい do̞ːseː
+夢中 mɯtʃɯː
+出来 de̞ki
+繁盛 hɑndʒoː
+大人しい otonɑʃiː
+乗用 dʒo̞ɯjo̞ɯ
+偶然 gɯːze̞n
+もらえる morɑerɯ
+不妊症 ɸɯninʃo̞ɯ
+哀し kɑnɑʃi
+賑わう nigiɯaɯ
+脚注 kʲɑkɯtʃɯː
+高水準 koːsuidʒun
+変わら kaɯara
+口数 kɯtʃikɑzɯ
+本気 ho̞nki
+班 hɑn
+例えば tɑtoebɑ
+灯る to̞mo̞rɯ
+振れ ɸɯre̞
+言下 genkɑ
+キャッ kʲɑ
+主任 ʃɯnin
+山地 sɑntʃi
+向上心 ko̞ɯdʒo̞ɯʃin
+直す naosɯ
+思い浮かべ omoiɯkɑbe
+王道 o̞ɯdo̞ɯ
+連作 ɾensakɯ
+過激 kɑgeki
+汗 ɑse
+引き分け hikiɯake
+儲かり moːkɑri
+仮 kɑri
+特別 to̞kɯbet͡sɯ
+フィー ɸiː
+ツー t͡sɯː
+鎧 jo̞ro̞i
+悪 ɯarɯ
+収まっ osɑmɑʔ
+延吉 e̞nkitʃi
+安けりゃ jasɯkerja
+加湿 kɑʃit͡sɯ
+尽くす tsɯkɯsɯ
+和らげる jaɯaragerɯ
+怠慢 tɑimɑn
+聞こえる kiko̞erɯ
+建議 ke̞ngi
+ふんだんに ɸɯndɑn ni
+うつむき ɯt͡sɯmɯki
+虐待 gʲɑkɯtɑi
+ほしい ho̞ʃiː
+議題 gidɑi
+言えよ iejo̞
+まい mɑi
+鋳造 tʃɯːzo̞ɯ
+劇場 gekidʒo̞ɯ
+楽しむ tɑnoʃimɯ
+成功 seːko̞ː
+育てる sodaterɯ
+当初 to̞ɯʃo̞
+不完全 ɸɯkɑnzen
+同系 do̞ːkeː
+呼び起こし jo̞bio̞ko̞ʃi
+彼等 kɑrerɑ
+ダブルス dabɯɾɯsɯ
+礼儀 re̞ːgi
+にじみ出 nidʒimide̞
+們 ?
+発足 hosokɯ
+組ま kɯmɑ
+ピクッ pikɯ
+北見 kitɑmi
+デート deːto̞
+オー o̞ː
+しかして ʃikɑʃi te
+逸脱 it͡sɯdɑt͡sɯ
+コマ komɑ
+はぐる hɑgɯrɯ
+デザイナー dezɑinɑː
+車載 ʃɑzɑi
+上下水道 ʒoːgesɯidoː
+売れ残り ɯreno̞ko̞ri
+本県 ho̞nken
+武士 bɯʃi
+成り立っ nɑritɑʔ
+につれ ni t͡sɯre̞
+報じ ho̞ɯdʒi
+品薄 ʃinaɯsɯ
+異なり kotonɑri
+要職 jo̞ɯʃo̞kɯ
+最新 sɑiʃin
+大平 oːhirɑ
+受験 dʒɯke̞n
+あれ ɑre
+叱責 ʃiʔse̞ki
+日没 nitʃibo̞t͡sɯ
+筋道 sudʒimitʃi
+反響 hɑnkʲoː
+尻 ʃiri
+太宰 dɑzɑi
+リターン ritɑːn
+明言 me̞ːge̞n
+死な ʃinɑ
+既存 kiso̞n
+先安 sakijasɯ
+名士 me̞ːʃi
+水色 mizɯiro̞
+女王 dʒo̞ːɯ
+矛盾 mɯdʒɯn
+上旬 dʒo̞ɯdʒɯn
+迷走 meːso̞ː
+親方 ojɑkɑtɑ
+区間 kɯkɑn
+中部 tʃɯːbɯ
+壊さ kovɑsɑ
+どうやら doː jɑrɑ
+非難 hinɑn
+労 ro̞ɯ
+蒙 mo̞ɯ
+航 ko̞ɯ
+下げ sɑge
+略語 rʲɑkɯgo
+雪印 jɯkidʒirɯʃi
+態勢 tɑiseː
+地盤 dʒibɑn
+ライト rɑito
+情け nɑsɑke
+クール kɯːrɯ
+分 bɯn
+製品 se̞ːhin
+アンケート ɑnkeːto
+偲び ʃino̞bi
+沢山 takɯsan
+バス basɯ
+巣 sɯ
+誇り ho̞ko̞ri
+暦 ko̞jo̞mi
+おば obɑ
+引き締め hikiʃime̞
+一丸 itʃigɑn
+緩ん jɯrɯn
+メディア mediɑ
+着脱 tʃɑkɯdɑt͡sɯ
+三鷹 mitɑkɑ
+中国 tʃɯːgo̞kɯ
+囓 ?
+一山 hitojɑmɑ
+スイ sɯi
+惹か hikɑ
+大義 tɑigi
+いき iki
+産めよ ɯmejo̞
+ソーシャルゲーム soːsharɯgeːmɯ
+プレゼント pɯrezento̞
+起こそ o̞ko̞so̞
+出世 ʃɯsɨ
+お座なり ozɑnɑri
+接近 se̞ʔkin
+海外 kɑigɑi
+息苦しい ikigɯrɯʃiː
+交戦 kɑʊsən
+銀行 ginko̞ɯ
+横浜ベイスターズ jokohamabeisɯtaːzɯ
+おかけ okɑke
+切り裂け kiɾisɯke̞
+追い打ち o̞iɯtʃi
+フィルム ɸirɯmɯ
+ゾーン zo̞ːn
+ホテルオークラ hoterɯ oːkɯrɑ
+眼前 gɑnzen
+想定 so̞ːteː
+ミュージシャン mʲɯːdʒiʃɑn
+建売り tɑteɯri
+忍者 nindʒɑ
+チーク tʃiːkɯ
+低 te̞ː
+きっ kiʔ
+ケトル keto̞rɯ
+スケート sɯkeːto
+モンスターペアレント monsɯtaːpearento
+潤い ɯrɯo̞i
+ブルワリー bɯrɯ ɯariː
+こんな konnɑ
+だらし dɑrɑʃi
+東洋大 toːjoːdɑi
+由樹 jɯki
+弁 be̞n
+呼び寄せ jo̞bijo̞se
+短い midʒikɑi
+いやあ ijɑː
+怖い koɯai
+穿っ ho̞dʒiʔ
+役立っ jɑkɯdɑʔ
+恒例 ko̞ːreː
+シェフ ʃe̞ɸɯ
+門構え mongɑmɑe
+勝ち抜く kɑtʃi nɯkɯ
+扱える ɑt͡sɯkɑerɯ
+講じ ko̞ɯdʒi
+ろくな rokɯnɑ
+流行れ hɑjɑre
+サイゼリヤ sɑizeɾiɑ
+兼一 ke̞n itʃi
+それに so̞ɾeni
+キレ kire̞
+圧電 ɑt͡sɯden
+盗み nɯsɯmi
+終える o̞erɯ
+空気 kɯːki
+年内 nennɑi
+押し切ら oʃikirɑ
+足跡 ɑʃiɑto
+年賀 nengɑ
+よぎっ jo̞giʔ
+常備 dʒo̞ɯbi
+さっさと sɑʔsɑto
+業態 gʲoːtɑi
+まれ mɑre
+素朴 sobokɯ
+盗塁 to̞ɯrɯi
+大いに o̞ːini
+この間 kono mɑ
+北側 kitagaɯa
+売ろ ɯro̞
+神経 ʃinke̞ː
+東京ガス tokjogasɯ
+市場 ʃidʒo̞ɯ
+ご覧 gorɑn
+印税 inze̞ː
+リラックス ɾiɾakekɯsɯ
+安置 ɑntʃi
+先行き sakijɯki
+粘り気 nebɑrike
+醤油 ʃo̞ɯjɯ
+滑り降りる sɯbeɾjoɾiɾɯ
+ぜいたく zeːtɑkɯ
+面白かっ omoʃirokɑʔ
+昭和 ʃoːɯa
+往復 o̞ɯɸɯkɯ
+分泌 bɯnpit͡sɯ
+バレ bɑre
+もたれれ motɑrere
+殴れ nɑgɯre
+値下げ nezɑge
+投稿 to̞ɯko̞ɯ
+やってき jɑʔteki
+小千谷 odʒijɑ
+感動 kɑndoː
+ぼう bo̞ɯ
+アドオン ɑdo on
+激しい hɑgeʃiː
+なかろ nɑkɑro
+ワーキングプア ɯaːkingɯ pɯa
+手鏡 tekɑgɑmi
+徹子 tet͡sɯko̞
+参議院 sɑndʒiin
+垂れ tɑre
+洪水 ko̞ɯzɯi
+憎たらしく nikɯtɑrɑʃikɯ
+前売り mɑeɯri
+ギボンズ gibo̞nzɯ
+お尋ね otɑzɯne
+ひざ掛け hizɑkɑke
+単刀直入 tɑntoː tʃokɯɲɯː
+昼食 tʃɯːʃo̞kɯ
+向わ mɯkaɯa
+セクハラ sekɯhaɾa
+コンテナ kontenɑ
+経験 ke̞ːke̞n
+會社 kɑiʃɑ
+許す jɯɾɯsɯ
+秘密 himit͡sɯ
+直面 tʃo̞kɯmen
+怪我 kegɑ
+黄色 kiiro̞
+緩める jɯrɯme̞rɯ
+優秀 jɯːʃɯː
+論証 ro̞nʃo̞ɯ
+リニア riniɑ
+ボーイング bo̞ːingɯ
+苦しむ kɯrɯʃimɯ
+福袋 ɸɯkɯbɯkɯro̞
+たい tɑi
+従っ ʃitɑgɑʔ
+よそ jo̞so̞
+天下り ɑmɑkɯdɑri
+与野党 jojɑtoː
+スキャン sɯkjan
+イスラエル isɯɾaeɾɯ
+含ん ɸɯkɯn
+すみ sɯmi
+ところ to̞ko̞ro̞
+サボっ sɑboʔ
+主 nɯʃi
+インターハイ intɑː hɑi
+ヒール hiːrɯ
+長女 tʃo̞ɯdʒo̞
+荒 ko̞ɯ
+学年 gɑkɯnen
+非常勤 hidʒo̞ɯkin
+ぶっ bɯʔ
+体育 tɑiikɯ
+述べよ no̞bejo̞
+怖 koɯa
+細かい komɑkɑi
+りんご ringo̞
+う ɯ
+セリ se̞ɾi
+たたえ tɑtɑe
+とっさ toʔsɑ
+杭州 ko̞ɯʃɯː
+ボーイスカウト boɪsɯkaɯto
+アレン ɑren
+嬉しい ɯre̞ʃiː
+愚か orokɑ
+アンジェリーナ ɑndʒeriːnɑ
+保と ho̞ to̞
+美智子 mitʃiko̞
+尋ね tɑzɯne
+際立つ kiɯadat͡sɯ
+人工 dʒinko̞ɯ
+窓口 mɑdogɯtʃi
+凶作 kjoːsakɯ
+都庁 to̞tʃo̞ɯ
+ついに t͡sɯini
+中米 tʃɯːbe̞ː
+同年 do̞ɯnen
+荒井 ɑrɑi
+ポーチ po̞ːtʃi
+ケープタウン keːpɯtɑɯn
+噴火 ɸɯnkɑ
+流れる nɑgɑrerɯ
+決定的 ke̞ʔte̞ːte̞ki
+何しろ nɑniʃiro
+出たとこ勝負 detɑ toko ʃoːbɯ
+貿易 bo̞ɯeki
+美しく ɯt͡sɯkɯʃikɯ
+投下 toːkɑ
+良き jo̞ki
+肉声 nikɯseː
+ネックレス neʔkɯɾesɯ
+ホッ ho̞
+それ so̞ɾe
+軟調 nɑntʃoː
+哲哉 tet͡sɯjɑ
+タネ tɑne
+リスクヘッジ ɾisɯkɯheʔdʒi
+酌量 ʃɑkɯrʲoː
+振り向け ɸɯrimɯke̞
+お札 oɸɯdɑ
+買い戻し kɑimodoʃi
+菱 hiʃi
+進める sɯsɯmeɾɯ
+神戸 ko̞ɯbe
+妙子 tɑeko
+ベタ betɑ
+語れ kɑtɑre
+混雑 konzɑt͡sɯ
+東宮御所 to̞ɯgɯː go̞ʃo̞
+恐ろしい o̞so̞ɾo̞ʃiː
+夕暮れ jɯːgɯre̞
+伏線 fɯkɯsen
+バリバリ bɑribɑri
+先がけ sɑkigɑke
+そろそろ so̞ɾo̞so̞ɾo̞
+母音 bo̞in
+食い kɯi
+パンク pɑnkɯ
+シール ʃiːrɯ
+返済 hensɑi
+昼間 hirɯmɑ
+何やら nɑn jɑrɑ
+コピーライター kopiːrɑitɑː
+懐かしく nɑt͡sɯkɑʃikɯ
+ラジオ rɑdʒio
+でしょ deʃo̞
+かん kɑn
+車列 ʃɑret͡sɯ
+建 ke̞n
+ブーケ bɯːke̞
+かめ kɑme
+宵越し jo̞igo̞ʃi
+在職 zɑiʃokɯ
+すばらしい sɯbaraʃiː
+粉砕 fɯnsai
+リュウジ rʲɯːdʒi
+告白 kokɯhɑkɯ
+去っ sɑʔ
+イイ ii
+願 gɑn
+まなん mɑ nɑn
+暮れ kɯre̞
+空い mɯnɑʃiː
+近づき tʃikɑzɯki
+表紙 ço̞ɯʃi
+栽培 sɑibɑi
+脂 ɑbɯrɑ
+カテゴリー kɑtegoriː
+章一郎 ʃo̞ɯitʃiro̞ɯ
+芝 ʃibɑ
+西方 seːho̞ː
+区域 kɯiki
+取り組ん to̞rikɯn
+埋め ɯme̞
+かまびすしい kamabisɯʃiː
+一橋大学 hitot͡sɯbɑʃi dɑigɑkɯ
+増大 zoːdɑi
+上回り ɯɯamaɯari
+フォン ɸo̞n
+マヨラー mɑjorɑː
+暫定 zɑnteː
+寂し sɑbiʃi
+くるくる kɯrɯkɯrɯ
+眼科 gɑnkɑ
+胃 i
+待ち受け mɑtʃiɯke
+ブッ bɯ
+入試 ɲɯːʃi
+ノー no̞ː
+乗り出し noridɑʃi
+合っ ɑʔ
+見れ mire̞
+論功行賞 ro̞nko̞ɯ ko̞ɯʃo̞ɯ
+に対し ni tɑiʃi
+ニンニク ninnikɯ
+不動産 fɯdoːsan
+参上 sɑɲʒoː
+思っ o̞mo̞ʔ
+独学 dokɯgɑkɯ
+祝日 ʃɯkɯdʒit͡sɯ
+毎日 mɑinitʃi
+堂々 do̞ɯdo̞ɯ
+西武 seːbɯ
+白黒 ʃiro̞kɯro̞
+ホルモン ho̞rɯmo̞n
+傷病 ʃo̞ɯbʲo̞ɯ
+死因 ʃiin
+ライセンス raisensɯ
+おなじみ onɑdʒimi
+ハイランド hɑirɑndo
+住人 dʒɯːnin
+パジェロ pɑdʒero
+屈辱 kɯt͡sɯdʒo̞kɯ
+公正 ko̞ːseː
+無料 mɯrʲo̞ɯ
+格安 kakɯjasɯ
+路地 ro̞dʒi
+エース eːsɯ
+貯金 tʃo̞kin
+注釈 tʃɯːʃɑkɯ
+岡 okɑ
+証拠 ʃo̞ɯko̞
+ダ dɑ
+アンディー ɑndiː
+カスリ kasɯɾi
+都知事 to̞ tʃidʒi
+コストダウン kosɯtodaɯn
+祐子 jɯːko̞
+多用 tɑjoː
+初回 ʃokɑi
+のぞかせる nozokaserɯ
+ろうそく ɾoːsokɯ
+通気 t͡sɯːki
+重み o̞mo̞mi
+長寿 tʃo̞ɯdʒɯ
+和平 ɯaheː
+服用 ɸɯkɯjo̞ɯ
+実習 dʑiʔshɯː
+伝わら t͡sɯtaɯara
+出合わ deaɯa
+別荘 beʔso̞ː
+奥様 okɯsama
+窩窩頭 kɑkɑ ɑtɑmɑ
+たくさん takɯsan
+ちょうど tʃo̞ɯdo̞
+異口同音 ikɯdo̞ɯo̞n
+ば bɑ
+出向い de̞mɯi
+七草 nanakɯsa
+トランク torɑnkɯ
+売っ ɯʔ
+友人 jɯːdʒin
+コンサートマスター konsaːtomasɯtaː
+稼げる kasegeɾɯ
+木谷 kitɑni
+マルチタスク maɾɯtʃitasɯkɯ
+くさく kɯsakɯ
+手洗い teɑrɑi
+及ん o̞jo̞n
+刑罰 keːbɑt͡sɯ
+やさしい jɑsɑʃiː
+仕事 ʃigo̞to̞
+議員 giin
+反り so̞ɾi
+実戦 dʑisən
+スラム sɯɾamɯ
+向ける mɯke̞rɯ
+署名 ʃo̞meː
+皇太子 koːtɑiʃi
+荒地 ɑretʃi
+階級 kɑikʲɯː
+捕 ho̞
+死亡 ʃibo̞ɯ
+折 o̞ri
+科学 kɑgɑkɯ
+送り o̞kɯri
+あらわれる araɯarerɯ
+たまり tɑmɑri
+賞 ʃo̞ɯ
+爆笑 bɑkɯʃoː
+イスラム教 isɯramɯkjoː
+モノマネ monomɑne
+単なる tɑnnɑrɯ
+少々 ʃo̞ɯʃo̞ɯ
+行き iki
+故 ko̞
+止まら tomɑrɑ
+どうにも do̞ɯ ni mo̞
+噂 ɯwasa
+裏方 ɯrɑkɑtɑ
+横取り jo̞ko̞do̞ri
+話す hanasɯ
+エレベータ erebeːtɑ
+増える ɸɯe̞rɯ
+手柄 tegɑrɑ
+ずっと zɯʔto̞
+機器 kiki
+訝しく ibɯkɑʃikɯ
+予兆 jo̞tʃo̞ɯ
+簡素 kɑnso
+ミッシェル miʔʃeɾɯ
+恋愛 ɾɛnɑi
+かかさ kɑkɑsɑ
+親鸞 ʃinrɑn
+面倒くさく mendoːkɯsakɯ
+雑巾 zo̞ɯkin
+倒れる tɑorerɯ
+ダジャレ dɑdʒɑre
+そうした soːʃitɑ
+青春 seːʃɯn
+朝陽 ɑsɑhi
+ジャッキー dʒɑʔkiː
+トレーディング to̞reːdingɯ
+ニター nitɑː
+先祖 senzo̞
+汚れ jo̞go̞re
+恒星 ko̞ːseː
+まったく mɑʔtɑkɯ
+警部 ke̞ːbɯ
+なお nɑo
+ビジョン bidʒo̞n
+緊急 kinkʲɯː
+陥り o̞tʃiiri
+だす dasɯ
+つられる t͡sɯrɑrerɯ
+ブラジル bɯrɑdʒirɯ
+接客 seʔkjakɯ
+コロン ko̞ro̞n
+分かり ɯakari
+二日酔い ɸɯtɑkɑ joi
+吐き出し hɑkidɑʃi
+追放 t͡sɯiho̞ɯ
+リフレッシュ ɾifɯɾesʃɯ
+青い ɑoi
+セイ se̞i
+仕送り ʃio̞kɯri
+人形 ningʲo̞ɯ
+ミーティング miːtingɯ
+日清製粉 niʔʃinseːhɯn
+けし ke̞ʃi
+戯言 zɑregoto
+順 dʒɯn
+理由 rijɯː
+こうして ko̞ɯ ʃite
+林野庁 ɾiɲɑtʃoː
+就寝 ʃɯːʃin
+ダウン dɑɯn
+憂鬱 jɯːɯt͡sɯ
+かかわる kakaɯarɯ
+もらえ morɑe
+コート ko̞ːto̞
+いく ikɯ
+監視 kɑnʃi
+防ぐ fɯseɡɯ
+おおまか oːmɑkɑ
+気づき kizɯki
+求め mo̞to̞me
+随分 zɯibɯn
+人影 hitokɑge
+にたいして ni tɑiʃite
+足音 ɑʃioto
+アナログ ɑnɑrogɯ
+ほし ho̞ʃi
+標榜 ço̞ɯbo̞ɯ
+強引 go̞ɯin
+グラフ gɯrɑɸɯ
+心構え kokorogɑmɑe
+蜀 ʃo̞kɯ
+妬み netɑmi
+グラム gɯrɑmɯ
+交通 ko̞ɯt͡sɯː
+先月 se̞ᵑge̞tsɯ
+ハケンライフ hɑkenrɑiɸɯ
+引き留める hikito̞merɯ
+死 ʃi
+露店 ro̞ten
+ひび hibi
+莉子 riko̞
+外観 gɑikɑn
+ロサンゼルスタイムズ rozanzeɾɯsɯtaimɯzɯ
+シンクタンク ʃinkɯ tɑnkɯ
+打ち破ら ɯtʃijɑbɯrɑ
+答申 to̞ɯʃin
+官僚 kɑnrʲoː
+見せる miseɾɯ
+大口 o̞ːgɯtʃi
+見通し mito̞ːʃi
+デザート dezɑːto
+どっち do̞ʔtʃi
+怖がっ koɯagaʔ
+つまされる tsɯmasaɾeɾɯ
+泉 izɯmi
+歌う ɯtɑɯ
+現行 genko̞ɯ
+パルス paɾɯsɯ
+書 ʃo̞
+干物 himo̞no̞
+つくれ t͡sɯkɯre̞
+葬式 so̞ːshiki
+ゆで jɯde̞
+夏休み natsɯjasɯmi
+変身 he̞nʃin
+転ん ko̞ro̞n
+ガーディアン gɑːdiɑn
+仙台 sendɑi
+銭湯 sento̞ː
+現れ araɯare
+のんき no̞nki
+鮫島 sɑmeʃimɑ
+いったん iʔtɑn
+寄っ jo̞ʔ
+画 gɑ
+増収 zo̞ɯʃɯː
+歩け ɑrɯke
+不公平 ɸɯko̞ːheː
+ドラマ dorɑmɑ
+平家 he̞ːke̞
+育 ikɯ
+手順 te̞dʒɯn
+ウェーブ e̞ːbɯ
+ネイル ne̞irɯ
+マッサージ maʔsaːdʒi
+商学 ʃoːgɑkɯ
+何だか nɑn dɑ kɑ
+志願 ʃigɑn
+プレス pɯɾesɯ
+望む no̞zo̞mɯ
+歩み寄れ ɑjɯmijore
+菅野 kɑnno
+深澤 ɸɯkazaɯa
+カブ kɑbɯ
+盤石 bɑndʒɑkɯ
+目減り me̞be̞ri
+菜っ葉 nɑʔpɑ
+陸路 rikɯro̞
+証し ɑkɑʃi
+優れる sɯɣɯɾeɾɯ
+伺えれ ɯkɑgɑere
+パラドックス paradokekɯsɯ
+落ち o̞tʃi
+悪く ɯarɯkɯ
+母子 bo̞ʃi
+パワープレイ paɯaː pɯrei
+中元 tʃɯːge̞n
+敗因 hɑiin
+官能 kɑnnoː
+新進 ʃinʃin
+いたって itɑʔte
+出くわし dekɯɯaʃi
+っ子 ʔko̞
+裏打ち ɯrɑɯtʃi
+手遅れ teo̞kɯre
+へえ he̞ː
+開ける ɑkerɯ
+ゆら jɯrɑ
+盛 sɑkɑn
+観 kɑn
+宣言 seᵝɑgen
+顧客 kokʲɑkɯ
+眠れる ne̞mɯre̞rɯ
+実弾 dʒit͡sɯdɑn
+しか ʃikɑ
+利尻 riʃiri
+率 rit͡sɯ
+被写体 hiʃɑ tɑi
+ハワイ大学 haɯai daigakɯ
+浮揚 ɸɯjo̞ɯ
+盛り込む mo̞riko̞mɯ
+偉大 idɑi
+蛇口 dʒɑgɯtʃi
+負 ɸɯ
+練習 re̞nʃɯː
+電話機 denɯa ki
+持つ mo̞t͡sɯ
+名声 me̞ːse̞ː
+要旨 jo̞ɯʃi
+伊 i
+きょう kʲo̞ɯ
+ショー ʃo̞ː
+遠ざけ toːzɑke
+扱う ɑt͡sɯkɑɯ
+踏み出せ fɯmiðase
+御 go̞
+移植 iʃo̞kɯ
+再生 sɑiseː
+苦し kɯrɯʃi
+サブプライムローン sabɯpɯraimɯroːn
+歯ぎしり hɑgiʃiri
+水性 mizɯʃo̞ɯ
+話そ hɑnɑso
+スネ sɯne
+総合 so̞ːɡo̞ː
+会話 kaiɯa
+奇跡 kise̞ki
+立ち止まる tɑtʃidomɑrɯ
+伊藤忠 ito̞ɯtʃɯː
+到来 toːrɑi
+キャスティング kjasɯtiᵑgɯ
+乱暴 rɑnboː
+柳 jɑnɑgi
+現地 ge̞ntʃi
+忙しく isoɡaʃikɯ
+職 ʃo̞kɯ
+ズル zɯrɯ
+ラクラク rɑkɯrɑkɯ
+何でも nɑn de mo
+垣根 kɑkine
+東屋 ɑzɯmɑjɑ
+化ける bɑkerɯ
+修復 ʃɯːɸɯkɯ
+達 to̞ːrɯ
+置こ o̞ko̞
+大分 oːitɑ
+伸長 ʃintʃo̞ɯ
+起こり o̞ko̞ri
+子音 ʃiin
+大金 tɑikin
+老人 ro̞ɯdʒin
+並 nɑmi
+創設 so̞ːsetsɯ
+不可解 ɸɯkɑkɑi
+底堅い sokoɡɑtɑi
+報いる mɯkɯirɯ
+バラエティ bɑrɑeti
+貫く t͡sɯrɑnɯkɯ
+換気扇 kɑnkisen
+躍動 jɑkɯdoː
+包める t͡sɯt͡sɯme̞rɯ
+囲変更 kɑkoi henkoː
+最低 sɑiteː
+ゴルフ go̞rɯɸɯ
+猫 neko̞
+今夏 konkɑ
+通達 t͡sɯːtɑt͡sɯ
+詐称 sɑʃoː
+歪む jɯgɑmɯ
+待遇 tɑigɯː
+オフショア oɸɯʃoɑ
+八百長 jɑotʃoː
+功利 ko̞ɯri
+ブレーキ bɯre̞ːki
+寺田 terɑdɑ
+姉ちゃん ɑnetʃɑn
+遅かっ osokɑʔ
+リアルタイム riɑrɯ tɑimɯ
+爆心 bɑkɯʃin
+撮れる to̞rerɯ
+インサイド insɑiðo
+底上げ sokoɑdʒe
+色調 ʃikitʃo̞ɯ
+食べる tɑberɯ
+大前 oːmɑe
+予備 jo̞bi
+赤外線 sekiɰᵝɑisen
+成す nasɯ
+利回り rimaɯari
+スラスラ sɯɾasɯɾa
+風習 ɸɯːʃɯː
+朝廷 tʃo̞ːteː
+湯 jɯ
+シーズン ʃiːzɯn
+稔 mino̞rɯ
+うち ɯtʃi
+さかのぼっ sɑkɑnoboʔ
+ボディ bo̞di
+前倒し mɑedɑoʃi
+シンガポール ʃingɑpoːrɯ
+債務 saimɯ
+スベ sɯbe
+皇族 ko̞ɯzo̞kɯ
+別れ話 ɯakarebanaʃi
+イン in
+不愉快 ɸɯjɯkɑi
+上る no̞bo̞rɯ
+働き者 hɑtɑrɑkimono
+ムール貝 mɯːrɯ kɑi
+尋常 dʒindʒo̞ɯ
+上皮 dʒo̞ɯhi
+言う jɯː
+ヒスタミン hisɯtamin
+ライブハウス raibɯhaɯsɯ
+民放連 minpo̞ɯren
+仕業 ʃiɯaza
+ケア keɑ
+未練 mire̞n
+アテ ɑte
+あんなに ɑnnɑ ni
+案外 ɑngɑi
+パクチー pɑkɯtʃiː
+落ち着き o̞tʃit͡sɯki
+値引き ne̞biki
+連合 rengo̞ɯ
+土日 do̞nitʃi
+に関して ni kɑnʃite
+松宮 mɑt͡sɯmijɑ
+断言 dɑngen
+しめ ʃime̞
+銃刀 dʒɯːto̞ɯ
+受診 dʒɯʃin
+速かっ hɑjɑkɑʔ
+明治 me̞ːdʒi
+ワンセグ vansegɯ
+ヒョソン hjo̞so̞n
+なかでも nɑkɑ de mo
+キャバレー kʲɑbɑreː
+グッ gɯ
+口々 kɯtʃigɯtʃi
+諸兄 ʃo̞keː
+クレシア kɯre ʃiɑ
+パーティション pɑːtiʃon
+善悪 zenakɯ
+執務 ʃit͡sɯmɯ
+長野 nɑgɑno
+小雪 ko̞jɯki
+猟犬 rʲo̞ɯken
+正攻法 seːko̞ːho̞ː
+まわり maɯari
+呼ば jobɑ
+殿様 tonosɑmɑ
+公立 ko̞ɯrit͡sɯ
+新規 ʃinki
+仇敵 kʲɯːte̞ki
+うれしかっ ɯreʃikɑʔ
+ふぐ ɸɯgɯ
+名物 me̞ːbɯt͡sɯ
+振動 ʃindo̞ɯ
+球団 kʲɯːdɑn
+時 dʒi
+スポーツカー sɯpoːtsɯkaː
+ピアソラ pjɑsoɾɑ
+恐る osoɾɯ
+封じ込め ɸɯːdʒiko̞me
+めんどくさい mendokɯsai
+総長 so̞ːtʃo̞ː
+安 ɑn
+月 t͡sɯki
+造船 zoːsɛn
+水戸黄門 mito̞ ko̞ɯmo̞n
+ドキ do̞ki
+平ら tɑirɑ
+一安心 itʃi ɑnʃin
+姉さん ɑnesɑn
+買収 bɑiʃɯː
+司 tsɯkasa
+にくい nikɯi
+とくと to̞kɯto̞
+ホールディングス hoːɾɯdiᵑgɯsɯ
+美徳 bito̞kɯ
+要素 jo̞ːso̞
+悔い kɯi
+詩人 ʃidʒin
+違う tʃigɑɯ
+口論 ko̞ɯro̞n
+拒む kobɑmɯ
+あれよ ɑrejo
+素直 sɯnao
+完全 kɑnzen
+温床 o̞nʃo̞ɯ
+うっかり ɯʔkɑri
+断ち切ら tɑtʃikirɑ
+静寂 seːjakɯ
+試乗 ʃidʒo̞ɯ
+美味しい o̞iʃiː
+スタジアム sutadʒiamu
+パーティ pɑːti
+嘆き nɑgeki
+漁業 gʲo̞gʲo̞ɯ
+乏しい to̞bo̞ʃiː
+体勢 tɑisei
+語っ go̞ʔ
+両派 rʲoːhɑ
+様子見 joːsɯmi
+重く o̞mo̞kɯ
+目当て meɑte
+恨み ɯrɑmi
+接触 sɛshokɯ
+宴会 enkɑi
+公職 ko̞ɯʃo̞kɯ
+南ア nɑnɑ
+頭抜け zɯnɯke̞
+要請 jo̞ːseː
+容認 jo̞ɯnin
+逃れ nogɑre
+ムリ mɯri
+ダボス dabosɯ
+それほど so̞ɾeho̞do̞
+博子 hiro̞ko̞
+かっこ kɑʔko
+なかには naka ni ɯa
+心根 ko̞ko̞ro̞ne
+不謹慎 ɸɯkinʃin
+降り ɸɯri
+そうろう so̞ːɾo̞ː
+うまい ɯmɑi
+嵐 ɑrɑʃi
+芸能人 geːno̞ːdʒin
+向き mɯki
+高層 koːsoː
+百 çɑkɯ
+大八木 oːjɑgi
+余録 jo̞ro̞kɯ
+同席 do̞ːseki
+掲載 kɑisɑi
+収縮 ʃɯːʃɯkɯ
+余計 jo̞keː
+疫 e̞ki
+デフレ de̞ɸɯre̞
+研修生 kenshɯːseː
+ドリンク do̞rinkɯ
+よかろ jokɑro
+アイオワ aioɯa
+還付 kɑnpɯ
+多き o̞ːki
+人数 ninzɯː
+緩やか jɯrɯjɑkɑ
+つくる t͡sɯkɯrɯ
+幡 hɑtɑ
+遭っ ɑʔ
+一度 itʃi do̞
+連覇 renpɑ
+市川 itʃikaɯa
+マナー mɑnɑː
+地上 tʃidʒo̞ɯ
+エリート eriːto̞
+押し上げる oʃiɑgerɯ
+造り t͡sɯkɯri
+木製 mokɯseː
+見つかっ mit͡sɯkɑʔ
+指向 ʃiko̞ɯ
+せれ se̞ɾe̞
+日雇い hijɑtoi
+シラケ ʃirɑke
+目立ち medɑtʃi
+受け答え ɯkekotɑe
+内心 nɑiʃin
+ン n
+ハンドリング hɑndoringɯ
+誤算 gosɑn
+バッテリー bɑʔteriː
+おろそか oɾosokɑ
+機軸 kidʒikɯ
+塊 kɑtɑmɑri
+飽きる ɑkirɯ
+案件 ɑnken
+ダイハツ dɑihɑt͡sɯ
+飲む no̞mɯ
+傾 ke̞ː
+今期 ko̞nki
+乗り遅れ no̞ri o̞kɯre
+海老名 ebinɑ
+ゲスト gesɯto
+陶芸 to̞ːgeː
+深草 fɯkakɯsa
+黒人 ko̞kɯdʒin
+含み ɸɯkɯmi
+孤独 ko̞do̞kɯ
+考える kɑngɑerɯ
+さようなら sɑjoːnɑɾɑ
+慎吾 ʃingo̞
+クバンランド kɯbɑnrɑndo
+石巻 iʃinomɑki
+痛手 itɑde
+独立 do̞kɯrit͡sɯ
+根強く nezɯjo̞kɯ
+デリケート derikeːto̞
+士 ʃi
+着せ kise̞
+最多 sɑitɑ
+オクテ o̞kɯte
+ファー ɸɑː
+財団 zɑidɑn
+不自由 ɸɯdʒijɯː
+湿原 ʃit͡sɯge̞n
+シャフト ʃɑɸɯto
+安価 ɑnkɑ
+篤人 ɑt͡sɯto
+無知 mɯtʃi
+知れ ʃire̞
+裁か sɑbɑkɑ
+もちろん mo̞tʃiro̞n
+開か hirɑkɑ
+相場 soːbɑ
+月曜 get͡sɯjo̞ɯ
+ミミズ mimizɯ
+呪い no̞ro̞i
+知ら ʃirɑ
+水着 mizɯgi
+とどまり todomɑri
+土 t͡sɯtʃi
+流さ nɑgɑsɑ
+開催 kɑisɑi
+敗訴 hɑiso
+仮に kɑri ni
+生家 seːkɑ
+ねばねば nebɑnebɑ
+レビュー re̞bʲɯː
+いささか isɑsɑkɑ
+何もかも nɑn mo kɑ mo
+拾っ dʒɯʔ
+祭典 sɑiten
+剣道 kendo̞ɯ
+下水 gəzɯi
+雰囲気 fɯniki
+田中 tɑnɑkɑ
+広げ hiro̞ge
+とり to̞ri
+通商 t͡sɯːʃo̞ɯ
+紅茶 koːtʃɑ
+んじゃ ndʒɑ
+街 mɑtʃi
+支払う ʃihɑrɑɯ
+職員 ʃo̞kɯin
+海上 kɑidʒoː
+見渡し miɯataʃi
+晴れ hɑre
+抜く nɯkɯ
+増し mɑʃi
+動体 doːtɑi
+牡蠣 kɑki
+内外 nɑigɑi
+いただい itɑdɑi
+手引き te̞biki
+浮気 ɯɯaki
+劇映画 geki eigɑ
+切り口 kirikɯtʃi
+救う sɯkɯɯ
+別紙 beʔʃi
+飲 in
+法務 ho̞ɯmɯ
+逃げる nige̞rɯ
+仁愛 dʒinɑi
+溶かし tokɑʃi
+音色 neiro̞
+栄光 eːko̞ɯ
+代物 ʃiro̞mo̞no̞
+ばつ bɑt͡sɯ
+光輪 ko̞ɯrin
+白け ʃirɑke
+助かる tasɯkaɾɯ
+ち tʃi
+美容 bijo̞ɯ
+シチュールウ ʃitʃɯː rɯː
+伸び悩む nobinɑjɑmɯ
+異例 ire̞ː
+総 so̞ː
+平行 heːko̞ɯ
+気長 kinɑgɑ
+ショウガ ʃoːgɑ
+不可能 ɸɯkɑnoː
+つぶやき t͡sɯbɯjɑki
+海中 kɑitʃɯː
+物々交換 bɯt͡sɯbɯt͡sɯ koːkɑn
+ピカ pikɑ
+すみません sɯmimasen
+利害 rigɑi
+最初 sɑiʃo
+アメリカ ɑmerikɑ
+地上波 tʃidʒoːhɑ
+こ ko̞
+杉浦 sɯdʒiɯra
+厳選 gənsən
+おもしろ o̞mo̞ʃiro̞
+詰まり t͡sɯmɑri
+国際 kokɯsai
+警鐘 keːʃo̞ː
+呈す teːsɯ
+段違い dɑntʃigɑi
+大学 dɑigɑkɯ
+パパ pɑpɑ
+奮発 ɸɯnpɑt͡sɯ
+スリラー sɯɾiɾaə
+祥子 ʃo̞ɯko̞
+適合 tekigo̞ɯ
+詳しく kɯɯaʃikɯ
+休ま jasɯma
+歪ん jɯgɑn
+紹介 ʃoːkɑi
+背 se̞
+合わさ aɯasa
+点灯 tento̞ɯ
+教諭 kʲo̞ɯjɯ
+重ねる kasanerɯ
+余っ jo̞ʔ
+細 o̞so̞
+負荷 ɸɯkɑ
+真珠湾 ʃindʒɯ ɯan
+橋立 hɑʃidɑte
+青 ɑo
+水面 sɯimen
+通販 t͡sɯːhɑn
+ウィンザー inzɑː
+傘下 sɑnkɑ
+ポケモン po̞kemo̞n
+わかり ɯakari
+緩 kɑn
+明秀 ɑkihide
+無様 bɯzɑmɑ
+プライシング pɯrɑiʃingɯ
+衣装 iʃo̞ɯ
+過剰 kɑdʒoː
+箱 bɑko
+医学部 igɑkɯ bɯ
+サマ sɑmɑ
+秘訣 hike̞t͡sɯ
+スパム sɯpamɯ
+スカウト sɯkaɯto
+気づか kizɯkɑ
+リチウム ritʃiɯmɯ
+開会 kɑikɑi
+産ん ɯn
+私的 ʃite̞ki
+合併症 gɑʔpeːʃoː
+前段 zendɑn
+パタン pɑtɑn
+国内 kokɯnɑi
+トラクション torɑkɯʃon
+河村 kaɯamɯra
+ところが tokoro gɑ
+飼育 ʃiikɯ
+代弁 dɑiben
+際 sɑi
+在宅 zɑitɑkɯ
+メカニズム mekɑnizɯmɯ
+スクロール sɯkɯroːɾɯ
+おかしい okɑʃiː
+募っ t͡sɯno̞ʔ
+戻る mo̞do̞rɯ
+タバコ tɑbɑko
+なー nɑː
+まわし maɯaʃi
+人間 ninge̞n
+重大 dʒɯːdɑi
+初対面 hɑt͡sɯ tɑimen
+芥川 akɯtagaɯa
+鮮魚 seɲgo
+解説 kɑisetsɯ
+修繕 ʃɯːze̞n
+斜 ʃɑ
+仕様 ʃijo̞ɯ
+募る t͡sɯno̞rɯ
+題材 dɑizɑi
+全国 zenko̞kɯ
+ゲル ge̞rɯ
+オリーブ油 oriːbɯ ɑbɯrɑ
+汚さ kitɑnɑsɑ
+グー gɯː
+専行 senko̞ː
+敷居 ʃikii
+遠足 ensokɯ
+憤り ikido̞ːri
+首脳 ʃɯno̞ɯ
+参照 sɑnsoː
+レザー rezɑː
+似合わ niaɯa
+鉱夫 ko̞ɯɸɯ
+箇所 kɑʃo
+いいえ iːe̞
+安藤 ɑndoː
+料 rʲo̞ɯ
+先駆 seŋkɯ
+思い出さ omoidɑsɑ
+テキサス tekisasɯ
+オルファ orɯɸɑ
+視覚 ʃikɑkɯ
+飛べ to̞be
+バイオ bɑio
+教委 kʲo̞ɯi
+要点 jo̞ɯten
+ちと tʃito̞
+揃い so̞ɾo̞j
+拡大 kɑkɯdɑi
+貨物 kɑmot͡sɯ
+弱点 dʒɑkɯten
+六月 rokɯ gɑt͡sɯ
+ものすごい monosɯɡoj
+上田 ɯedɑ
+開口 kɑikoː
+注文 tʃɯːmo̞n
+大気 tɑiki
+飛び立っ tobitɑʔ
+有人 jɯːdʒin
+物議 bɯt͡sɯgi
+関東 kɑntoː
+首都 ʃɯto̞
+要は joː ɯa
+予知 jo̞tʃi
+急激 kʲɯːge̞ki
+表彰台 çoːʃoː dɑi
+沿う so̞ː
+定まり sɑdɑmɑri
+中途 tʃɯːto̞
+原爆 genbɑkɯ
+白っぽい ʃiro̞ʔpo̞i
+だらだら dɑrɑdɑrɑ
+小麦 ko̞mɯgi
+牛肉 gʲɯːnikɯ
+井戸 ido̞
+攻め込む semekomɯ
+喜洋 jo̞ʃihiro̞
+大前提 dɑizenteː
+ケンタッキー kentɑʔkiː
+分割 bɯnkɑt͡sɯ
+梅雨 t͡sɯjɯ
+ポ po̞
+挑む ido̞mɯ
+回っ maɯaʔ
+不良 ɸɯrʲo̞ɯ
+うかがう ɯkɑgɑɯ
+サポート sɑpoːto
+ほったらかし hoʔtɑrɑkɑʃi
+事柄 kotogɑrɑ
+めくっ me̞kɯʔ
+鑑定 kɑnteː
+巨漢 kʲokɑn
+卵 tɑmɑgo
+エクステラ ekɯsɯtera
+いたし itɑʃi
+掘れ ho̞re
+妙 mʲo̞ɯ
+国債 kokɯsai
+過ごす sɯɣosɯ
+方言 ho̞ɯgen
+構内 koːnɑi
+有っ ɑʔ
+クラッシック kɯɾɯsʃɪʔkɯ
+撮れ to̞re
+叫ば sɑkebɑ
+隠し kɑkɯʃi
+クマ kɯmɑ
+痩せりゃ jɑserjɑ
+同数 doːsɯː
+乾 inɯi
+遣う t͡sɯkɑɯ
+贈っ o̞kɯʔ
+肌触り hadazaɯari
+刑事 ke̞ːdʒi
+抑え込む osaekomɯ
+外角 gɑikɑkɯ
+いくぶん ikɯbɯn
+ジブリ dʒibɯri
+旦那 dɑnnɑ
+リスク ɾisɯkɯ
+かかわれ kakaɯare
+秀逸 ʃɯːit͡sɯ
+思わ omoɯa
+就航 ʃɯːko̞ɯ
+サイン sɑin
+協和 kʲoːɯa
+専門医 senmo̞ni
+昔話 mɯkɑʃibɑnɑʃi
+部長 bɯtʃo̞ɯ
+長 nɑgɑ
+政界 seːkɑi
+テクニカル tekɯnikɑrɯ
+絶やさ tɑjɑsɑ
+脳裏 no̞ɯri
+疑う ɯtɑgɑɯ
+集落 ʃɯːrɑkɯ
+池上 ikegɑmi
+日に日に hi ni hi ni
+いう iɯ
+麻薬 mɑjɑkɯ
+三田 mitɑ
+コンセンサス konsensasɯ
+抱っこ dɑʔko
+薬事 jɑkɯdʒi
+真鍮 ʃintʃɯː
+追って o̞ʔte
+人事 dʒindʒi
+おく o̞kɯ
+カーボンフードカーボンルーフ kɑːbon ɸɯːdo kɑːbon rɯːɸɯ
+語 go̞
+身銭 mize̞ni
+すごい sɯɡoj
+欠け kɑke
+激減 ge̞kige̞n
+寄り添い jo̞ɾiso̞j
+チャネルコンフリクト tʃɑnerɯ konɸɯrikɯto
+すると sɯrɯto
+源 minɑmoto
+デコレーション deko̞reːʃo̞n
+カンタン kɑntɑn
+永久 e̞ːkʲɯː
+場中 dʒo̞ɯtʃɯː
+事欠か kotokɑkɑ
+文献 bɯnke̞n
+論外 rongɑi
+いっさい iʔsɑi
+ジャージー dʒɑːdʒiː
+イールドカーブ iːrɯdo kɑːbɯ
+任せれ mɑkɑzeɾe
+延び延び no̞bino̞bi
+餡 ɑn
+森林 ʃinrin
+売 ɯri
+と to̞
+油 ɑbɯrɑ
+効く kikɯ
+切れ kire̞
+言葉尻 kotobɑdʒiri
+きっぱり kiʔpɑri
+建屋 tɑtejɑ
+整え to̞to̞no̞e
+転校 tenko̞ɯ
+評論 ço̞ɯro̞n
+歴任 re̞kinin
+心理 ʃinri
+ミシン miʃin
+理屈 rikɯt͡sɯ
+小林 kobɑjɑʃi
+無能 mɯno̞ɯ
+三菱自動車 mit͡sɯbiʃi dʒidoːʃɑ
+ネタ netɑ
+周 ʃɯː
+両親 rʲo̞ɯʃin
+方 ho̞ɯ
+拭く ɸɯkɯ
+適し te̞kiʃi
+ホワイト hoɯaito
+並べ nɑrɑbe
+派閥 hɑbɑt͡sɯ
+飢え死に ɯe̞dʒini
+シーリング ʃiːringɯ
+たくしゃ tɑkɯʃɑ
+刺し sɑʃi
+インド indo̞
+斜視 ʃɑʃi
+本庁 ho̞ntʃo̞ɯ
+内定 nɑiteː
+一息 hito̞iki
+リョウコ rʲo̞ɯko̞
+弘 hiro̞ʃi
+ポスト posɯto
+定型 te̞ːke̞ː
+ガチンコ gɑtʃinko
+リカ rikɑ
+狼 oːkɑmi
+採取 saiʃɯ
+心情 ʃindʒo̞ɯ
+工場 ko̞ɯdʒo̞ɯ
+ミカン mikɑn
+舵 kɑdʒi
+仕草 ʃiɰᵝɑzɑ
+リーマン riːmɑn
+アウトドア ɑɯtodoɑ
+一世一代 iʔseːitʃidɑi
+会い ɑi
+白菜 hakɯsai
+直営店 tʃo̞kɯeːten
+フラー ɸɯrɑː
+切る kirɯ
+かなわ ka na ɯa
+ポッドキャスト podokjasɯto
+境内 keːdɑi
+一夫多妻 ipɯtasai
+判 hɑn
+密度 mit͡sɯdo̞
+執拗 ʃit͡sɯjo̞ɯ
+日取り hido̞ri
+無病 mɯbʲo̞ɯ
+歯応え hɑgotɑe
+タイト tɑito
+話し掛ける hɑnɑʃikɑkerɯ
+奢る o̞go̞rɯ
+それなのに sorenɑnoni
+馬場 bɑbɑ
+さまざま sɑmɑzɑmɑ
+あたる ɑtɑrɯ
+出来れ de̞kire̞
+倒れ tɑore
+碑 hi
+メジチ me̞dʒitʃi
+あがっ ɑgɑʔ
+自衛 dʒie̞ː
+ダルフール dɑrɯɸɯːrɯ
+ひとり合点 hitori gɑten
+道路 do̞ɯro̞
+劇団 gekidɑn
+完了 kɑnrʲoː
+承継 ʃo̞ːkeː
+年配 nenpɑi
+委 i
+育ち sodɑtʃi
+だけど dɑkedo
+若き ɯakaki
+税理士 ze̞ːriʃi
+仮想 kɑsoː
+あらわ araɯa
+質疑 ʃit͡sɯgi
+外れ hɑzɯre
+麦芽 bɑkɯgɑ
+バーベキュー bɑːbekʲɯː
+騙す damasɯ
+乏しかっ toboʃikɑʔ
+省力 ʃo̞ɯrʲo̞kɯ
+氷山 çoːzɑn
+今頃 imɑgoro
+生業 seːgoː
+トイレ to̞ire
+低め hikɯme̞
+松屋 mɑt͡sɯjɑ
+決まり kimɑri
+マイル mɑirɯ
+脇毛 ɯakige
+独走 dokɯsoː
+前者 zenʃɑ
+現時点 ge̞ndʒite̞n
+年寄り to̞ʃijo̞ri
+やわらかく jaɯarakakɯ
+プリウス pɯɾjɯsɯ
+年来 nenrɑi
+自粛 dʒiʃɯkɯ
+夫人 ɸɯdʒin
+とも to̞mo̞
+現在 genzɑi
+真ん中 mɑnnɑkɑ
+当て字 ɑtedʒi
+消える kie̞rɯ
+先々 sɑkizɑki
+吹石 ɸɯkiiʃi
+不調 ɸɯtʃo̞ɯ
+集団 ʃɯːdɑn
+ロンドン ro̞ndo̞n
+砲門 ho̞ɯmo̞n
+いずれ izɯre̞
+なでおろし nɑdeoroʃi
+うっとり ɯʔto̞ri
+浮かん ɯkɑn
+融和 jɯːɯa
+押しつぶさ oshitsɯbɯza
+傷 kizɯ
+勢い ikio̞i
+千葉 tʃibɑ
+重ね重ね kɑsɑnegɑsɑne
+フグ ɸɯgɯ
+ファンタスティック fantasɯtikekɯ
+焼い jɑi
+父子 ɸɯʃi
+京都大 kʲotodɑi
+花束 hɑnɑtɑbɑ
+ベンチ be̞ntʃi
+ハンディ hɑndi
+ヒロミ hiro̞mi
+キツイ kit͡sɯi
+近親 kinʃin
+香ばしい koːbɑʃiː
+糧 kɑte
+ガイド gɑido
+情けなく nasakenakɯ
+伺え ɯkɑgɑe
+泣き言 nɑkigoto
+後押し ɑtoːʃi
+フニャン ɸɯɲɑn
+行わ okonaɯa
+寝癖 neɰᵝɑze
+興南 koːnɑn
+恒雄 t͡sɯneo̞
+前編 ze̞npe̞n
+認定 ninte̞ː
+細々 ho̞so̞bo̞so̞
+スコア sɯkoɑ
+ズボン zɯbo̞n
+世話焼き seɯajaki
+リスクオフ ɾisɯkɯofɯ
+通電 t͡sɯːde̞n
+小渕 o̞bɯtʃi
+専念 sɛnən
+簡潔 kɑnket͡sɯ
+籠 kɑgo
+記者 kiʃɑ
+杉並 sɯginami
+楕円 dɑen
+たびたび tɑbitɑbi
+締めくくり ʃime̞kɯkɯri
+隈 kɯmɑ
+矢先 jɑsɑki
+さかん sɑkɑn
+就い t͡sɯi
+祭 mɑt͡sɯri
+占い師 ɯrɑnɑiʃi
+小走り kobɑʃiri
+ソープ soːpɯ
+誤る ɑjɑmɑrɯ
+清山 kijojɑmɑ
+明る ɑkɑrɯ
+食前 ʃo̞kɯzen
+砂利 dʒɑri
+平壌 pjoɲɑn
+こんにちは konnitʃihɑ
+キレイ kire̞ː
+へそくり hesokɯri
+中断 tʃɯːdɑn
+酒井 sɑkɑi
+姑 ʃɯːto̞me
+沖浦 okiɯrɑ
+佳 kɑ
+こわ koɯa
+引っ越す hiʔkosɯ
+害毒 gɑidokɯ
+上下 dʒo̞ɯge
+治る nɑorɯ
+薄め ɯsɯme
+無理やり mɯrijɑri
+棟梁 to̞ɯrʲo̞ɯ
+ステータス sɯteːtasɯ
+転記 te̞nki
+買い替え kɑikɑe
+困り komɑri
+ちゃう tʃɑɯ
+真っ黒 mɑʔkɯro
+ブルネイ bɯrɯne̞i
+稼ぐ kaseɡɯ
+行脚 ɑngʲɑ
+不審 ɸɯʃin
+まとう mɑtoː
+コンチェルト ko̞ntʃerɯto̞
+大学院 dɑigɑkɯin
+それでいて so̞ɾedeite
+踏ん張っ ɸɯnbɑʔ
+雪 jɯki
+漉く sɯkɯ
+厨房 tʃɯːbo̞ɯ
+太一 tɑitʃi
+丼 do̞nbɯri
+生活 seːkɑtsɯ
+勲章 kɯnʃo̞ɯ
+お洒落 oʃɑre
+年功 nenko̞ɯ
+ジュリアン dʒɯriɑn
+リーダーシップ riːdɑːʃiʔpɯ
+エントリー ento̞riː
+カリスマ kaɾisɯma
+言い立てる iitɑterɯ
+位 kɯrɑi
+狂う kɯrɯː
+如月 kisɑɾɑgi
+ロビイスト robiːsɯto
+スリム sɯɾimɯ
+はずし hɑzɯʃi
+害し gɑiʃi
+引き締まっ hikiʃimɑʔ
+役者 jɑkɯʃɑ
+緊張 kintʃo̞ɯ
+変動 hendo̞ɯ
+商社 ʃoːʃɑ
+墓穴 bo̞ket͡sɯ
+条件 dʒo̞ɯken
+在る ɑrɯ
+散らし tʃirɑʃi
+効用 ko̞ɯjo̞ɯ
+左腕 saɯan
+数えれ kɑzoere
+クリスマスツリー kɯrisɯmasɯtsɯriː
+くん kɯn
+マキシムギャラント mɑkiʃimɯ gʲɑrɑnto
+濤主席 toːʃɯseki
+兵力 heːrʲo̞kɯ
+汽 ki
+回る maɯarɯ
+イバニェス ibanezɯ
+防が fɯseɰᵝa
+物語 monogɑtɑri
+アプローチ ɑpɯroːtʃi
+見かける mikɑkerɯ
+惨敗 zɑnpɑi
+風越 kɑze koʃi
+銀座 ginzɑ
+幼児 jo̞ɯdʒi
+健在 kenzɑi
+富み to̞mi
+沖縄 okinaɯa
+芯 ʃin
+ジェボム dʒe bo̞mɯ
+共用 kʲo̞ɯjo̞ɯ
+カロリー kɑroriː
+永守 nɑgɑmori
+年生 ne̞nse̞ː
+春子 hɑrɯko
+オジさん oxisɑn
+満了 mɑnrʲoː
+積み重ね tsɯmikasane
+シフト ʃiɸɯto̞
+じり安 dʒiɾijasu
+混合 ko̞ngo̞ɯ
+コンタクト kontɑkɯto
+直せ nɑoze
+こっそり ko̞ʔso̞ɾi
+革命 kɑkɯmei
+正常 seːʒo̞ː
+競演 kʲo̞ɯen
+ロングセラー roŋgɯseraː
+すべて sɯbete
+リベラル riberɑrɯ
+雄 jɯː
+いち早く itʃihɑjɑkɯ
+請負 ɯkeo̞i
+植える ɯe̞rɯ
+麦畑 mɯgibɑtɑke
+熱 ne̞t͡sɯ
+ナルシスト naɾɯʃisɯto
+自覚 dʒikɑkɯ
+ボタン botɑn
+拓殖大学 tɑkɯʃokɯ dɑigɑkɯ
+セントレックス sentoɾeʔkɯsɯ
+限定 ge̞nte̞ː
+ぽ po̞
+鳩山 hɑtojɑmɑ
+ミサワ misaɯa
+人手 hito̞de
+うむ ɯmɯ
+アホウ ɑhoɯ
+気仙 ke̞ze̞n
+待た mɑtɑ
+ねじ伏せる nedʒifuseɾu
+カード kɑːdo
+ヶ ke̞
+フィジカル ɸidʒikɑrɯ
+寄附 kiɸɯ
+途 to̞
+カスタマイズ kasɯtamaizɯ
+バカげ bɑkɑge
+担う ninɑɯ
+出願 ʃɯt͡sɯgɑn
+おのずと o̞no̞zɯto̞
+潜る mo̞gɯrɯ
+応対 oːtɑi
+辞典 dʒite̞n
+廻る maɯarɯ
+放任 ho̞ɯnin
+られれ rɑrere
+低迷 te̞ːme̞ː
+地雷 dʒirɑi
+芽 me̞
+リスペクト risɯpekɯto
+住ん sɯn
+間中 mɑnɑkɑ
+損 so̞n
+満たし mitɑʃi
+プロミネンス pɯɾominensɯ
+決裂 ke̞t͡sɯre̞t͡sɯ
+たた tɑtɑ
+お陰さま okɑgesɑmɑ
+疲労 hiro̞ɯ
+吐く hɑkɯ
+以来 irɑi
+進化 ʃinkɑ
+問い to̞i
+死に ʃi ni
+京セラ kjoːseɾɑ
+休館 kʲɯːkɑn
+東京湾 tokʲo ɯan
+応じよ o̞ɯdʒijo̞
+信託 ʃintɑkɯ
+メタボ metɑbo
+事例 dʒire̞ː
+スヌーピー sɯnɯɯpii
+ら rɑ
+匿名 to̞kɯmeː
+書斎 ʃozɑi
+投げ掛ける nɑgekɑkerɯ
+清酒 seːʃɯ
+プーシスターズ pɯːshisɯtaːzɯ
+冷め sɑme
+カクテル kɑkɯterɯ
+三菱重工 mit͡sɯbiʃi dʒɯːko̞ɯ
+観光 kɑnkoː
+誇らしげ hokorɑʃige
+そういう soːiɯ
+修正 ʃɯːseː
+人聞き hito̞giki
+無責任 mɯsekinin
+明か ɑkirɑkɑ
+換算 kɑnsɑn
+やさしく jasaʃikɯ
+日足 hiɑʃi
+野比 no̞bi
+不正 fɯseː
+付ける t͡sɯke̞rɯ
+守り mɑmori
+土浦 t͡sɯtʃiɯrɑ
+絞る ʃibo̞rɯ
+緑 mido̞ri
+慰霊 ire̞ː
+裏切っ ɯrɑgiʔ
+エドワーズ edoɯaːzɯ
+プロ pɯro̞
+早稲田大学 wasedadaiɡakɯ
+あいまい ɑimɑi
+眠気 ne̞mɯke̞
+ホコリ ho̞ko̞ri
+遠井 to̞ː i
+テーマ teːmɑ
+象徴 ʃo̞ɯtʃo̞ɯ
+感服 kɑnpɯkɯ
+強烈 kʲo̞ɯret͡sɯ
+癒える ie̞rɯ
+破産 hɑsɑn
+教皇 kʲo̞ɯko̞ɯ
+ムスリム mɯsɯɾimɯ
+フラストレーション fɯɾasɯtoɾeːʃon
+石けん se̞ʔke̞n
+強化 kʲoːkɑ
+めん me̞n
+目黒 megɯro̞
+クラブ kɯrɑbɯ
+技 ɯaza
+訝し ibɯkɑʃi
+アーサ ɑːsɑ
+笑わ ɯaraɯa
+フープ ɸɯːpɯ
+悪用 ɑkɯjoː
+アニバーサリー ɑnibɑːsɑriː
+スパーリング sɯpaːɾiŋgɯ
+骨太 ho̞nebɯto̞
+興 kʲo̞ɯ
+ランナー rɑnnɑː
+艦船 kɑnsɯn
+まる mɑrɯ
+ガ gɑ
+乗れる no̞rerɯ
+暴力 bo̞ɯrʲo̞kɯ
+飲め no̞me
+上海 ʃɑnhɑi
+阻も hɑbɑmo
+締め ʃime̞
+ぶっ違い bɯʔtʃigɑi
+テキ te̞ki
+本読み ho̞ɲo̞mi
+こうした koː ʃitɑ
+見積 mit͡sɯmo̞ri
+追い出す ojdasɯ
+和み nɑgomi
+実刑 dʒiʔke̞ː
+人件 dʒinke̞n
+弓子 jɯmiko̞
+艶 ɑde
+上野 ɯeno̞
+殺さ koɾosɑ
+井筒 izɯt͡sɯ
+帳 tobɑri
+差し向ける saʃimɯkeɾɯ
+大塚 oːt͡sɯkɑ
+レ re̞
+美野 mino̞
+トチ to̞tʃi
+たかっ tɑkɑʔ
+さま sɑmɑ
+多 sɑvɑ
+ブッシュ bʊsʃɯ
+元々 mo̞to̞mo̞to̞
+光る hikɑrɯ
+内臓 nɑizoː
+バランスシート baransɯʃiːto
+マスカラ masɯkaɾa
+破滅 hɑmet͡sɯ
+京 kʲo̞ɯ
+キライ kirɑi
+病室 bʲo̞ɯʃit͡sɯ
+独身 do̞kɯʃin
+ガザ gɑzɑ
+創業 so̞ːgjo̞ː
+冷た t͡sɯmetɑ
+致し itɑʃi
+売る ɯrɯ
+都 to̞
+うなだれ ɯnɑdɑre
+肌寒い hadasamɯi
+えと eto̞
+廣 hiro̞ʃi
+怯え o̞bie
+もらえれ morɑere
+富士通 ɸɯdʒit͡sɯː
+切り捨て kiɾisɯte
+三月 sɑnegɑtsɯ
+作物 sakɯmotsɯ
+マクロ mɑkɯro
+片思い kɑtɑomoi
+侵害 ʃingɑi
+金持ち kɑnemotʃi
+自滅 dʒime̞t͡sɯ
+可否 kɑhi
+わたし ɯataʃi
+吹け ɸɯke̞
+謝り ɑjɑmɑri
+おいら oirɑ
+やっぱ jɑʔpɑ
+運転 ɯnte̞n
+かくして kɑkɯ ʃite
+貰う morɑɯ
+スイフトスポーツ sɯifɯtosɯpoːtsɯ
+財政 zɑiseː
+利子 riʃi
+好例 ko̞ːreː
+几帳面 kitʃo̞ɯmen
+しのぐ ʃino̞gɯ
+多大 tɑdɑi
+起こさ okosɑ
+キュー kʲɯː
+決算 kɛsɑn
+見渡す miwatasɯ
+大臣 dɑidʒin
+付き添っ tsɯkisoʔ
+艇 te̞ː
+意見 ike̞n
+体験 tɑiken
+笑う ɯaraɯ
+まち mɑtʃi
+奥歯 okɯbɑ
+プージャ pɯː dʒɑ
+香織 kɑori
+運搬 ɯnpɑn
+契機 ke̞ːki
+黒 kɯro̞
+ぐいと gɯi to̞
+集積 ʃɯːseki
+タイトル tɑitorɯ
+見え透い mjesɯi
+玉川 tamagaɯa
+狭く semakɯ
+シルバー ʃirɯbɑː
+サマーズ samaːzɯ
+生きる ikirɯ
+送る o̞kɯrɯ
+なんだか nɑn dɑ kɑ
+手放す tebanasɯ
+探る saɣɯɾɯ
+対立 tɑirit͡sɯ
+検閲 kenetsɯ
+彼女 kɑnodʒo
+滝沢 takizaɯa
+屈指 kɯsʃi
+八幡平 hɑtʃimɑntɑi
+動向 do̞ɯko̞ɯ
+通行人 t͡sɯːko̞ɯnin
+場面 bɑmen
+現職 genʃo̞kɯ
+変更 henko̞ɯ
+ウソ ɯso
+続か t͡sɯzɯkɑ
+聞き返し kikikɑeʃi
+相次い ɑit͡sɯi
+責 se̞me̞
+ヤンキー jɑnkiː
+割り振り ɯariɸɯri
+ドリカム dorikɑmɯ
+もらう morɑɯ
+パソコン pɑsokon
+インストール insɯtoːɾɯ
+た tɑ
+イヤ ijɑ
+この ko̞no̞
+異動 ido̞ɯ
+あたら ɑtɑrɑ
+コンテスト kontesɯto
+方法 ho̞ɯho̞ɯ
+霧 kiri
+面談 mendɑn
+軽微 ke̞ːbi
+球界 kʲɯːkɑi
+送っ o̞kɯʔ
+味気 ɑdʒike
+サリマンハウル saɾimanaɯɾɯ
+紙くず kɑmikɯzɯ
+とる to̞rɯ
+ぴんと pin to̞
+即答 sokɯtoː
+恐怖症 kʲo̞ɯɸɯʃo̞ɯ
+夕立 jɯːdɑtʃi
+蔚山 ɯɾɯsan
+謙造 kenzo̞ɯ
+来訪 rɑihoː
+勇壮 jɯːsowː
+頓挫 tonzɑ
+有数 jɯːsɯː
+閉じこもっ to̞dʒiko̞mo̞ʔ
+手ごたえ tegotɑe
+逃さ noɰᵝɑsɑ
+縫い合わせ nɯjawase
+生き返っ ikikɑeʔ
+預け ɑzɯke
+在籍 zɑiseki
+ボロい bo̞ro̞i
+見栄 mie̞
+関わり kakaɯari
+挙げ句 ɑgekɯ
+テラダモケイ terɑdɑmokei
+ストックホルム sɯtoʔkɯhoɾɯmɯ
+嗣 t͡sɯgɯ
+スタート sɯtaːto
+見え mie̞
+先方 senpo̞ː
+カメラロール kɑmerɑ roːrɯ
+ハンドル hɑndorɯ
+商 ʃo̞ɯ
+数 sɯː
+果敢 kɑkɑn
+予言 jo̞gen
+ベイビー be̞ibiː
+バファナ bɑɸɑnɑ
+ロジウム ro̞dʒiɯmɯ
+積ま t͡sɯmɑ
+苗 nɑe
+堀 ho̞ri
+容器 jo̞ɯki
+ぜひ ze̞hi
+スタント sɯtanto
+めかし mekɑʃi
+口 kɯtʃi
+拡販 kɑkɯhɑn
+次に t͡sɯgi ni
+おいし o̞iʃi
+コラムニスト koramɯnisɯto
+お祝い oiɯai
+ものすごく monosɯɡokɯ
+集まる ɑt͡sɯmɑrɯ
+全羅南道 zenrɑ nɑndoː
+薬指 kɯsɯɾijɯbi
+はいら hɑirɑ
+ヒノハラ hino hɑrɑ
+熱い ɑt͡sɯi
+言わ iɯa
+推薦 sɯisen
+再開 sɑikɑi
+リスト risɯto
+割れ ɯare
+水菜 mizɯnɑ
+酷かっ hidokɑʔ
+露 ro̞
+サハラ sɑhɑɾɑ
+多様 tɑjoː
+ヒノ hino̞
+スピンオフ sɯpinofɯ
+詳しい kɯɯaʃiː
+冠水 kansɯi
+牡丹 botɑn
+基金 kikin
+ヨーダ joːdɑ
+工房 ko̞ɯbo̞ɯ
+周辺 ʃɯːhe̞n
+以降 iko̞ɯ
+鳴き声 nɑkigoe
+寝不足 nefɯsokɯ
+抱き idɑki
+闘病 to̞ɯbʲo̞ɯ
+起こっ o̞ko̞ʔ
+ハラ hɑrɑ
+大体 dɑitɑi
+建ち並ん tɑtʃinɑrɑn
+プロジェクト pɯro̞dʒekɯto̞
+サラサラ sɑɾɑsɑɾɑ
+履い hɑi
+修理 ʃɯːri
+惨事 sɑɲi
+船舶 senpakɯ
+遂げ to̞ge
+能力 no̞ɯrʲo̞kɯ
+飲酒 inʃɯ
+宮本 mijɑmoto
+すがっ sɯɰᵝaʔ
+ヒト hito̞
+齋藤 sɑitoː
+帳簿 tʃo̞ɯbo̞
+不明 ɸɯme̞ː
+メーク me̞ːkɯ
+表さ ɑɾɑvɑsɑ
+店員 te̞nin
+日銀 nitʃigin
+フリガナ ɸɯrigɑnɑ
+全文 ze̞nbɯn
+送ら okɯrɑ
+おかず okɑzɯ
+惜敗 sekihɑi
+ごつごつ go̞t͡sɯgo̞t͡sɯ
+時点 dʒite̞n
+作業 sɑgjoː
+経つ tɑt͡sɯ
+祭り mɑt͡sɯri
+ときどき to̞kido̞ki
+避ける sakeɾɯ
+株主 kɑbɯnɯʃi
+ボロから borokɑrɑ
+三振 sɑnʃin
+立てこもり tɑtekomori
+堅持 ke̞ndʒi
+輪舞 rinbɯ
+覽下 ??
+話し合える hɑnɑʃiɑerɯ
+鰹 kɑt͡sɯo
+眠っ ne̞mɯʔ
+ドライバー dorɑibɑː
+社風 ʃɑɸɯː
+消防 ʃo̞ɯbo̞ɯ
+開設 kɑisetsɯ
+水道 sɯidoː
+飾りつける kɑzɑrit͡sɯkerɯ
+敗れ jɑbɯre
+うかがっ ɯkɑgɑʔ
+北京 pe̞kin
+あかり ɑkɑri
+ネイブ ne̞i bɯ
+喜び jo̞ro̞ko̞bi
+破壊 hɑkɑi
+見込み miko̞mi
+コレクター korekɯtɑː
+定食 teːʃo̞kɯ
+ボクシング bo̞kɯʃingɯ
+知人 tʃidʒin
+夫妻 fɯzai
+クチコミ kɯtʃiko̞mi
+鹿家 ʃikɑkɑ
+喜多方 kitɑkɑtɑ
+清潔 se̞ːke̞tsɯ
+学生 gakɯseː
+セクシー sekɯʃiː
+スモッグ sɯmoʔgɯ
+ござい gozɑi
+見違える mitʃigɑerɯ
+小町 komɑtʃi
+自作 dʒisaku
+璃子 riko̞
+飛ん to̞n
+思い描い omoiegɑi
+通し to̞ːʃi
+ワイン ɯain
+童顔 doːgɑn
+きわまり kiɯamari
+手放せ tebɑnɑse
+読み手 jo̞mite
+住友商事 sɯmitomoʃoːʒi
+僧侶 so̞ːɾjo̞
+悪かっ ɯarɯkaʔ
+ちょっぴり tʃo̞ʔpiri
+種類 ʃɯrɯi
+暖簾 no̞ren
+詐欺 sɑgi
+双眼鏡 soːɡɑnkjoː
+ジャガー dʒɑgɑː
+たちこめ tɑtʃikome
+いっしょ iʔʃo̞
+寄ら jorɑ
+足袋 tɑbi
+集う t͡sɯdo̞ɯ
+地下鉄 tʃikɑtet͡sɯ
+医療 irʲo̞ɯ
+メロン mero̞n
+予想 jo̞so̞ː
+苦戦 kɯzen
+嫌気 ijɑke
+夏場 nɑt͡sɯbɑ
+大忙し oɪsoɡɑʃi
+まずい mɑzɯi
+海運 kɑiɯn
+新米 ʃinmɑi
+対する taisɯɾɯ
+頼ん tɑnon
+訪問 ho̞ɯmo̞n
+電 de̞n
+降る ɸɯrɯ
+黒こげ kɯro̞ko̞ge
+ぶつける bɯt͡sɯke̞rɯ
+見殺し migo̞ro̞ʃi
+ケシ ke̞ʃi
+ロングシート ro̞ngɯ ʃiːto̞
+伊豆 izɯ
+トレード to̞reːdo̞
+警官 keːkɑn
+充電 dʒɯːde̞n
+直紀 nɑoki
+跡 ɑto
+両成敗 rjoːseibɑi
+ボール bo̞ːrɯ
+下宿 ge̞ʃɯkɯ
+日付 hizɯke̞
+胃腸 itʃo̞ɯ
+差出人 sɑʃidɑʃinin
+ちゃえ tʃɑe
+ヒタヒタ hitɑhitɑ
+正社員 seːʃɑin
+ポチャ po tʃɑ
+プラス pɯɾasɯ
+ダイハツ工業 dɑihɑt͡sɯ koːgʲoː
+カー kɑː
+劇的 ge̞kite̞ki
+やら jɑrɑ
+楽しかっ tɑnoʃikɑʔ
+廃業 hɑigʲoː
+ボイル bo̞irɯ
+静止 se̞ːʃi
+大がかり oːgɑkɑri
+追い込む o̞iko̞mɯ
+キリ kiri
+とお to̞ː
+いくらでも ikɯrɑ de mo
+一緒 iʔʃo̞
+鳥越 to̞rigo̞e
+利 ri
+過ごせる sɯɣoseɾɯ
+午前 go̞zen
+クスリ kɯsɯɾi
+パリ pɑri
+鈍い nibɯi
+けた ketɑ
+増永 masɯnaɣa
+克服 ko̞kɯɸɯkɯ
+まくり mɑkɯri
+ミキサー mikisɑː
+応用 o̞ɯjo̞ɯ
+優位 jɯːi
+プードル pɯːdo̞rɯ
+器 ɯt͡sɯɯa
+ノゲイラヒョードル nogeirɑ çoːdorɯ
+互角 gokɑkɯ
+キムタク kimɯtɑkɯ
+奥の手 o̞kɯno̞te
+アレキサンダー ɑɾekisɑndɑː
+本能 ho̞nno̞ɯ
+見つかる mit͡sɯkɑrɯ
+演習 e̞nʃɯː
+に関する nikansɯɾɯ
+入っ hɑiʔ
+ダントツ dɑntot͡sɯ
+タンク tɑnkɯ
+良く jo̞kɯ
+誘わ sɑsovɑ
+両側 rʲoːgaɯa
+始め hɑdʒime
+増益 zo̞ɯeki
+経歴 ke̞ːre̞ki
+進歩 ʃinpo̞
+弔問 tʃo̞ɯmo̞n
+ホラー horɑː
+残る no̞ko̞rɯ
+行進 ko̞ɯʃin
+完敗 kɑnpɑi
+お互い otɑgɑi
+厳か oɡosokɑ
+リトマス試験紙 ritomasɯʃikenʃi
+果たして hɑtɑʃite
+融合 jɯːgo̞ɯ
+集まり ɑt͡sɯmɑri
+月額 get͡sɯgɑkɯ
+勇気づけ jɯːkizɯke̞
+坂 sɑkɑ
+承認 ʃo̞ɯnin
+流通 rʲɯːt͡sɯː
+エン e̞n
+世田谷 setɑɰᵝɑjɑ
+いらっしゃる iɾaʔʃaɾɯ
+敢行 kɑnkoː
+如何 ikɑgɑ
+埋め合わせる ɯmeawaseɾɯ
+犠牲 gise̞ː
+同級生 doːkjɯːseː
+再 sɑi
+あきれ返る ɑkirekɑerɯ
+痛い itɑi
+理学 rigɑkɯ
+別物 bet͡sɯmo̞no̞
+患者 kɑndʒɑ
+奈良 nɑrɑ
+コーシーブレイク ko̞ːʃiː bɯreikɯ
+ので no̞ de
+せっかく seʔkakɯ
+幅 hɑbɑ
+規定 kite̞ː
+学ん mɑnɑn
+ブータン bɯːtɑn
+解剖 kɑiboː
+居候 iso̞ːɾo̞ː
+刷り sɯɾi
+倍 bɑi
+レコード reko̞ːdo̞
+ビエルサ bjeɾɯsa
+才能 sɑinoː
+酒造り sakezɯkɯrʲi
+なさけない nɑsɑkenɑi
+変態 hentɑi
+受話器 dʒɯɯaki
+風流 ɸɯːrʲɯː
+まみれ mɑmire
+滑走 kɑsoː
+日程 niʔte̞ː
+微量 birʲo̞ɯ
+物質 bʊsʃɪt͡sɯ
+彼方 kɑnɑtɑ
+柔道 dʒɯdo̞
+流 rʲɯː
+耳かき mimikɑki
+献身 ke̞nʃin
+向い mɯkɑi
+上院 dʒo̞ɯin
+公私 ko̞ɯʃi
+展示 te̞ndʒi
+自他 dʒitɑ
+一番 itʃibɑn
+音痴 o̞ntʃi
+広く hiro̞kɯ
+マザー mɑzɑː
+とある toɑrɯ
+予感 jokɑn
+踊っ o̞do̞ʔ
+五体 gotɑi
+よろしけれ jo̞ro̞ʃikere
+温暖 ondɑn
+段落 dɑnrɑkɯ
+ごしごし go̞ʃigo̞ʃi
+にて nite̞
+思考 ʃiko̞ɯ
+閉め切っ ʃime̞ kiʔ
+刺す sasɯ
+計り hɑkɑri
+激しく hɑgeʃikɯ
+公示 ko̞ɯdʒi
+負け惜しみ mɑkeoʃimi
+普通 ɸɯt͡sɯː
+成熟 seːjɯkɯ
+徹し te̞sʃi
+下す kɯdasɯ
+硫酸紙 rjɯːsanʃi
+入局 ɲɯːkʲo̞kɯ
+当然 to̞ɯzen
+再現 sɑigen
+ハマっ hɑmɑʔ
+きれい kire̞ː
+捨離 ??
+責める semeɾɯ
+チャイム tʃɑimɯ
+吾 ɯare
+電話 denɯa
+申し立てる moːʃitɑterɯ
+採用 sɑijoː
+マルホ mɑrɯho
+メルトダウン merɯtodɑɯn
+択一 tɑkɯit͡sɯ
+痔 dʒi
+ウインドウズ ɯindo̞ɯzɯ
+広岡 hiroːkɑ
+耐震 tɑiʃin
+第一人者 dɑiitʃininʃɑ
+打つ手 ɯt͡sɯ te̞
+倒し tɑoʃi
+もち mo̞tʃi
+算出 sanshɯtsɯ
+春樹 hɑrɯki
+自在 dʒizɑi
+主観 ʃɯkɑn
+くるし kɯrɯʃi
+姿 sɯɣata
+つながる t͡sɯnɑgɑrɯ
+セリングクライマックス seriŋgɯkɯraimaʔkɯsɯ
+入浴 ɲɯːjo̞kɯ
+中国人 tʃɯːgo̞kɯnin
+短く midʒikɑkɯ
+遺産 isɑn
+頭打ち ɑtɑmɑɯtʃi
+快晴 kɑiseː
+半 hɑn
+腕前 ɯdemɑe
+被る kɑbɯrɯ
+化学 kɑgɑkɯ
+下請け ʃitɑɯke
+偉 erɑ
+清明 se̞ːme̞ː
+リピート ripiːto̞
+バイヤー bɑijɑː
+モレスキン moɾesɯkin
+石渡 iʃiɯata
+人望 dʒinbo̞ɯ
+ぼろく bo̞ro̞kɯ
+船 ɸɯne̞
+討論 to̞ɯro̞n
+田辺 tɑnɑbe
+得意 to̞kɯi
+収入 ʃɯːɲɯː
+敵失 te̞kiʃit͡sɯ
+辞退 dʒitɑi
+ナチス nakʲ̩isɯ
+高波 tɑkɑnɑmi
+叩い tɑtɑi
+灯 ɑkɑri
+時代 dʒidɑi
+呈し te̞ːʃi
+迷わ mɑjoɑ
+こだわる kodaɯarɯ
+東海林 ʃo̞ɯdʒi
+柔らかい jaɯarakai
+上演 dʒo̞ɯen
+スクール sɯkɯːrɯ
+メイド meido̞
+清盛 kijo̞mo̞ri
+領域 rʲo̞ɯiki
+本国 ho̞ngo̞kɯ
+配布 hɑiɸɯ
+ちゃ tʃɑ
+並べる nɑrɑberɯ
+され sɑɾe
+整備 se̞ːbi
+詰め t͡sɯme̞
+地 tʃi
+ラオックス ɾaokekɯsɯ
+フィーゴ ɸiːgo̞
+寒風 kɑnpɯː
+年俸 nenpo̞ɯ
+漁船 gosen
+野 no̞
+当面 to̞ɯmen
+温め ɑtɑtɑme
+暴動 bo̞ɯdo̞ɯ
+いい気 ii ki
+ちょっとした tʃoʔto ʃitɑ
+トク to̞kɯ
+あー ɑː
+通勤 t͡sɯːkin
+物色 bʊsʃokɯ
+中洲 nakasɯ
+翌朝 jokɯasa
+寺社 dʒiʃɑ
+歌い手 ɯtɑite
+不参加 fɯsanka
+許し jɯrɯʃi
+激務 ge̞kimɯ
+配慮 hɑirʲo
+奥 o̞kɯ
+情状 dʒo̞ɯdʒo̞ɯ
+弾き hiki
+やすく jasɯkɯ
+さもなければ sɑmonɑkerebɑ
+お疲れさま otsɯkaɾesama
+途方 to̞ho̞ɯ
+一目瞭然 itʃimo̞kɯrʲo̞ɯzen
+スキー sɯkiː
+菊池 kikɯtʃi
+洗濯 sentakɯ
+ナポ nɑ po
+横向き jo̞ko̞mɯki
+集め ɑt͡sɯme
+青田買い ɑotɑ kɑi
+親 ojɑ
+稼い kɑsei
+汚く kitɑnɑkɯ
+難民 nɑnmin
+高度 ko̞ɯdo̞
+イソフラボン isofɯɾabon
+マスター masɯtaː
+忠誠 tʃɯːseː
+核心 kɑkɯʃin
+六 ro̞kɯ
+教師 kʲo̞ɯʃi
+同市 do̞ɯʃi
+庫 kɯrɑ
+質店 ʃitʃimize̞
+開く hirɑkɯ
+安田 jasɯda
+奥深 okɯbɯkɑ
+好条件 ko̞ɯdʒo̞ɯken
+周知 ʃɯːtʃi
+名画 meːgɑ
+特性 tokɯseː
+カフェ kɑɸe
+連休 re̞nkʲɯː
+おもしろい o̞mo̞ʃiro̞i
+府 ɸɯ
+無 mɯ
+圧力 ɑt͡sɯrʲokɯ
+鉄 te̞t͡sɯ
+描か egɑkɑ
+すぎ sɯgi
+せり se̞ɾi
+行なわ okonaɯa
+後退 koːtɑi
+緩ま jɯrɯmɑ
+目鼻立ち mehɑnɑdɑtʃi
+サッと sɑto
+存じ zo̞ndʒi
+軽量 keːrʲo̞ː
+キャスター kjasɯtaː
+えっ e̞ʔ
+返信 he̞nʃin
+さらに sɑɾɑni
+都市 to̞ʃi
+分娩 bɯnbe̞n
+通ら toːrɑ
+拷問 go̞ɯmo̞n
+となみ to nɑmi
+奪っ ɯbɑʔ
+事 ko̞to̞
+ほとんど ho̞to̞ndo̞
+長沼 nɑgɑnɯmɑ
+にわたり ni ɯatari
+人々 hito̞bito̞
+廃止 hɑiʃi
+眼下 gɑnkɑ
+強含み t͡sɯjo̞ɸɯkɯmi
+敏夫 to̞ʃio̞
+中医協 tʃɯːikʲo̞ɯ
+冷蔵庫 reːzo̞ːko̞
+自閉症 dʒiheːʃo̞ː
+薄着 ɯsɯgi
+帰国 kiko̞kɯ
+自虐 dʒigʲɑkɯ
+固め kɑtɑme
+リストラ risɯtoɾa
+怒鳴り donɑri
+オイル o̞irɯ
+インチ intʃi
+大相撲 oosɯmoː
+ツイ t͡sɯi
+ありがとう ɑrigɑtoː
+新春 ʃinʃɯn
+暇 himɑ
+底 so̞ko̞
+新曲 ʃinkʲo̞kɯ
+公認 ko̞ɯnin
+臭 nio̞i
+悔しい kɯjɑʃiː
+活動 kɑt͡sɯdoː
+ほんとに ho̞nto̞ ni
+ハンジュン hɑn dʒɯn
+食いしん坊 kɯiʃinbo̞
+解禁 kɑikin
+酸っぱく sɯʔpakɯ
+持続 dʒizo̞kɯ
+大賞 tɑiʃoː
+会合 kɑigoː
+エンジニア endʒiniɑ
+ラウンジ rɑɯndʒi
+続い t͡sɯzɯi
+手応え tegotɑe
+憶測 okɯsokɯ
+春 hɑrɯ
+政権 se̞ːke̞n
+いかなる ikɑnɑrɯ
+ダル dɑrɯ
+寄せ鍋 josenɑbe
+過去 kɑko
+なかっ nɑkɑʔ
+属 zo̞kɯ
+急増 kʲɯːzo̞ɯ
+流れ nɑgɑre
+量 rʲo̞ɯ
+白く ʃiro̞kɯ
+アジア ɑdʒiɑ
+ぼく bo̞kɯ
+おけ o̞ke
+咲かせ sɑkɑse
+渋滞 dʒɯːtɑi
+伺わ ɯkagaɯa
+有価 jɯːkɑ
+ダラクシー dɑrɑkɯ ʃiː
+悲惨 hisɑn
+熱々 ɑt͡sɯɑt͡sɯ
+身上 ʃindʒo̞ɯ
+酔っ払っ joʔpɑrɑʔ
+保険 ho̞ken
+酪農 rɑkɯnoː
+家庭 kɑteː
+街道 kɑidoː
+積立 t͡sɯmitɑte
+呼び出し jobidɑʃi
+始まる hɑdʒimɑrɯ
+カジュアル kɑdʒɯɑrɯ
+素 mo̞to̞
+赤裸々 sekiɾɑɾɑ
+利率 ririt͡sɯ
+雇用 ko̞jo̞ɯ
+メジャー medʒɑː
+エステ ezɯte
+たっぷり tɑʔpɯri
+スペクタクル sɯpekɯtakɯɾɯ
+皆さん minɑsɑn
+悩み nɑjɑmi
+マーチャント mɑːtʃɑnto
+局面 kʲo̞kɯmen
+部署 bɯʃo̞
+湯気 jɯge̞
+検出 ke̞nʃɯt͡sɯ
+構える kɑmɑerɯ
+引き立てる hikitɑterɯ
+国 kɯni
+西川 niʃikaɯa
+立ち上がり tɑtʃiɑgɑri
+事態 dʒitɑi
+ゆるく jɯrɯkɯ
+ぐにゃっとした gɯɲɑʔtoʃitɑ
+ゴア goɑ
+頂点 tʃo̞ɯten
+朝鮮 tʃɑʊ̯sən
+たかが tɑkɑ gɑ
+苦痛 kɯt͡sɯː
+時に to̞ki ni
+まさかの mɑsɑkɑno
+寄付 kiɸɯ
+小熊 kogɯmɑ
+田島 tɑdʒimɑ
+アルプス aɾɯpɯsɯ
+二者択一 niʃɑ tɑkɯit͡sɯ
+更迭 ko̞ɯtet͡sɯ
+縮図 ʃɯkɯzɯ
+上限 dʒo̞ɯgen
+鉱業 ko̞ɯgʲo̞ɯ
+よる jo̞rɯ
+低落 teːrɑkɯ
+サバ sɑbɑ
+ほほえましい hohoemɑʃiː
+プレゼン pɯre̞ze̞n
+杞憂 kijɯː
+油脂 jɯʃi
+オートマ oːtomɑ
+見放さ mihɑnɑsɑ
+以上 idʒo̞ɯ
+乾性 kɑnseː
+温まる ɑtɑtɑmɑrɯ
+読者 dokɯʃɑ
+駄目 dɑme
+レシート reʃiːto̞
+キー kiː
+たいした tɑiʃitɑ
+前年 ze̞nne̞n
+台湾 taiɯan
+実務 dʒit͡sɯmɯ
+セクション sekɯʃon
+幅広 hɑbɑhiro
+原因 gənin
+とたん totɑn
+高 ko̞ɯ
+コソボ ko̞so̞bo̞
+児童 dʒido̞ɯ
+すわっ sɯwaʔ
+背景 hɑikeː
+錯そう sakɯsoː
+玉虫色 tɑmɑmɯʃi iro
+岩 iɯa
+かもし kɑ moʃi
+おそらくは osorakɯwa
+門外 mongɑi
+コスプレ kosɯpɯɾe
+しばし ʃibɑʃi
+予選落ち jo̞seno̞tʃi
+懇親 ko̞nʃin
+描ける egɑkerɯ
+歯 hɑ
+校 ko̞ɯ
+公表 ko̞ɯço̞ɯ
+ジャー dʒɑː
+クールビズ kɯːrɯ bizɯ
+待機 tɑiki
+スラリ sɯɾaɾi
+ボケ bo̞ke
+捜索 soːsakɯ
+単体 tɑntɑi
+ツクイ t͡sɯkɯi
+初 hɑt͡sɯ
+正美 mɑsɑmi
+手出し tedɑʃi
+原子 ge̞nʃi
+広報 ko̞ɯho̞ɯ
+出会い deɑi
+黒沢 kɯɾosawa
+奏者 soːʃɑ
+脆弱 zeːdʒɑkɯ
+イチロー itʃiro̞ː
+グアム gɯɑmɯ
+ポーズ po̞ːzɯ
+コブ ko̞bɯ
+遠のく to̞ːno̞kɯ
+中華 tʃɯːkɑ
+繋がっ t͡sɯnɑgɑʔ
+結末 ket͡sɯmɑt͡sɯ
+阻害 soɰᵝɑi
+公園 ko̞ɯen
+大本 o̞ːmo̞to̞
+広々 hiro̞biro̞
+ちゃい tʃɑi
+実証 dʒiʔsɯo̞ː
+高田 tɑkɑdɑ
+姫 hime̞
+議事 gidʒi
+藤川 ɸɯdʒikaɯa
+理念 rine̞n
+考え方 kɑngɑekɑtɑ
+エリ e̞ri
+麻生 ɑsoː
+ファーストリテ faːsɯtoɾite
+届け to̞do̞ke
+姫路 hime̞dʒi
+移っ ɯt͡sɯʔ
+自衛隊 dʒieːtɑi
+捕虜 ho̞rʲo̞
+荒涼 ko̞ɯrʲo̞ɯ
+皿 sɑɾɑ
+助ける tasɯkeɾɯ
+累 rɯi
+仕分け ʃiɯake
+余地 jo̞tʃi
+狂態 kʲoːtɑi
+気配り kikɯbɑri
+ポピュリズム po̞pʲɯrizɯmɯ
+探せ sɑɣɑse
+酸性 sɑnseː
+テレカ terekɑ
+知恵 tʃie̞
+ガー gɑː
+離れよ hɑnɑrejo
+すご sɯɰᵝao
+主因 ʃɯin
+はて hɑte
+居住 kʲo̞dʒɯː
+スカッ sɯka
+おい o̞i
+アクション ɑkɯʃon
+訪中 ho̞ɯtʃɯː
+倒産 toːsɑn
+表 ço̞ɯ
+夜道 jo̞mitʃi
+裏切ら ɯrɑgirɑ
+陽動 jo̞ɯdo̞ɯ
+無い nɑi
+くすぶっ kɯsɯbɯʔ
+除 dʒo̞
+失う ɯʃinɑɯ
+注水 tʃɯːsɯi
+便 bin
+様 jo̞ɯ
+慎一郎 ʃinitʃiɾo̞ː
+並ん nɑrɑn
+戸田建設 todɑkensetsɯ
+サラリーマン sɑrɑriːmɑn
+長期 tʃo̞ɯki
+持合い motʃiɑi
+公務員 ko̞ɯmɯin
+にあたって ni ɑtɑʔte
+組 kɯmi
+浅見 sɛŋkən
+新譜 ʃinpɯ
+飾っ kɑzɑʔ
+飛騨 hidɑ
+パーツ pɑːt͡sɯ
+ケーキ ke̞ːki
+何となく nɑn to nɑkɯ
+先生 se̞nse̞ː
+掘り出し物 horidɑʃi mono
+過労 kɑroː
+御社 goʃɑ
+書き手 kɑkite
+ブラウザアプリ bɯrɑɯzɑ ɑpɯri
+隆 tɑkɑʃi
+荒れる ɑrerɯ
+平安 heːɑn
+曇らせ kɯmoɾase
+ちょく tʃo̞kɯ
+カーター kɑːtɑː
+実写 dʑisʃɑ
+でし de̞ʃi
+うがち ɯgɑtʃi
+みつかり mit͡sɯkɑri
+引きずり hikizɯri
+大将 tɑiʃoː
+潜伏 senpɯkɯ
+影響 eikʲo̞ː
+新興 ʃinko̞ɯ
+連盟 re̞nme̞ː
+龍舞 rʲɯːmɑi
+浦和レッズ ɯraɯa reʔzɯ
+走り回っ haʃirimaɯaʔ
+年の瀬 to̞shino̞ze
+劇 ge̞ki
+寒冷 kɑnreː
+先日 se̞ɲʒitsɯ
+志し kokorozɑʃi
+奴 jɑt͡sɯ
+経団連 keːdɑnren
+テキストエディタ tekisɯtoedita
+エレクエルピーダ erekɯ erɯpiːdɑ
+待望 tɑiboː
+憂い ɯre̞i
+スパイク sɯpaikɯ
+言わば iɯaba
+アン ɑn
+無配 mɯhɑi
+如く go̞to̞kɯ
+苦労 kɯro̞ɯ
+西日本 niʃiniʔpo̞n
+大物 o̞ːmo̞no̞
+何時も it͡sɯ mo̞
+主演 ʃɯe̞n
+歩 ɑjɯmi
+常 t͡sɯne̞
+言い逃れ iːnogɑre
+不滅 ɸɯme̞t͡sɯ
+パワポ paɯapo
+ほど遠い ho̞do̞ to̞ːi
+濁流 dɑkɯrʲɯː
+脅威 kʲo̞ɯi
+宋 so̞ː
+ビジネスマン bidʒinesuman
+加工 kɑkoː
+窒息 tʃɪʔsokɯ
+オフタイム oɸɯ tɑimɯ
+酒類 ʃɯrɯi
+年 ne̞n
+全米 ze̞nbe̞ː
+踏まえ ɸɯmɑe
+ソフトボール sofɯtoboːrɯ
+家探し jɯsɑgɑʃi
+命取り ino̞tʃito̞ri
+エネルギー e̞ne̞rɯgiː
+終わら oɯara
+寧 ne̞ː
+おかし okɑʃi
+アピール ɑpiːrɯ
+軸 dʒikɯ
+沽券 ko̞ken
+解散 kɑisɑn
+ふん ɸɯn
+眉間 mike̞n
+明後日 mʲo̞ɯgo̞nitʃi
+礼文 re̞ːbɯn
+肥厚 hiko̞ɯ
+高木 tɑkɑgi
+校閲 ko̞ɯet͡sɯ
+ひばり hibɑri
+葉 hɑ
+登板 toːbɑn
+魚 sɑkɑnɑ
+隠せ kakɯze
+発 hɑt͡sɯ
+見逃し minogɑʃi
+酒精 ʃɯseː
+制服 seːfɯkɯ
+ディレクター direkɯtɑː
+チェロ tʃero̞
+待たさ mɑtɑsɑ
+ドキュメンタリー dokʲɯmentɑriː
+とにかく tonikɑkɯ
+詫びる ɯabirɯ
+繰 ?
+踊れる o̞do̞rerɯ
+分目 bɯnme̞
+値する ataisɯɾɯ
+松下 mɑt͡sɯʃitɑ
+本田 hondɑ
+大門 dɑimon
+全勝 zenʃo̞ɯ
+ロマン romɑn
+脂肪 ʃibo̞ɯ
+編成 he̞nse̞ː
+老舗 ʃinizɨ
+切れ込ん kireko̞n
+お釣り o̞t͡sɯri
+クマル kɯmɑrɯ
+弾力 dɑnrʲokɯ
+いや ijɑ
+州 ʃɯː
+犯行 hɑnkoː
+児 ko̞
+クラウドファンディング kɯrɑɯdo ɸɑndingɯ
+婦 ɸɯ
+これから kore kɑrɑ
+ニコタマ nikotɑmɑ
+代わり kaɯari
+本質 ho̞nʃit͡sɯ
+ボート bo̞ːto̞
+囲碁 igo̞
+早く hɑjɑkɯ
+図柄 zɯgɑrɑ
+号令 go̞ːreː
+話せ hɑnɑse
+クロウ kɯro̞ɯ
+極秘 go̞kɯhi
+代目 dɑime
+書き込ま kɑkikomɑ
+とちの木 to̞tʃino̞ki
+真因 ʃinin
+扱お ɑt͡sɯkɑo
+下がる sagaɾɯ
+レバレッジ rebɑredʒdʒi
+不満 ɸɯmɑn
+配分 hɑibɯn
+捜査 soːsɑ
+提携 te̞ːke̞ː
+店主 te̞nʃɯ
+欧州 o̞ɯʃɯː
+論説 ronsetsɯ
+めいじん me̞ːdʒin
+突っ込み t͡sɯʔko̞mi
+即座 sokɯza
+負けん気 mɑken ki
+快音 kɑion
+自 dʒi
+有朋 ɑritomo
+塾 dʒɯkɯ
+チョコレート tʃo̞ko̞reːto̞
+設計 se̞ʔke̞ː
+保持 ho̞dʒi
+東レ to̞ɯre
+ファミリー ɸɑmiriː
+控えろ hikɑero
+西国 saigokɯ
+一斉 ise̞ː
+ミニバン minibɑn
+プロモーション pɯro̞mo̞ːʃo̞n
+舞台裏 bɯtɑi ɯrɑ
+不安定 ɸɯanteː
+町民 tʃo̞ɯmin
+物理 bɯt͡sɯri
+断わる kotoɯarɯ
+再会 sɑikɑi
+出逢っ deɑʔ
+サントス santosɯ
+來月 ??
+縁起 e̞ngi
+自治 dʒitʃi
+湧い ɯai
+総理 so̞ːɾi
+全域 ze̞niki
+宣告 senkokɯ
+諸国 ʃo̞ko̞kɯ
+流失 rʲɯːʃit͡sɯ
+本題 hondɑi
+詰まっ t͡sɯmɑʔ
+ハル hɑrɯ
+右腕 migiɯde̞
+はがせる hagaseɾɯ
+狭い semɑi
+しかしながら ʃikɑʃinɑgɑrɑ
+調べ ʃirɑbe
+谷 tɑni
+龍神 rʲɯːdʒin
+足りる tɑrirɯ
+中国語 tʃɯːgo̞kɯ go̞
+帯同 tɑidoː
+資する ʃisɯɾɯ
+京都 kʲo̞to̞
+軍 gɯn
+海 ɯmi
+空港 kɯːko̞ɯ
+プレイ pɯre̞i
+銀製 ginse̞ː
+オレンジ o̞rendʒi
+先天 se̞nte̞n
+分かれ目 ɯakareme
+拠点 kʲo̞ten
+ミドリ mido̞ri
+入手 ɲɯːʃɯ
+啓発 keːhɑt͡sɯ
+荒浜 ɑrɑhɑmɑ
+自主 dʒiʃɯ
+工法 ko̞ɯho̞ɯ
+指令 ʃire̞ː
+通して to̞ːʃite
+転がる korogɑrɯ
+引っ込ん hiʔko̞n
+熟成 dʒɯkɯseː
+馴れろ nɑrero
+臨時 rindʒi
+早稲田 vɑsedɑ
+揃え so̞ɾo̞e
+留学生 rjɯːgakɯseː
+ミナ minɑ
+臨む no̞zo̞mɯ
+佐渡 sɑdo
+付く t͡sɯkɯ
+呟い t͡sɯbɯjɑi
+オーナー oːnɑː
+そそれ so̞so̞ɾe
+減税 ge̞nze̞ː
+漏らす moɾasɯ
+作品 sakɯhin
+落書き rɑkɯgɑki
+スゴイ sɯɡoj
+写る ɯt͡sɯrɯ
+幸先 sɑisɑki
+送検 soːkan
+傾ける kɑtɑmɯkerɯ
+推移 sɯii
+冬型 ɸɯjɯgɑtɑ
+理経 rike̞ː
+強かっ t͡sɯjokɑʔ
+福寿 ɸɯkɯdʒɯ
+愛さ ɑisɑ
+彫刻 tʃo̞ɯko̞kɯ
+淡 aɯa
+良い jo̞i
+来日 rɑinitʃi
+本人 ho̞nnin
+ヤマダ電機 jɑmɑdɑ denki
+光っ hikɑriʔ
+根元 nemo̞to̞
+毛織物 ke o̞rimo̞no̞
+ペリー pe̞ri:
+火種 hidɑne
+粉 konɑ
+野球 jɑkʲɯː
+歸 ?
+衣替え koromogɑe
+摘ん t͡sɯn
+シャープ ʃɑːpɯ
+火の車 hinokɯrɯmɑ
+調 tʃo̞ɯ
+モータースポーツ moːtaːsɯpoːtsɯ
+舵取り kɑdʒitori
+中古 tʃɯːko̞
+くらべ kɯrɑbe
+捨てる sɯteɾɯ
+巻き込ま mɑkikomɑ
+どんどん do̞ndo̞n
+行け ike̞
+泣きどころ nɑkidokoro
+吹い ɸɯi
+入れ ire̞
+最速 saisokɯ
+できれ de̞kire̞
+続落 zokɯrɑkɯ
+サンディー sɑndiː
+対処 tɑiʃo
+特殊 to̞kɯʃɯ
+血みどろ tʃimido̞ro̞
+新城 ʃinʃiro̞
+切り込ん kiriko̞n
+無縁 mɯe̞n
+雑念 zɑt͡sɯnen
+大幅 oːhɑbɑ
+管理 kɑnri
+風雲児 ɸɯːɯndʒi
+資力 ʃirʲo̞kɯ
+久留 kɯrɯ
+高ぶる tɑkɑbɯrɯ
+後の祭り ɑto no mɑt͡sɯri
+お茶 otʃɑ
+秒 bʲo̞ɯ
+アーチェリー ɑːtʃeriː
+正憲 mɑsɑnoɾi
+バックス bakekɯsɯ
+上の空 ɯwanosoɾa
+ろうと ro̞ɯto̞
+真理 ʃinri
+毛馬 kemɑ
+金鉱 kinko̞ɯ
+倉下 kɯrɑ ʃitɑ
+トラブル torɑbɯrɯ
+マイナー mɑinɑː
+いわく iɯakɯ
+ラブホテル rɑbɯ hoterɯ
+焦燥 ʃoːsoː
+神田 kɑndɑ
+日章旗 niʔʃoːki
+功績 ko̞ːseki
+会議 kɑigi
+日本橋 nihonbɑʃi
+定時 te̞ːdʒi
+エクステリア ekɯsɯteɾja
+停戦 tɑisen
+収穫 ʃɯːkɑkɯ
+遠藤 endo̞ɯ
+主権 ʃɯke̞n
+動かさ ɯɰᵝakasa
+対岸 tɑigɑn
+者 mo̞no̞
+費用 hijo̞ɯ
+出来高 dekidɑkɑ
+シーツ ʃiːt͡sɯ
+侮辱 bɯdʒo̞kɯ
+大麻 tɑimɑ
+ガラガラ gɑrɑgɑrɑ
+両社 rʲoːʃɑ
+生保 seːho̞
+範囲 hɑni
+格下げ kɯɰᵝakɯsage
+飴 ɑme
+値動き neɯgo̞ki
+酔っ jo̞iʔ
+トラベル torɑberɯ
+ならでは narade ɯa
+仕組ん ʃikɯn
+握っ nigiʔ
+描け egɑke
+言い ii
+宗祖 ʃɯːso
+クイーンズ kɯiːnzɯ
+人選 dʑinsən
+きゅうり kʲɯːri
+閔行区 bin ko̞ɯ kɯ
+お送り o̞ːkɯri
+岩波書店 iɯanami ʃoten
+ジャ dʒɑ
+べき be̞ki
+闇夜 ɑɲɑ
+犯人 hɑnnin
+上げれ ɑgere
+シナリオ ʃinɑrio
+過熱 kɑnet͡sɯ
+弟子 de̞ʃi
+取る to̞rɯ
+床屋 tokojɑ
+寄生虫 kiseːtʃɯː
+リアリティー riɑritiː
+肉類 nikɯrɯi
+計量 keːrʲo̞ɯ
+冷淡 reːtɑn
+軽快 keːkɑi
+戸 to̞
+崩壊 hoːkɑi
+光彦 mit͡sɯhiko̞
+遺伝子 ide̞nʃi
+煽る ɑorɯ
+元気 ge̞nki
+引け hike̞
+なれ nɑre
+不利 ɸɯri
+行か ikɑ
+炎上 endʒo̞ɯ
+二つ ɸɯtɑt͡sɯ
+同行 do̞ɯko̞ɯ
+ハンドボール hɑndoboːrɯ
+蔵相 zo̞ɯʃo̞ɯ
+取ら torɑ
+新た ɑrɑtɑ
+安心 ɑnʃin
+補完 hokɑn
+リストラクチャリング risɯtorakɯxaɾiŋgɯ
+手持ち temo̞tʃi
+本当に ho̞nto̞ɯ ni
+深 ʃin
+慌て aɯate
+優越 jɯːe̞t͡sɯ
+左飛 sɑi
+社外 ʃɑgɑi
+即売 sokɯbai
+道警 do̞ːkeː
+斑 bɯtʃi
+駆動 kɯdo̞ɯ
+有賀 ɑrigɑ
+垣間見 kɑimɑmi
+お願い onegɑi
+上がっ ɑgɑʔ
+ファンド ɸɑndo
+蹴る ke̞rɯ
+怒れ o̞ko̞re
+とかく tokɑkɯ
+村上 mɯrɑkɑmi
+異 ko̞to̞
+替え kɑe
+例え tɑtoe
+差し sɑʃi
+構造 ko̞ɯzo̞ɯ
+聖徳太子 ʃoːtokɯ tɑiʃi
+拓 tɑkɯ
+行 ko̞ɯ
+曲がり角 mɑgɑrikɑdo
+改札 kɑisɑtsɯ
+政財界 seizɑikɑi
+意欲 ijo̞kɯ
+みじん切り midʒinkiri
+休まれ jasɯmaɾe
+理事 ridʒi
+配合 hɑigoː
+警備 ke̞ːbi
+家裁 kɑsɑi
+ハロウィン hɑroin
+増 zo̞ɯ
+元来 gɑnrɑi
+小豆島 ʃoːdoʃimɑ
+肯定 ko̞ɯteː
+今宮 imɑmijɑ
+オクサレ okɯsaɾe
+指 jɯbi
+石炭 sekitɑn
+食生活 ʃokɯseːkatsɯ
+アルファロメオ ɑrɯɸɑromeo
+枠組み ɯakɯgɯmi
+沈む ʃizɯmɯ
+賃 tʃin
+べた betɑ
+低下 teːkɑ
+指摘 ʃite̞ki
+リョーコ rʲo̞ːko̞
+鼻血 hɑnɑzi
+頻発 hinpɑt͡sɯ
+主幹 ʃɯkɑn
+歯磨き hɑmigɑki
+はじまっ hɑdʒimɑʔ
+子供 ko̞do̞mo̞
+孟 mo̞ɯ
+半分 hɑnbɯn
+検視 ke̞nʃi
+性欲 seːjokɯ
+浴衣 jɯkɑtɑ
+がっちり gɑʔtʃiri
+下限 kɑgen
+画面 gɑmen
+はい hɑi
+感涙 kɑnrɯi
+すぎれ sɯxiɾe
+全体 zentɑi
+便利 be̞nri
+スマイル sɯmaiɾɯ
+大半 tɑihɑn
+げ ge̞
+黄海 koːkɑi
+生きろ ikiro̞
+ふけ ɸɯke̞
+プロデュース pɯrodjɯːsɯ
+浮遊 ɸɯjɯː
+遅い o̞so̞j
+助長 dʒo̞tʃo̞ɯ
+少年院 ʃo̞ɯnenin
+補助 ho̞dʒo̞
+アフリカ ɑɸɯrikɑ
+耐える tɑerɯ
+レポート repo̞ːto̞
+クロワッサン kɯɾowaʔsan
+北朝鮮 kitɑtʃoːsen
+中性子 tʃɯːseːʃi
+総合大学 sowɡɯdaigakɯ
+若手 ɯakate
+現 ge̞n
+すごく sɯɡokɯ
+コントロール ko̞nto̞ro̞ːrɯ
+残り no̞ko̞ri
+派手 hɑde
+行かさ ikɑsɑ
+羨望 senbo̞ː
+スレイマン sɯɾeiman
+不明朗 ɸɯmeːro̞ː
+疑惑 giɯakɯ
+シート ʃiːto̞
+近頃 tʃikɑgoro
+布団 ɸɯto̞n
+ブランディング bɯrɑndingɯ
+金城湯池 kindʒo̞ɯ to̞ɯtʃi
+石油 sekijɯ
+花火 hɑnɑbi
+てる te̞rɯ
+雇っ jɑtoʔ
+加減 kɑgen
+複合 ɸɯkɯgo̞ɯ
+なす nasɯ
+肩幅 kɑtɑhɑbɑ
+嫌う kirɑɯ
+助手 dʒo̞ʃɯ
+そのうち sonoɯtʃi
+調味 tʃo̞ɯmi
+心底 ʃinso̞ko̞
+カイコガ kɑikogɑ
+仮面 kɑmen
+食わ kɯɯa
+ボイス boisɯ
+確実 kɑkɯdʒit͡sɯ
+上々 dʒo̞ɯdʒo̞ɯ
+矢 jɑ
+終盤 ʃɯːbɑn
+スピリチュアル sɯpiɾitʃwaɾɯ
+にくく nikɯkɯ
+県立 ke̞nrit͡sɯ
+有給 jɯːkʲɯː
+曙 ɑkebono
+一段と itʃidɑn to
+治虫 osamɯ
+客層 kjakɯsoː
+われわれ ɯareɯare
+出会わ deaɯa
+きり kiri
+商い ɑkinɑi
+とまら tomɑrɑ
+連ね t͡sɯrɑne
+ユウウツ jɯːɯt͡sɯ
+生体 seːtɑi
+少数 ʃoːsɯː
+考え出せ kɑŋgɑedɑse
+掛け kɑke
+打ち ɯtʃi
+住友 sɯmitomo
+恩人 o̞ndʒin
+ジェントルマン dʒentorɯmɑn
+本当は hontoː ɯa
+プロセス pɯɾosesɯ
+どうか doː kɑ
+速度 sokɯdo
+くれる kɯre̞rɯ
+神山 kɑmijɑmɑ
+興国 ko̞ɯko̞kɯ
+一段落 itʃi dɑnrɑkɯ
+闘っ tɑtɑkɑʔ
+東京芸大 tokʲogeːdɑi
+少女 ʃo̞ɯdʒo̞
+担い ninɑi
+図面 zɯme̞n
+相づち ɑizɯtʃi
+育成 ikɯseː
+マニュアル mɑɲɯɑrɯ
+またがっ mɑtɑ gɑʔ
+桐山 kirijɑmɑ
+手術 ʃɯdʒɯt͡sɯ
+川内 sendɑi
+ゃまはげ mɑ hɑge
+扱い ɑt͡sɯkɑi
+酒屋 sɑkɑjɑ
+獲得 kɑkɯtokɯ
+マウス maɯsɯ
+面白 o̞mo̞ʃiro̞
+はがき hɑgɑki
+メタル metɑrɯ
+画像 gɑzoː
+通う kɑjoɯ
+漫然と mɑnzen to
+論調 ro̞ntʃo̞ɯ
+かれ kɑre
+巡る me̞gɯrɯ
+寸前 sɯnzɯn
+点数 tensɯː
+ミート miːto̞
+冗談 dʒoːdɑn
+オベロイ o̞bero̞i
+ホロホロ ho̞ro̞ ho̞ro̞
+女癖 ɔnnagɯze
+不可欠 ɸɯkɑket͡sɯ
+小田 odɑ
+香水 koːsɯi
+キヤノン kijɑnon
+しみる ʃimirɯ
+満たせ mitɑse
+ジュセリーノ jɯseriːno
+オーラ oːrɑ
+天皇陛下 tennoː heːkɑ
+看病 kɑnbʲoː
+上手い ɯmɑi
+任せ mɑkɑse
+かわいらしく kaɯairaʃikɯ
+骨肉 ko̞t͡sɯnikɯ
+信弘 no̞bɯhiro̞
+社員 ʃɑin
+バザー bɑzɑː
+婚 ko̞n
+名所 meːʃo̞
+ハンガリー hɑngɑriː
+怨嗟 ensɑ
+内壁 nɑiheki
+漫喫 mɑnkit͡sɯ
+いらっしゃい iɾɑʔʃɑi
+民衆 minʃɯː
+誠意 se̞ːi
+取り組む to̞rikɯmɯ
+カルテ kɑrɯte
+生かさ ikɑsɑ
+いたずら itɑzɯrɑ
+義平 joʃihirɑ
+良 rʲo̞ɯ
+新幹線 ʃinkɑnsən
+木目 mo̞kɯme
+交付 ko̞ɯɸɯ
+何卒 nɑnitozo
+です desɯ
+間違い mɑtʃigɑi
+クリーミー kɯriːmiː
+除か nozokɑ
+中長期 tʃɯːtʃo̞ɯki
+安定 ɑnteː
+里美 sɑtomi
+駆け kɑke
+なで下ろし nɑdeoroʃi
+転々 te̞nte̞n
+卒倒 so̞ʔto̞ː
+ポスター posɯtaː
+悔しかっ kɯjɑʃikɑʔ
+小物 ko̞mo̞no̞
+会津 ɑizɯ
+許せ jɯɾɯse
+矢巾 jɑhɑbɑ
+冷たい t͡sɯmetɑi
+見せ mise̞
+おしい o̞ʃiː
+チャラ tʃɑrɑ
+医道 ido̞ɯ
+代わり映え kaɯaribae
+背負わ seovɑ
+突っこま t͡sɯʔkomɑ
+年長 nentʃo̞ɯ
+公安 koːɑn
+支持 ʃidʒi
+加入 kɑɲɯː
+受難 dʒɯnɑn
+通りすがり tooɾisɯgaɾi
+行ける ike̞rɯ
+惣菜 soːzɑj
+不在 ɸɯzɑi
+竿 sɑo
+経由 ke̞ːjɯ
+こたつ kotɑt͡sɯ
+連結 re̞nke̞t͡sɯ
+待ち時間 mɑtʃi dʒikɑn
+炊飯 sɯihan
+かかえる kɑkɑerɯ
+高まり tɑkɑmɑri
+アーチ ɑːtʃi
+よう jo̞ɯ
+ハウス食品 haɯsɯʃokɯhin
+機会 kikɑi
+ダメダメ dɑme dɑme
+を ɯo
+生還 seːkɑn
+そ so̞
+レンゲ re̞nge̞
+順風 dʒɯnpɯː
+という to̞ iɯ
+できる de̞kirɯ
+豆乳 to̞ɯɲɯː
+アラサーシングル aɾasaːshiᵑgɯɾɯ
+専属 senzokɯ
+手軽 tegɑrɯ
+黒い kɯro̞i
+陸 rikɯ
+予測 josokɯ
+競馬 keːbɑ
+カメラ kɑmerɑ
+善人 ze̞nnin
+わざわざ ɯazaɯaza
+不祥事 ɸɯʃo̞ɯdʒi
+発明 hɑt͡sɯmeː
+雑技 zɑt͡sɯgi
+相次ぐ ɑit͡sɯgɯ
+同日 do̞ɯdʒit͡sɯ
+兆 tʃo̞ɯ
+精細 seːsɑi
+母性 bo̞seː
+答 kotɑe
+楽しく tɑnoʃikɯ
+運び hɑkobi
+壊れる koɯarerɯ
+受ける ɯke̞rɯ
+ニコニコ niko̞niko̞
+チカン tʃikɑn
+残業 zɑngʲoː
+松江 mɑt͡sɯe
+農家 noːkɑ
+えみ e̞mi
+人文学部 dʒinbɯngɑkɯ bɯ
+花巻 hɑnɑmɑki
+セーブ seːbɯ
+相続 sowzokɯ
+上原 ɯehɑrɑ
+説教 seʔkjo̞ː
+不屈 ɸɯkɯt͡sɯ
+参戦 sɑnsɯn
+かしげ kɑʃige
+とすれば tosɯreba
+ぐぐたす gɯɰᵝatasɯ
+健常 kendʒo̞ɯ
+オーストリア oːsɯtoɾija
+酢 sɯ
+改善 kɑizen
+同業 do̞ɯgʲo̞ɯ
+ベアリング beɑringɯ
+凡庸 boɲɯ
+売場 ɯribɑ
+講演 ko̞ɯen
+とん to̞n
+流行り hɑjɑri
+旗揚げ hɑtɑːge
+回避 kɑihi
+くぐる kɯgɯrɯ
+一流 itʃirʲɯː
+男好き o̞to̞ko̞zɯki
+サンゴ sɑŋgo
+好き sɯki
+従事 dʒɯːdʒi
+新人 ʃindʒin
+国富 ko̞kɯɸɯ
+旧知 kʲɯːtʃi
+あの ɑno
+狙わ neraɯa
+お母さま ohɑhɑsɑmɑ
+横断 oːdɑn
+折々 o̞rio̞ri
+同額 doːgɑkɯ
+コス kosɯ
+ボトル bo̞to̞rɯ
+処分 ʃo̞bɯn
+ひと月 hito gɑt͡sɯ
+特需 to̞kɯdʒɯ
+まとめる mɑtomerɯ
+どうしても do̞ɯ ʃite mo̞
+網戸 ɑmido
+心残り ko̞ko̞ro̞no̞ko̞ri
+確率 kɑkɯrit͡sɯ
+取得 ʃɯto̞kɯ
+鶏 niɯatori
+大きな oːkinɑ
+歩み ɑjɯmi
+ムダ mɯdɑ
+奥手 o̞kɯte
+与謝野 josɑno
+利点 rite̞n
+夏彦 nɑt͡sɯhiko
+マイケル mɑikerɯ
+輸送 jɯsoː
+豊富 ho̞ɯɸɯ
+地面 dʒime̞n
+ぬく nɯkɯ
+検察 kensatsɯ
+思春 ʃiʃɯn
+あずさ asɯsa
+のら norɑ
+しみ ʃimi
+資金繰り ʃikin kɯri
+飲茶 jɑmɯtʃɑ
+イエス jezɯ
+直 dʒikɑ
+セブン sebɯn
+息抜き ikinɯki
+そこら sokorɑ
+キユーピー kijɯːpiː
+聞け kike̞
+凄絶 se̞ːze̞tsɯ
+測定 sokɯteː
+タックスヘイブン taʔkɯsɯheibɯn
+つまんない t͡sɯmɑnnɑi
+ジョン dʒo̞n
+シワ ʃiɯa
+挑ん ido̞n
+払わ haraɯa
+共済 kjoːsɑi
+知性 tʃise̞ː
+ステージ sɯteːdʒi
+皮切り kaɯakiri
+凝らさ koɾɑsɑ
+かげ kɑge
+生まれる ɯmɑrerɯ
+使命 ʃime̞ː
+日にち hinitʃi
+小樽 otɑrɯ
+氷 ko̞ːri
+じき dʒiki
+ローカライズ roːkɑrɑizɯ
+わか ɯaga
+道端 mitʃibɑtɑ
+付録 ɸɯro̞kɯ
+療養 rʲo̞ɯjo̞ɯ
+ウディ ɯdi
+あゆみ ɑjɯmi
+幼少 jo̞ɯʃo̞ɯ
+著者 tʃoʃɑ
+おそらく osoɾakɯ
+帽子 bo̞ɯʃi
+猪木 ino̞ki
+パジャマ pɑdʒɑmɑ
+何と nɑn to
+邂逅 kɑikoː
+間近 mɑzikɑ
+研 ke̞n
+高校 ko̞ɯko̞ɯ
+ヒョードル ço̞ːdo̞rɯ
+どうして do̞ɯ ʃite
+雅博 mɑsɑiɾo
+連れ t͡sɯre̞
+冷徹 re̞ːte̞t͡sɯ
+こまぬい komɑnɯi
+野外 jɑgɑi
+小西 ko̞niʃi
+甘やかさ ɑmɑjɑkɑsɑ
+主要 ʃɯjo̞ɯ
+まわっ maɯaʔ
+せる seɾɯ
+取り上げよ toriɑgejo
+石碑 se̞kihi
+足 ɑʃi
+コツ ko̞t͡sɯ
+ヒーロー hiːro̞ː
+串 kɯʃi
+腕試し ɯdedɑmeʃi
+敬意 ke̞ːi
+柏 kaʃiɯa
+中継 tʃɯːke̞ː
+秀 ʃɯː
+絡み合っ kɑrɑmiɑʔ
+恐い koɯai
+口臭 ko̞ɯʃɯː
+就労 ʃɯːro̞ɯ
+太っ ɸɯto̞ʔ
+焼く jɑkɯ
+訳 ɯake
+甘かっ ɑmɑkɑʔ
+たどり着けれ tɑdorit͡sɯkere
+インタファクス通信 intafakɯsɯtsɯːshin
+抜き nɯki
+蛙 kaɯazɯ
+迎える mɯkɑerɯ
+らしく rɑʃikɯ
+コピペ ko̞pipe
+ノウハウ noɯhɑɯ
+早寝 hɑjɑne
+関わっ kakaɯaʔ
+凧 tɑko
+酒田 sɑkɑtɑ
+ださ dɑsɑ
+カジノ kɑdʒino
+語気 go̞ki
+嗣が t͡sɯgɑ
+張り合お hɑriɑo
+兼 ke̞n
+早大 soːdɑi
+前祝い maeiɯai
+お断り okotoɯari
+弱まる joɯamarɯ
+答え kotɑe
+サムスン samɯsɯn
+飛び込ん to̞biko̞n
+体調 tɑitʃoː
+細部 saibɯ
+吹き荒れる ɸɯkiɑrerɯ
+臆測 okɯsokɯ
+放射 hoːʃɑ
+食堂 ʃo̞kɯdo̞ɯ
+手下 tekɑ
+覆い o̞ːi
+医学 igɑkɯ
+おせ o̞ze
+コネ ko̞ne
+まつげ mɑt͡sɯge
+導く mitʃibikɯ
+取り巻く torimɑkɯ
+牡鹿 odʒikɑ
+各駅 kɑkɯeki
+官邸 kɑnteː
+国防 ko̞kɯbo̞ɯ
+馬脚 bɑkʲɑkɯ
+する sɯɾɯ
+モデル mo̞derɯ
+減速 gensokɯ
+潜在 senzɑi
+商船 ʃo̞ːsen
+農業 no̞ɯgʲo̞ɯ
+浸透 ʃinto̞ɯ
+視界 ʃikɑi
+宗派 ʃɯːhɑ
+多芸 tɑgeː
+届い to̞do̞i
+通り to̞ːri
+ガレージ gɑreːdʒi
+現し araɯaʃi
+フリル ɸɯrirɯ
+動意 do̞ɯi
+縄跳び naɯatobi
+負け mɑke
+賢明 ke̞nme̞ː
+新しい ɑtɑrɑʃiː
+終わっ oɯaʔ
+蒸し mɯʃi
+ダーク dɑːkɯ
+新庄 ʃindʒo̞ɯ
+曳光弾 eːkoːdɑn
+往診 o̞ɯʃin
+種別 ʃɯbe̞t͡sɯ
+口調 kɯtʃo̞ɯ
+隣 tonɑri
+不出来 ɸɯde̞ki
+束縛 sokɯbakɯ
+笑い ɯarai
+イタリア itɑriɑ
+ダウンロード dɑɯnroːdo
+的 mɑto
+賭け kɑke
+開所 kɑiʃo
+泥沼 doronɯmɑ
+控え hikɑe
+待ち mɑtʃi
+深化 ʃinkɑ
+湿 ʃit͡sɯ
+少なくとも sɯkɯnakɯtomo
+っけ ʔke̞
+大宮 oːmijɑ
+純 dʒɯn
+ボヤき bojɑki
+やりとり jɑritori
+きもの kimo̞no̞
+敏 sɑtoʃi
+もっ mo̞ʔ
+休暇 kʲɯːkɑ
+せめて se̞me̞te̞
+雑音 zɑt͡sɯon
+併合 heːgo̞ː
+戸松 tomɑt͡sɯ
+鍾馨稼 ʃoːkɑorɯkɑ
+絞り出す ʃiboɾidasɯ
+生存 seːzo̞n
+被災 hizɑi
+デナリ denɑri
+動きだし ɯgoki dɑʃi
+掴ん t͡sɯkɑn
+さじ sɑʒi
+歯がゆい hɑgɑjɯi
+人気 ninki
+パンドラ pɑndorɑ
+当年度 to̞ɯnendo̞
+退き ʃirizo̞ki
+歩む ɑjɯmɯ
+根強い nezɯjo̞i
+アート ɑːto
+梶 kɑdʒi
+バイオリニスト bajorinisɯto
+背骨 sebo̞ne
+追い詰める o̞it͡sɯmerɯ
+リタイア ritɑiɑ
+イタリアン itɑriɑn
+積ん t͡sɯn
+襲撃 ʃɯːge̞ki
+しり ʃiri
+ルー rɯː
+デッサン desɑn
+堂島 doːdʒimɑ
+テラス teɾasɯ
+シェルター ʃerɯtɑː
+分類 bɯnrɯi
+凄 sɯɰᵝao
+客席 kjakɯseki
+病院 bʲo̞ɯin
+アイスクリン aisɯkɯrin
+からめ kɑrɑme
+激辛 gekikɑrɑ
+らしい rɑʃiː
+コーヒー ko̞ːhiː
+ビジネス bidʒinesu
+底値 so̞ko̞ne
+目元 memo̞to̞
+草履 zo̞ɯri
+打ち破る ɯtʃijɑbɯrɯ
+四球 ʃikʲɯː
+ひとつひとつ hito̞t͡sɯhito̞t͡sɯ
+縁側 engaɯa
+浮世 ɯkijo̞
+扱っ ɑt͡sɯkɑʔ
+そしたら soʃitɑɾɑ
+外見 gɑiken
+増設 zoː̯setsɯ
+後人 ko̞ɯdʒin
+阿須 asɯ
+ども do̞ mo̞
+欠場 ket͡sɯdʒo̞ɯ
+コード ko̞ːdo̞
+曜 jo̞ɯ
+かく kɑkɯ
+権 ke̞n
+残 zɑn
+県勢 ke̞nse̞ː
+スペイン sɯpein
+成海 nɑrɯmi
+滑っ sɯbeʔ
+向け mɯke̞
+びっしり biʔsiɾi
+壊滅 kɑimet͡sɯ
+独断 dokɯdɑn
+コシヒカリ koʃihikɑri
+ふたつ ɸɯtɑt͡sɯ
+吸塵力 kʲɯːgo̞mirʲo̞kɯ
+ウォール街 oːrɯgɑi
+ペリー pe̞riː
+ゅっと jɯʔto
+フォックスコン foʔkɯsɯkɔn
+同罪 doːzɑi
+行こ iko̞
+ノルウェー no̞rɯeː
+とめ to̞me
+眼差し mɑnɑzɑʃi
+独善 do̞kɯzen
+ヨーカ joːkɑ
+いぶし銀 ibɯʃigin
+帰れ kɑere
+いつも it͡sɯ mo̞
+輝き kɑgɑjɑki
+前日 ze̞ndʒit͡sɯ
+いし iʃi
+算 sɑn
+風土 ɸɯːdo̞
+急ぎ iso̞gi
+どうも do̞ɯ mo̞
+日頃 higo̞ro̞
+焼尻島 jɑkiʃiritoː
+ふかさ fɯkasa
+限り kɑgiri
+団結 dɑnket͡sɯ
+洗う ɑrɑɯ
+莫大 bɑkɯdɑi
+立と tɑto
+セラコン serɑkɔn
+チリ tʃiri
+裏目 ɯrɑme
+といった toiʔtɑ
+広める hiro̞merɯ
+今季 ko̞nki
+敦司 ɑt͡sɯʃi
+細野 o̞so̞no̞
+暑い ɑt͡sɯi
+明らか ɑkirɑkɑ
+普及 ɸɯkʲɯː
+グーグル gɯːgɯrɯ
+絶 ze̞
+当局 to̞ɯkʲo̞kɯ
+はじ hɑdʒi
+習う nɑrɑɯ
+商戦 ʃo̞ːsen
+標準 ço̞ɯdʒɯn
+全面 ze̞nme̞n
+優し jɑsɑʃi
+折りたたみ oritɑtɑmi
+小道 ko̞mitʃi
+抱か kɑkɑe kɑ
+制裁 seːsɑi
+マニア mɑniɑ
+惻隠 sokɯin
+臓器 zo̞ɯki
+打ち明ける ɯtʃiɑkerɯ
+売上 ɯriɑge
+予材管 jozɑi kɑn
+回す mawasɯ
+じれる dʒire̞rɯ
+張 tʃo̞ɯ
+三木 miki
+電源 de̞nge̞n
+浮つい ɯɯat͡sɯi
+帯び o̞bi
+パパイヤ pɑpɑijɑ
+寝取ら netorɑ
+連呼 renko̞
+土壌 do̞dʒo̞ɯ
+鳴らし nɑrɑʃi
+衆院 ʃɯːin
+にかけて ni kɑkete
+不要 ɸɯjo̞ɯ
+運航 ɯnko̞ɯ
+海岸 kɑigɑn
+くぐっ kɯgɯʔ
+白星 ʃiro̞bo̞ʃi
+先ごろ sɑkiɣoɾo
+解釈 kɑiʃɑkɯ
+抵抗 teːko̞ː
+要するに joːsɯɾɯni
+喋り ʃɑberi
+寒い samɯi
+少子化 ʃoːʃikɑ
+プーホルス pɯːhoɾɯsɯ
+亀井 kɑmei
+吸入 kʲɯːɲɯː
+さっそく saʔsokɯ
+陸上 rikɯdʒo̞ɯ
+本位 ho̞ni
+学習 gɑkɯʃɯː
+述べ no̞be
+財源 zɑigen
+業績 goːseki
+もろに mo̞ro̞ ni
+ぼちぼち bo̞tʃibo̞tʃi
+ドレス doɾesɯ
+蔵 kɯrɑ
+抜い nɯi
+起工式 kiko̞ɯ ʃiki
+てれ te̞re̞
+陳述 tʃindʒɯt͡sɯ
+掲げる kɑkɑgerɯ
+さ迷っ sɑmɑjoʔ
+麩 ɸɯ
+稼働 kɑdoː
+リカバー rikɑbɑː
+ディーゼル diːze̞rɯ
+おめでとう o̞medeto̞ɯ
+挂 ?
+法案 hoː ɑn
+年明け toʃiɑke
+近所 kindʒo̞
+たる tɑrɯ
+重患 dʒɯːkɑn
+封切り ɸɯːkiri
+門 mo̞n
+いける ike̞rɯ
+目玉 medɑmɑ
+仲間入り nɑkɑmɑiri
+派兵 hɑheː
+上杉 wesɯgi
+一夜 itʃijɑ
+間に合う mɑniɑɯ
+国王 ko̞kɯo̞ɯ
+支払え ʃihɑrɑe
+ってか ʔte kɑ
+何せ nɑnise
+省吾 ʃo̞ɯgo̞
+ノ no̞
+東口 higɑʃigɯtʃi
+糸 ito̞
+青山学院大学 ɑojɑmɑ gɑkɯin dɑigɑkɯ
+見舞 mimɑi
+強硬 kʲo̞ɯko̞ɯ
+自発 dʒihɑt͡sɯ
+上司 dʒo̞ɯʃi
+マスコミ masɯkomi
+純血 dʒɯnke̞t͡sɯ
+ジョーク dʒo̞ːkɯ
+立場 tɑtʃibɑ
+クルーザー kɯrɯːzɑː
+誕生 tɑndʒoː
+根城 nedʒiro̞
+偽善 gize̞n
+駆り立て kɑritɑte
+凍りつい ko̞ːrit͡sɯi
+原価 genkɑ
+永遠 e̞ːe̞n
+くっきり kɯʔkiri
+うらやん ɯrɑjɑn
+確かめる tɑʃikɑmerɯ
+ゴロゴロ go̞ro̞go̞ro̞
+解任 kɑinin
+ポルシェ po̞rɯʃe
+アマゾン ɑmɑzon
+能楽堂 noːgɑkɯdoː
+微粒子 birʲɯːʃi
+独り者 hito̞rimo̞no̞
+免許 menkʲo̞
+厳島 it͡sɯkɯʃimɑ
+商人 ʃo̞ɯnin
+自問自答 dʒimo̞n dʒito̞ɯ
+視聴 ʃitʃo̞ɯ
+家屋 kɑokɯ
+払い hɑrɑi
+アサド ɑsɑdo
+まぎれ mɑgire
+上手く ɯmɑkɯ
+トロトロ to̞ro̞to̞ro̞
+タワー taɯaː
+モデスト modesɯto
+物 mo̞no̞
+現れる araɯarerɯ
+空洞 kɯːdo̞ɯ
+移行 iko̞ɯ
+おち o̞tʃi
+近く tʃikɑkɯ
+服巻 hɑrɑmɑki
+便秘 be̞npi
+揺れ動く jɯreɯgo̞kɯ
+焼き上げ jɑkiɑge
+編 he̞n
+作り上げる t͡sɯkɯriɑgerɯ
+見 mi
+がん gɑn
+お茶漬け otʃɑzɯke
+クレソン kɯɾeson
+説く to̞kɯ
+キャリングケース kjaringɯkeːsɯ
+自省 dʒiseː
+差別 sɑbetsɯ
+マイミク mɑimikɯ
+掛かる kɑkɑrɯ
+アドバンテス adobantesɯ
+自浄 dʒidʒo̞ɯ
+大人 otonɑ
+眠れ ne̞mɯre̞
+世代 sedɑi
+部員 bɯin
+たまっ tɑmɑʔ
+論議 ro̞ngi
+先ず mɑzɯ
+新築 ʃintʃikɯ
+背筋 sesɯʒi
+コミュニティ ko̞mʲɯniti
+当てはまっ ɑtehɑmɑʔ
+まちまち mɑtʃimɑtʃi
+ちび tʃibi
+しよう ʃijo̞ɯ
+縮小 ʃɯkɯʃo̞ɯ
+年々 ne̞nne̞n
+組合 kɯmiɑi
+河 kaɯa
+温度 o̞ndo̞
+チーム tʃiːmɯ
+学ぶ mɑnɑbɯ
+肌 hɑdɑ
+登る no̞bo̞rɯ
+暗闇 kɯrɑjɑmi
+むでしょう mɯ deʃo̞ɯ
+年末年始 nenmɑt͡sɯ nenʃi
+謝っ ɑjɑmɑʔ
+遊び ɑsobi
+陰気 inki
+浄土真宗 dʒo̞ɯdo̞ ʃinʃɯː
+借り kɑri
+白河 ʃirakaɯa
+続伸 zo̞kɯʃin
+ちらちら tʃirɑtʃirɑ
+整える to̞to̞no̞erɯ
+払しょく fʊʃokɯ
+出よ dejo̞
+釣り t͡sɯri
+水浴び mizɯɑbi
+飛躍 hijɑkɯ
+大意 tɑː
+と共に to̞ to̞mo̞ ni
+崩さ kɯzɯsa
+戦力 senrjokɯ
+維新 iʃin
+直視 tʃo̞kɯʃi
+怖く koɯakɯ
+つえ t͡sɯe̞
+覗き no̞zo̞ki
+駿 ʃɯn
+必至 hɪʔʃi
+滞在 tɑizɑi
+受け取り ɯketo̞ri
+前年度 zen nendo̞
+悲願 higɑn
+寝 ne̞
+テ te̞
+キエボ ki ebo̞
+催眠 sɑimin
+競り勝ち seɾikɑtʃi
+ヘロヘロ hero̞hero̞
+関税 kɑnzeː
+マイナス mainasɯ
+たすき tasɯki
+連座 renzɑ
+屋 jɑ
+オーケストラ oːkesɯtoɾa
+躍り出 o̞do̞ride
+家系 kɑkeː
+同局 do̞ɯkʲo̞kɯ
+警戒 keːkɑi
+因数 insɯː
+根 ne̞
+尊敬 so̞nkeː
+中身 nɑkɑmi
+要る irɯ
+不穏 ɸɯo̞n
+不行届き ɸɯjɯkito̞do̞ki
+湧き水 ɯakimizɯ
+レクサス ɾekɯsasɯ
+円谷 t͡sɯbɯrɑjɑ
+転身 te̞nʃin
+ロールスロイス roːrɯsɯroisɯ
+公言 ko̞ɯgen
+毎回 mɑikɑi
+健 ke̞n
+格好よかっ kɑʔkoːjokɑʔ
+散布 sanpɯ
+古文 ko̞bɯn
+旅路 tɑbidʒi
+崩れ落ちる kɯzɯreo̞tʃirɯ
+盛況 seːkjo̞ː
+どこ do̞ko̞
+賃貸 tʃintɑi
+思い通り o̞mo̞i to̞ːri
+歌 ɯtɑ
+カフェテリア kɑɸeteriɑ
+たたい tɑtɑi
+継が t͡sɯgi gɑ
+許さ jɯɾɯsa
+典型 te̞nke̞ː
+外れる hɑzɯrerɯ
+つける t͡sɯke̞rɯ
+汚物 o̞bɯt͡sɯ
+見くびる mikɯbirɯ
+たたえる tɑtɑerɯ
+気合い kiɑi
+とてつもない totet͡sɯ mo nɑi
+職種 ʃo̞kɯʃɯ
+媒介 bɑikɑi
+マキャベリズム mɑkʲɑberizɯmɯ
+週刊 ʃɯːkɑn
+薬屋 kɯsɯɾija
+付き物 t͡sɯkimo̞no̞
+伺お ɯkɑgɑo
+牛 ɯʃi
+詳細 ʃoːsai
+堅実 ke̞ndʒit͡sɯ
+夜間 jɑkɑn
+アウト ɑɯto
+詩 ʃi
+渡米 to̞beː
+つかむ t͡sɯkɑmɯ
+炎 ho̞no̞ː
+週間 ʃɯːkɑn
+履歴 rire̞ki
+来い ko̞i
+より jo̞ri
+黄色い kiiro̞i
+ボコ bo̞ko̞
+塞ぎ fɯsagi
+子宮 ʃikʲɯː
+演目 enmo̞kɯ
+スレッド sɯɾeʔdo
+つか t͡sɯ kɑ
+総裁 soːsɑj
+塗り nɯri
+増産 zoːsan
+版権 hɑnken
+松 mɑt͡sɯ
+あら ɑrɑ
+気づい kizɯi
+再度 sɑido
+浴びる ɑbirɯ
+ひざ hizɑ
+落ち着か otʃit͡sɯkɑ
+代筆 dɑihit͡sɯ
+分野 bɯɲa
+純粋 dʒɯnsɯi
+たっ tɑʔ
+済まない sɯmanai
+前代未聞 zendɑi mimon
+奥田 okɯdɑ
+全景 ze̞nke̞ː
+適量 tekirʲo̞ɯ
+満腹 mɑnpɯkɯ
+あう ɑɯ
+ヒマ himɑ
+きゅっと kʲɯʔ to̞
+本舗 ho̞npo̞
+城 ʃiro̞
+裾 sɯso
+突出 toʔʃɯtsɯɯ
+変わろ kaɯaro
+それでも so̞ɾedemo̞
+数量 sɯːrjoː
+病棟 bʲo̞ɯto̞ɯ
+生 nɑmɑ
+不案内 ɸɯɑnnɑi
+屋根裏 jɑneɯrɑ
+告知 ko̞kɯtʃi
+サジ sɑʒi
+蕁麻疹 dʒinmɑʃin
+下表 kɑçoː
+電磁 de̞ndʒi
+暗く kɯrɑkɯ
+取り込ん to̞riko̞n
+はかっ hɑkɑʔ
+主人 ʃɯdʒin
+移り ɯt͡sɯri
+演奏 enso̞ː
+告発 kokɯhɑt͡sɯ
+退治 tɑidʒi
+発生 hɑʔseː
+マイカー mɑi kɑː
+覆せる kɯtsɯgaeseɾɯ
+試飲 ʃiːn
+課題 kɑdɑi
+不景気 ɸɯke̞ːki
+メーカー meːkɑː
+飾ら kɑzɑrɑ
+仰天 gʲo̞ɯten
+人志 hito̞ʃi
+東京 to̞kʲo̞
+語り継ご kɑtɑrit͡sɯgo
+断り kotoɯari
+ゼブラ zebɯrɑ
+最強 sɑikjoː
+お嬢様 oʒoːsɑmɑ
+ドコモ do̞ko̞mo̞
+決定 ke̞ʔte̞ː
+紅 kɯrenɑi
+中歌 nɑkɑ ɯtɑ
+角度 kɑkɯdo
+密 mit͡sɯ
+茶こし tʃɑkoʃi
+集中 ʃɯːtʃɯː
+見応え migotɑe
+自転車 dʒitenʃɑ
+否 inɑ
+ょっぱいですが joʔpaidesɯɣa
+こぼれる ko̞bo̞rerɯ
+弘人 hiro̞to̞
+村 mɯrɑ
+甘党 ɑmɑtoː
+気質 kiʃit͡sɯ
+反撃 hɑngeki
+おいしく o̞iʃikɯ
+空力 kɯːrʲo̞kɯ
+イコール iko̞ːrɯ
+振る ɸɯrɯ
+秀和 ʃɯːɯa
+頑張る gɑnbɑrɯ
+手法 ʃɯho̞ɯ
+協調 kʲo̞ɯtʃo̞ɯ
+違法 iho̞ɯ
+ことわざ kotoɯaza
+パキッ pɑki
+見込め miko̞me
+不況 ɸɯkʲo̞ɯ
+数学 sɯːɡakɯ
+ぬぐえ nɯgɯe̞
+っぽく ʔpo̞kɯ
+白米 hɑkɯmɑi
+踏ん ɸɯn
+古城 ko̞dʒo̞ɯ
+はるか hɑrɯkɑ
+時計 to̞keː
+高島 tɑkɑʃimɑ
+さしかかっ sɑʃikɑkɑʔ
+からっと kɑrɑʔ to
+人民元 dʒinmin mo̞to̞
+工業 ko̞ɯgʲo̞ɯ
+レシピ re̞ʃipi
+出演 ʃɯt͡sɯe̞n
+シェア ʃeɑ
+うまく ɯmɑkɯ
+モス mosɯ
+功 ko̞ɯ
+選べる erɑberɯ
+ソツ sotsɯɯ
+ウォーキング o̞ːkingɯ
+三栄 sɑ̃e̞ː
+ユダヤ jɯdɑjɑ
+志向 ʃiko̞ɯ
+天 te̞n
+平衡 heːko̞ː
+飽き ɑki
+二元論 ni gen ro̞n
+蚕 kɑiko
+売り ɯri
+行く ikɯ
+チャリティー tʃɑritiː
+マシ mɑʃi
+くださっ kɯdasaʔ
+ならば nɑrɑbɑ
+表敬 ço̞ːkeː
+やってくる jɑʔte kɯrɯ
+盛り込ん mo̞riko̞n
+ハプニング hɑpɯningɯ
+口径 ko̞ːkeː
+号泣 go̞ɯkʲɯː
+モワモワ moɯamoɯa
+数え kɑzoe
+杉原 sɯgihaɾa
+承知 ʃo̞ɯtʃi
+辞めよ jɑmejo
+洋館 joːkɑn
+磐田 iɯata
+揶揄 jɑjɯ
+文脈 bɯnmʲɑkɯ
+ミス misɯ
+死ね ʃine̞
+廃棄 hɑiki
+つかまえ t͡sɯkɑmɑe
+沈下 tʃinkɑ
+間借り mɑgɑri
+理系 rike̞ː
+倉庫 so̞ːko̞
+都会 tokɑi
+乗り込み no̞riko̞mi
+紐 himo̞
+海洋 kɑijoː
+体感 tɑikɑn
+ウラン ɯrɑn
+述べる no̞berɯ
+飼っ kɑʔ
+深刻 ʃinko̞kɯ
+梁 rʲo̞ɯ
+タイマー tɑimɑː
+防止 bo̞ɯʃi
+限る kɑgirɯ
+聴い kii
+緊密 kinmit͡sɯ
+テロ tero̞
+井川 igaɯa
+学友 gɑkɯjɯː
+面積 me̞nse̞ki
+中期 tʃɯːki
+やる jɑrɯ
+とりつか torit͡sɯ kɑ
+滑ら sɯbeɾa
+逃す noɰᵝasɯ
+呼び jo̞bi
+後ほど no̞tʃiho̞do̞
+驚かさ odorokɑsɑ
+広幸 hiro̞jɯki
+だいぶ dɑibɯ
+大野 o̞ːno̞
+とりあえず toriɑezɯ
+冷温 reːo̞n
+吐き気 hɑkike
+気遣い kizɯkɑi
+渡そ vɑtɑso
+貼っ hɑʔ
+済 sɯmi
+アフマディネジャド ɑɸɯmɑdinedʒɑdo
+屋根 jɑne
+暗い kɯrɑi
+感度 kɑndo
+無力 mɯrʲo̞kɯ
+踊り o̞do̞ri
+さて sɑte
+政令 se̞ːɾe̞ː
+東北電力 to̞ɯho̞kɯ denrʲo̞kɯ
+楽しめる tɑnoʃimerɯ
+利恵 rie̞
+おじさん oxisɑn
+ニコチン niko̞tʃin
+秋子 ɑkiko
+まっしぐら maʔʃiɡɯɾa
+三下 sɑnʃitɑ
+氏名 ʃime̞ː
+表参道 omotesɑ̃do
+着眼 tʃɑkɯgɑn
+電線 de̞nse̞n
+長州 tʃo̞ɯʃɯː
+探す sagasɯ
+発芽 hɑt͡sɯgɑ
+通る to̞ːrɯ
+藤野 ɸɯdʒino̞
+正解 seːkɑi
+疑わ ɯtagaɯa
+独特 do̞kɯto̞kɯ
+並行 heːko̞ː
+老化 roːkɑ
+機能 kino̞ɯ
+井上 ino̞ɯe
+抱い idɑi
+政務 seːmɯ
+水分 sɯibɯn
+期待 kitɑi
+バラエティー bɑrɑetiː
+ホリウチ ho̞riɯtʃi
+図々しく zɯːzɯːʃikɯ
+もさ mosɑ
+公募 ko̞ɯbo̞
+史実 ʃidʒit͡sɯ
+書こ kɑko
+奏で kɑnɑde
+押さえ込む osaekomɯ
+新聞 ʃinbɯn
+ボイラー boirɑː
+五十六 go̞dʒɯː ro̞kɯ
+見送っ mio̞kɯʔ
+毎週 mɑiʃɯː
+失望 ʃit͡sɯbo̞ɯ
+昇級 ʃo̞ɯkʲɯː
+主賓 ʃɯhin
+貴 tɑkɑ
+って ʔte̞
+病巣 bjo̞ːso̞ː
+邦画 hoːgɑ
+実現 dʒit͡sɯge̞n
+兼ね ke̞n ne̞
+身分 mibɯn
+割い sɑi
+騒ぎ出し saɯaɡidaʃi
+発信 hɑsɕin
+セコム sekomɯ
+準 hito̞ʃi
+満身 mɑnʃin
+うつむい ɯt͡sɯmɯi
+はじめ hɑdʒime
+米国 beːko̞kɯ
+幼なじみ osɑnɑnɑjimi
+粥 kɑjɯ
+達也 tɑt͡sɯjɑ
+訃報 ɸɯho̞ɯ
+片山 kɑtɑjɑmɑ
+振り向けよ ɸɯrimɯkejo̞
+清水 ʃimizɯ
+視野 ʃijɑ
+坂口 sakagɯtʃi
+保坂 hosɑkɑ
+交代 koːtɑi
+スイーツ sɯiːtsɯ
+猿橋 saɾɯhaʃi
+気付かさ kizɯkasa
+容姿 jo̞ɯʃi
+間接 kɑnsetsɯ
+弱 dʒɑkɯ
+陥没 kɑnbot͡sɯ
+尿 ɲo̞ɯ
+総額 soːɡakɯ
+金髪 kinpɑt͡sɯ
+ドン do̞n
+書い kɑi
+ジャズ dʒɑzɯ
+不気味 ɸɯkimi
+橋下 hɑʃimoto
+風景 ɸɯːke̞ː
+増やそ fɯjaso
+深夜 ʃiɲɑ
+家事 kɑdʒi
+選抜 senbatsɯ
+改修 kɑiʃɯː
+沸い ɯai
+まざまざ mɑzɑmɑzɑ
+手帳 tetʃo̞ɯ
+深まる ɸɯkɑmɑrɯ
+折れ o̞re
+席 se̞ki
+惰性 dɑseː
+荒い ɑrɑi
+イカルス ikaɾɯsɯ
+曲げ mɑge
+膨大 boːdɑi
+潮流 tʃo̞ɯrʲɯː
+公務 ko̞ɯmɯ
+土木 do̞bo̞kɯ
+威圧 iɑt͡sɯ
+戦国 seᵝagokɯ
+オーク o̞ːkɯ
+女房 ɲo̞ɯbo̞ɯ
+割引 ɯaribiki
+まず mɑzɯ
+チャンソン tʃɑnˈsɔn
+価格 kɑkɑkɯ
+隙 sɯki
+卒業生 sotsɯgjoːseː
+デパート depɑːto
+私鉄 ʃite̞t͡sɯ
+神谷 kɑmijɑ
+パンケーキ pɑnkeːki
+拭い nɯgɯi
+人員 dʒinin
+デニム de̞nimɯ
+甘え ɑmɑe
+団体 dɑntɑi
+考え直す kaᵑgaenaosɯ
+女傑 dʒo̞ket͡sɯ
+演出 e̞nʃɯt͡sɯ
+断っ kotoɯaʔ
+技術 gidʒɯt͡sɯ
+宥和 jɯːɯa
+虚構 kʲo̞ko̞ɯ
+貼ら hɑrɑ
+横綱 jokozɯnɑ
+ガン gɑn
+多難 tɑnɑn
+困ら komɑrɑ
+花月園 kɑget͡sɯen
+覚悟 kɑkɯgo
+ホテル ho̞terɯ
+店内 tennɑi
+彰 ɑkirɑ
+スピーチ sɯpiːtʃi
+見ろ miro̞
+鼠 ne̞zɯmi
+總 sɑtoʃi
+東芝 toːʃibɑ
+三菱 mit͡sɯbiʃi
+炒める itɑmerɯ
+裏 ɯrɑ
+それなり soɾenɑɾi
+菊川 kikɯkaɯa
+とらえ torɑe
+薪 tɑkigi
+闘争 to̞ːso̞ː
+質 ʃit͡sɯ
+気力 kirʲo̞kɯ
+叶え kɑnɑe
+追い風 oikɑze
+流行 rʲɯːko̞ɯ
+折り込ん o̞riko̞n
+戸籍 ko̞seki
+追及 t͡sɯikʲɯː
+大倉 oːkɯrɑ
+面白い o̞mo̞ʃiro̞i
+介入 kɑiɲɯː
+取り調べ toriʃirɑbe
+低けれ hikɯke̞re̞
+吹き飛ん ɸɯkito̞n
+すばやく sɯbajakɯ
+家主 jɑnɯʃi
+取り押さえ toɾjosɑe
+留まる tomɑrɯ
+フィードバック ɸiːdobɑʔkɯ
+マヤ mɑjɑ
+指し sɑʃi
+けら kerɑ
+真 ʃin
+持ちかけ motʃikɑke
+驚い o̞do̞ro̞i
+トヨタ自動車 tojotɑ dʒidoːʃɑ
+液晶 ekiʃo̞ɯ
+無地 mɯdʒi
+肝炎 kɑnɯn
+順応 dʒɯnno̞ɯ
+順番 dʒɯnbɑn
+ピンク pinkɯ
+対話 taiɯa
+会長 kɑitʃoː
+拍手 hɑkɯʃɯ
+悲鳴 hime̞ː
+ひ hi
+ナポレオン nɑporeon
+こびりつく ko̞birit͡sɯkɯ
+初値 hɑt͡sɯne
+辿る tɑdorɯ
+ウコン ɯko̞n
+シュール ʃɯːrɯ
+高圧 koːɑt͡sɯ
+小学生 ʃoːɡakɯseː
+リレー rire̞ː
+方策 hoːsakɯ
+ばかり bɑkɑri
+クボミ kɯbo̞mi
+アクセル akɯseɾɯ
+散 sɑn
+憩い iko̞i
+択 tɑkɯ
+桁外れ ketɑhɑzɯre
+盲従 mo̞ɯdʒɯː
+呼び起こす jobiokosɯ
+面識 me̞nʃiki
+査定 sɑteː
+蕎麦 sobɑ
+楽屋落ち gɑkɯjɑ otʃi
+神様 kɑmisɑmɑ
+果て hɑte
+懐中 kɑitʃɯː
+光熱 ko̞ɯnet͡sɯ
+下落 gerɑkɯ
+制する seːsɯɾɯ
+米作 beːsakɯ
+埋まっ ɯmɑʔ
+サーバー sɯɑɯbɑɯ
+定年 te̞ːne̞n
+冷凍庫 reːto̞ːko̞
+移る ɯt͡sɯrɯ
+輝幸 te̞rɯjɯki
+旬 ʃɯn
+持ち上げ motʃiɑge
+聞く kikɯ
+変幻 he̞nge̞n
+帯 o̞bi
+機運 kiɯn
+貧し hinʃi
+不慮 ɸɯrʲo̞
+本書 ho̞nʃo̞
+シリア ʃiriɑ
+営業 eigʲo̞ː
+漏れ mo̞re
+地価 tʃikɑ
+卓也 tɑkɯjɑ
+重んじる o̞mo̞ndʒirɯ
+名門 meːmo̞n
+要り iri
+ストック sɯtoʔkɯ
+利徳 to̞ʃino̞ri
+一読 itʃido̞kɯ
+押忍 osɯ
+暴風 bo̞ɯɸɯː
+魅せ mise̞
+名古屋 nɑgojɑ
+地震 dʒiʃin
+成就 dʒo̞ɯdʒɯ
+目前 mo̞kɯzen
+山積 sɑnseki
+工事 ko̞ɯdʒi
+侵す okasɯ
+山間 sɑnkɑn
+応急 o̞ɯkʲɯː
+おじいちゃん odʒiːtʃjɑn
+焦っ ɑseʔ
+落盤 rɑkɯbɑn
+解像度 kɑizoː do
+トイ to̞i
+登録 to̞ɯro̞kɯ
+不器用 ɸɯkijo̞ɯ
+同国 do̞ɯko̞kɯ
+同様 do̞ɯjo̞ɯ
+産業 saːgjoː
+実ら minorɑ
+集約 ʃɯːjɑkɯ
+臨床 rinʃo̞ɯ
+申請 ʃinse̞ː
+こいつ ko̞it͡sɯ
+覆う o̞ːɯ
+ひたすら hitasɯɾa
+錦織 niʃikio̞ri
+がれき gɑreki
+コロコロ ko̞ro̞ko̞ro̞
+歳月 saigetsɯɯ
+嬉し ɯre̞ʃi
+ハイ hɑi
+互いに tɑgɑi ni
+過小 kɑʃoː
+蒲田 kɑmɑtɑ
+見直す minaosɯ
+囲っ kɑkoʔ
+迫力 hɑkɯrʲokɯ
+ぶん bɯn
+セキュリティ sekjɯriti
+到底 to̞ːteː
+実例 dʒit͡sɯre̞ː
+遅く osokɯ
+設ける mo̞ɯkerɯ
+郵政 jɯːseː
+FIL fil
+降っ ɸɯʔ
+見分け miɯake
+来月 rɑiget͡sɯ
+爪 t͡sɯme̞
+ゴリラ gorirɑ
+往人執 ?? ?
+夢 jɯme̞
+ダメ押し dɑme oʃi
+粗悪 soːkɯ
+ニコン niko̞n
+優しい jɑsɑʃiː
+紛争 fɯnsoː
+ムーンサルト mɯːnsaɾɯto
+うるさい ɯɾɯsai
+速 sokɯ
+次ぐ t͡sɯgɯ
+部隊 bɯtɑi
+茶 tʃɑ
+甘んじ ɑmɑndʒi
+ペンネーム pe̞n ne̞ːmɯ
+対比 tɑihi
+解け to̞ke
+降板 koːbɑn
+いいなさ iinɑsɑ
+そう so̞ː
+シュン ʃɯn
+麻酔 masɯi
+的中 te̞kitʃɯː
+かたは kata ɯa
+ジュンス dʑɯnːzɯ
+明け ɑke
+家具 kɑgɯ
+済まし sɯmaʃi
+ようは joː ɯa
+よみがえる jomigɑerɯ
+散らす tʃiɾasɯ
+割合 ɯariai
+村田 mɯrɑtɑ
+愛国 ɑikokɯ
+ひとまず hitomɑzɯ
+かかる kɑkɑrɯ
+さっき sɑʔki
+よも jo̞mo̞
+下降 kɑkoː
+ミラ mirɑ
+急 kʲɯː
+片 kɑtɑ
+引き出せる hikidaseɾɯ
+遺族 izo̞kɯ
+事件 dʒike̞n
+メガネ megɑne
+繁華 hɑnkɑ
+オバマ obɑmɑ
+つつい t͡sɯt͡sɯ i
+軽油 ke̞ːjɯ
+味方 mikɑtɑ
+東南 toːnɑn
+ゴム go̞mɯ
+ソジェン so̞ʒɛn
+寄与 kijo̞
+誓い tʃikɑi
+性別 se̞ːbe̞tsɯ
+社債 ʃɑzɑi
+検定 ke̞nte̞ː
+難 nɑn
+サラ sɑɾɑ
+同署 do̞ɯʃo̞
+凪 nɑgi
+平々凡々 heːheː bo̞nbo̞n
+放送大学 hoːsoːdaigakɯ
+ドタバタ dotɑbɑtɑ
+ひとつ hito̞t͡sɯ
+事項 dʒiko̞ɯ
+ゅ jɯ
+住む sɯmɯ
+壽 ko̞to̞bɯki
+肉食 nikɯʃo̞kɯ
+家内 kɑnɑi
+着い t͡sɯi
+思わず omoɯazɯ
+折り合い oriɑi
+拘束 koːsokɯ
+フィーバー ɸiːbɑː
+フセ fɯze
+思惑 omoɯakɯ
+コロプラ koro pɯrɑ
+スロー sɯɾoː
+ニュージーランド ɲɯːdʒiːrɑndo
+薄い ɯsɯi
+舎弟 ʃɑteː
+整然 se̞ːze̞n
+倍率 bɑirit͡sɯ
+不人気 ɸɯninki
+斎藤 sɑitoː
+照会 ʃoːkɑi
+選び erɑbi
+存在 sonzɑi
+きらい kirɑi
+主婦 ʃɯɸɯ
+動き ɯgo̞ki
+次世代 dʒisedai
+美雪 mijɯki
+フィギュア ɸigʲɯɑ
+国内外 kokɯnɑigɑi
+地味 dʒimi
+もうすぐ moːsɯɡɯ
+早くから hɑjɑkɯ kɑrɑ
+防犯 boːhɑn
+再起 sɑiki
+長電話 tʃoːdenɯa
+常務 dʒo̞ɯmɯ
+分厚い bɯɑtsɯi
+援交 enko̞ɯ
+震え ɸɯrɯe̞
+救助 kʲɯːdʒo̞
+賛成 sɑnseː
+本社 honʃɑ
+認知 nintʃi
+純白 dʒɯnpɑkɯ
+泡 aɯa
+インカレ inkɑre
+帰れる kɑererɯ
+下方 kɑhoː
+ちゃんと tʃɑnto
+手段 ʃɯdɑn
+農産物 noːsanbɯtɯ
+掲げ kɑkɑge
+連歌 rengɑ
+左肩 hidɑrikɑtɑ
+ストップ sɯtoʔpɯ
+山田 jɑmɑdɑ
+ラグビー rɑgɯbiː
+並々 nɑminɑmi
+漫画 mɑngɑ
+聞き出す kikidasɯ
+総務 soːmɯ
+謹んで t͡sɯt͡sɯʃinde̞
+もどかし modokɑʃi
+該当 gɑitoː
+アスベスト asɯbesɯto
+段階 dɑnkɑi
+巻く mɑkɯ
+遺影 ie̞ː
+とっくに to̞ʔkɯ ni
+方向 ho̞ɯko̞ɯ
+平山 hirɑjɑmɑ
+かじっ kɑdʒiʔ
+なんで nɑn de
+二の次 nino̞t͡sɯgi
+次期 dʒiki
+目撃 mo̞kɯgeki
+信じ ʃindʒi
+作れ t͡sɯkɯre̞
+マケインオバマ mɑkein obɑmɑ
+たどっ tɑdoʔ
+上 ɯe̞
+果たす hatasɯ
+公文 ko̞ɯbɯn
+閑古鳥 kɑnkotʃoː
+給水 kjɯːsɯi
+期限 kige̞n
+はめ hɑme
+松島 mɑt͡sɯʃimɑ
+担っ ninɑʔ
+部 bɯ
+適度 tekido̞
+必須 hiʔsɯ
+出入り de̞iri
+前面 ze̞nme̞n
+たけ tɑke
+戦友 seɲjɯː
+亀治郎 kɑmedʒiroː
+岐路 kiro̞
+データ通信 deːtɑ t͡sɯːʃin
+喚起 kɑnki
+レーダー reːdɑː
+不当 ɸɯto̞ɯ
+スマホ sɯmaho
+ため tɑme
+匡子 mɑsɑko
+テコ入れ teko̞ire
+依然 ize̞n
+武司 tɑkeʃi
+三宮 sɑnnomiɑ
+まだ mɑdɑ
+足り tɑri
+髪の毛 kɑminoke
+效 ?
+関数 kansɯː
+呼び止め jo̞bito̞me
+早起き hɑjɑoki
+酵素 ko̞ːso̞
+ビジャ bidʒɑ
+消毒 ʃo̞ɯdo̞kɯ
+心から kokoro kɑrɑ
+爽快 soːkɑj
+龍 rʲɯː
+こころ ko̞ko̞ro̞
+展望 tenbo̞ɯ
+官庁 kɑntʃoː
+最前線 sɑizensen
+西村 niʃimɯrɑ
+脳卒中 noːsoʔtʃɯː
+揉める mo̞merɯ
+優先 jɯːsen
+庁 tʃo̞ɯ
+退屈 tɑikɯt͡sɯ
+死別 ʃibe̞t͡sɯ
+実社会 dʒit͡sɯʃɑkɑi
+ラン rɑn
+左手 hidɑrite
+新設 ʃinse̞tsɯ
+惚れ込ん ho̞reko̞n
+太る ɸɯto̞rɯ
+課金 kɑkin
+あげる ɑgerɯ
+ギャルソン gaɾuson
+不履行 ɸɯriko̞ɯ
+急上昇 kʲɯː dʒo̞ɯʃo̞ɯ
+数少ない kazɯsɯkɯnai
+ビデオカメラ bideo kɑmerɑ
+慶大 keːdɑi
+打者 dɑʃɑ
+診 ʃin
+崇拝 sɯːhai
+シジミ ʃidʒimi
+座ら sɯwaɾa
+ドムジャレ domɯ dʒɑre
+プライド pɯrɑido
+引き払う hikihɑrɑɯ
+裏地 ɯrɑdʒi
+楽天 rɑkɯten
+援助 endʒo̞
+恒常 ko̞ɯdʒo̞ɯ
+ものすご monosɯɣo
+難航 nɑnkoː
+めちゃくちゃ metʃɑkɯtʃɑ
+天災 tensɑi
+中堅 tʃɯːke̞n
+店長 tentʃo̞ɯ
+蓄積 tʃikɯseki
+救える sɯkwerɯ
+シチュー ʃitʃɯː
+ジャパン dʒɑpɑn
+更に sɑɾɑni
+取り消し to̞rikeʃi
+ぶち bɯtʃi
+相当 so̞ːto̞ː
+異端 itɑn
+爽やか saɯajaka
+高まっ tɑkɑmɑʔ
+程度 teːdo̞
+出す dasɯ
+働く hɑtɑrɑkɯ
+残飯 zɑnpɑn
+カル kɑrɯ
+食い違い kɯitʃigɑi
+ゴールドパープルピンク goːrɯdo pɑːpɯrɯ pinkɯ
+科 kɑ
+べり be̞ri
+凄く sɯɡokɯ
+テクノ tekɯno̞
+果たし hɑtɑʃi
+物騒 bɯʔsoː
+セント sento̞
+膨らむ ɸɯkɯrɑmɯ
+なさっ nɑsɑʔ
+篠原 ʃinohɑrɑ
+批判 hihɑn
+例題 reːdɑi
+クライマックス kɯraimaʔkɯsɯ
+SPK e̞spiːekeː
+モニター monitɑː
+局内 kʲokɯnɑi
+応援 o̞ɯen
+外回り sotomaɯaɾi
+持ち運び motʃihɑkobi
+博物館 hɑkɯbɯt͡sɯkɑn
+趣旨 ʃɯʃi
+売り込む ɯriko̞mɯ
+見積もり mit͡sɯmo̞ri
+走れる hɑʃirerɯ
+つい t͡sɯi
+戦前 se̞nzɯn
+一心不乱 isʃinfɯɾan
+冬場 ɸɯjɯbɑ
+躍進 jɑkɯʃin
+勤労 kinro̞ɯ
+ルーキー rɯːkii
+全品 ze̞npin
+リード riːdo̞
+憎ま nikɯmɑ
+だって dɑ ʔte
+団塊 dɑnkɑi
+置ける o̞kerɯ
+遊べ ɑsobe
+お母様 ohɑhɑsɑmɑ
+係長 kɑkɑritʃoː
+ソー so̞ː
+うるさかっ ɯɾɯsakaʔ
+わから ɯakara
+ウェイトレス eitoɾesɯ
+チキン tʃikin
+テープ te̞ːpɯ
+薬学部 jɑkɯgɑkɯ bɯ
+ナダル nɑdɑrɯ
+描写 bʲoːʃɑ
+箕浦 minoɯrɑ
+プライバシー pɯrɑibɑʃiː
+台数 daisɯː
+袖 so̞de
+売り場 ɯribɑ
+ライン rɑin
+司法 ʃiho̞ɯ
+値し ɑtɑi ʃi
+彼ら kɑrerɑ
+天安門 tenɑmɔn
+紛失 ɸɯnʃit͡sɯ
+相 so̞ː
+防げる fɯsegeɾɯ
+漢字 kɑndʒi
+女将 okɑmi
+白い ʃiro̞i
+実に dʒit͡sɯ ni
+紋 mo̞n
+種 ʃɯ
+火災 kɑsɑi
+水戸 mito̞
+愛知 ɑitʃi
+若林 ɯakabajaʃi
+晝 hirɯ
+試運転 ʃiɯnte̞n
+左右 sajɯː
+販売 hɑnbɑi
+同僚 do̞ɯrʲo̞ɯ
+閉塞 heːsokɯ
+ツバメ t͡sɯbɑme
+一連 itʃire̞n
+もっとも mo̞ʔto̞mo̞
+マツダ mɑt͡sɯdɑ
+努める t͡sɯto̞merɯ
+軍服 gɯnpɯkɯ
+矢野 jɑno
+流血 rʲɯːke̞t͡sɯ
+配当 hɑitoː
+名前 nɑmɑe
+記号 kigo̞ɯ
+ストーカー sɯtoːkaː
+穴 ɑnɑ
+はやし hɑjɑʃi
+ぽん po̞n
+予約 jojɑkɯ
+名誉 meijo̞
+たたま tɑtɑmɑ
+争っ ɑɾɑsoʔ
+厄年 jɑkɯdoʃi
+主たる nɯʃi tɑrɯ
+巻き起こし mɑkiokoʃi
+カバー kɑbɑː
+はまる hɑmɑrɯ
+ひと言 hito̞ko̞to̞
+桃子 mo̞mo̞ko̞
+勝ち取る kɑtʃitorɯ
+忍ば ʃinobɑ
+引き金 hikigɑne
+変形 he̞nke̞ː
+役立つ jɑkɯdɑt͡sɯ
+閑静 kɑnseː
+会え ɑe
+留守 ɾɯsɯ
+カッコイイ kɑʔko ii
+子守 ko̞mo̞ri
+率直 sotʃokɯ
+地帯 tʃitɑi
+愛牛 ɑigʲɯː
+外野 gɑijɑ
+不可避 ɸɯkɑhi
+漢方薬 kɑnpoːjɑkɯ
+ホント ho̞nto̞
+北欧 ho̞kɯo̞ɯ
+歓喜 kɑnki
+贖罪 ʃokɯzɑi
+今度 ko̞ndo̞
+保育 ho̞ikɯ
+エンジン e̞ndʒin
+子育て kosodɑte
+牝馬 hinbɑ
+渡航 to̞ko̞ɯ
+皇室 ko̞ɯʃit͡sɯ
+話題 ɯadai
+ケーブル ke̞ːbɯrɯ
+教官 kʲoːkɑn
+今夜 koɲɑ
+しよ ʃijo̞
+サングラス saŋgɯɾasɯ
+傑 sɯɡɯɾɯ
+個性 ko̞seː
+枕詞 mɑkɯrɑkotobɑ
+なかなか nɑkɑnɑkɑ
+表現 ço̞ɯgen
+聖子 seːko̞
+残念 zɑnnen
+附属 ɸɯzo̞kɯ
+あくまでも ɑkɯmɑde mo
+通訳 t͡sɯːjɑkɯ
+結子 jɯːko̞
+左中間 satʃɯːkan
+敬う ɯjɑmɑɯ
+活用 kɑt͡sɯjoː
+物量 bɯt͡sɯrʲo̞ɯ
+静観 seːkɑn
+まくる mɑkɯrɯ
+嵩ま kɑsɑmɑ
+造ら t͡sɯkɯrɑ
+暴れる ɑbɑrerɯ
+入居 ɲɯːkʲo̞
+夕刊 jɯːkɑn
+祈る ino̞rɯ
+庭園 te̞ːe̞n
+妨げ sɑmɑtɑdʒe
+送金 soːkin
+つまみ t͡sɯmɑmi
+末松 sɯematsɯ
+立つ tɑt͡sɯ
+焼け jɑke
+赤木 ɑkɑgi
+可愛い kaɯaiː
+語彙 go̞i
+尖閣 senkakɯ
+減れ he̞re̞
+振っ ɸɯʔ
+描い egɑi
+厳重 ge̞ndʒɯː
+同氏 do̞ɯʃi
+標高 ço̞ɯko̞ɯ
+折っ o̞ʔ
+すなわち sɯnavatʃi
+乗れ no̞re
+悪夢 ɑkɯmɯ
+無駄 mɯdɑ
+ボヴェ bo̞ve
+詰める t͡sɯme̞rɯ
+コントラスト kontorasɯto
+かけ kɑke
+嘱託 ʃokɯtɑkɯ
+投げかける nɑgekɑkerɯ
+ぬけ nɯke̞
+こう着 koːtʃɑkɯ
+笑い声 ɯaraigoe
+割り ɯari
+宿泊 ʃɯkɯhɑkɯ
+がたい gɑtɑi
+業種 gʲo̞ɯʃɯ
+更新 ko̞ɯʃin
+ケガ kegɑ
+課す kasɯ
+補給 ho̞kʲɯː
+来る kɯrɯ
+うれし ɯre̞ʃi
+感 kɑn
+ディープ diːpɯ
+ホームページ ho̞ːmɯ peːdʒi
+襲う osoɯ
+真面目 mɑdʒime
+ウィンタースポーツ wintaːsɯpoːtsɯ
+家出 ie̞de̞
+すっ sɯʔ
+打つ ɯt͡sɯ
+てらわ teraɯa
+孫 mɑgo
+麺 me̞n
+監査 kɑnsɑ
+やって来る jɑʔte kɯrɯ
+スマトラ sɯmatoɾa
+運ば hɑkobɑ
+バーレーン bɑːreːn
+エクスペディア ekɯsɯpedia
+いただき itɑdɑki
+踏み出し ɸɯmidɑʃi
+使う t͡sɯkɑɯ
+オペ o̞pe
+直線 tʃokɯzen
+把握 hɑːkɯ
+立証 ɾiʔʃo̞ː
+精華 seːkɑ
+さえぎる saegiɾɯ
+最近 sɑikin
+つかれ t͡sɯkɑre
+喪失 soːshitsɯ
+きらきら kirɑkirɑ
+習い nɑrɑi
+努力 do̞rʲo̞kɯ
+買い手 kɑite
+外出 gɑiʃɯt͡sɯ
+訊き kiki
+フォーム ɸo̞ːmɯ
+建物 tɑtemono
+照らす teɾasɯ
+エリソン eɾiso̞n
+ごろ go̞ro̞
+はや hɑjɑ
+触れる ɸɯre̞rɯ
+補償 ho̞ʃo̞ɯ
+足す tasɯ
+心中 ʃindʒɯː
+後ずさっ atozɯsaʔ
+雑誌 zɑsʃi
+ジリ貧 dʒirihin
+少なから sɯkɯnakaɾa
+暴露 bɑkɯro
+お話し ohɑnɑʃi
+まずは mazɯ ɯa
+務める t͡sɯto̞merɯ
+いまだに imɑdɑ ni
+千趣会 senʃɯkai
+燃える mo̞erɯ
+支障 ʃiʃo̞ɯ
+長者 tʃoːdʒɑ
+富士フイルム ɸɯdʒi ɸɯirɯmɯ
+榮 sɑkɑe
+勝利 ʃo̞ɯri
+大豆 dɑizɯ
+お家 o̞ie
+武術 bɯdʒɯt͡sɯ
+辞令 dʒire̞ː
+表面 ço̞ɯmen
+苦笑 kɯʃo̞ɯ
+リリ riri
+無益 mɯe̞ki
+学芸 gɑkɯgeː
+食わず嫌い kɯɯazɯ kirai
+ゃ jɑ
+縮む tʃizimɯ
+浮かぶ ɯkɑbɯ
+燃料 nenrʲo̞ɯ
+ウイルス øyɾɯsɯ
+民業 mingʲo̞ɯ
+メモ memo̞
+悟っ sɑtoʔ
+ハウジング hɑɯdʒingɯ
+簒奪 sɑndɑtsɯ
+青年 sɯɪnən
+原 hɑrɑ
+衣裳 iʃo̞ɯ
+焦げ ko̞ge
+自慢 dʒimɑn
+栗山 kɯrijɑmɑ
+呼称 ko̞ʃo̞ɯ
+連携 re̞nke̞ː
+さら sɑɾɑ
+細胞 sɑiboː
+県営 kẽ̞e̞ː
+賃金 tʃingin
+輝か kɑgɑjɑkɑ
+きせん kise̞n
+辺り ɑtɑri
+途上 to̞dʒo̞ɯ
+高志 tɑkɑʃi
+サプリメント sapɯɾimento
+つっぱっ t͡sɯʔpɑʔ
+当 to̞ɯ
+ご破算 gohɑsɑn
+白紙 hɑkɯʃi
+記事 kidʒi
+確かめ tɑʃikɑme
+建てる tɑterɯ
+早かっ hɑjɑkɑʔ
+外債 gɑizɑi
+変化 henkɑ
+盗め nɯsɯme
+包み t͡sɯt͡sɯmi
+前後 zengo̞
+妖艶 jo̞ɯen
+ほしく ho̞ʃikɯ
+カーボンファイバーパネル kɑːbon ɸɑibɑː pɑnerɯ
+亡くなっ nɑkɯnɑʔ
+後任 ko̞ɯnin
+大引け o̞ːbike
+研修 ke̞nʃɯː
+首 kɯbi
+そうとう so̞ːto̞ː
+山 jɑmɑ
+ファルド ɸɑrɯdo
+ぬ nɯ
+キャンセル kjanseɾɯ
+マイコン mɑikon
+文字 mo̞dʒi
+よりによって jo̞ri ni jo̞ʔte
+老練 ro̞ɯren
+チャンギー tʃɑn giː
+好調 ko̞ɯtʃo̞ɯ
+喉 no̞do̞
+プリンタ pɯrintɑ
+タイムラグ tɑimɯ rɑgɯ
+しばらく ʃibɑrɑkɯ
+乗り越える no̞riko̞erɯ
+破ら jɑbɯrɑ
+だめ dɑme
+ゴクウ go̞kɯ ɯ
+ヒルズ hils
+ライス ɾaisɯ
+ラグジュアリー rɑgɯdʒɯɑriː
+ミランダ mirɑndɑ
+九州産業大 kjɯːshɯːsangʲoːdai
+最小 sɑiʃoː
+駅名 e̞kime̞ː
+いしゃ iʃɑ
+ピンク色 pinkɯ iro̞
+在 zɑi
+我慢 gɑmɑn
+尚樹 nɑoki
+魔女 mɑdʒo
+数珠 dʒɯzɯ
+はま hɑmɑ
+直ぐ sɯɰᵝabɯ
+風俗 ɸɯːzo̞kɯ
+免れ mɑnɯgɑre
+主体 ʃɯtɑi
+据え sɯe
+鷲尾 ɯaʃio
+幸運 ko̞ɯːn
+プレ pɯre̞
+パイプライン pɑipɯrɑin
+連発 renpɑt͡sɯ
+オリックス oɾikekɯsɯ
+なのに nɑ no ni
+減ら herɑ
+喜ば jorokobɑ
+コメント ko̞mento̞
+祝勝 ʃɯkɯʃo̞ɯ
+皺 ʃiɯa
+距離 kʲo̞ri
+眠る ne̞mɯrɯ
+運命 ɯnme̞ː
+救済 kjɯːsai
+頼れる tɑjorerɯ
+暗記 ɑnki
+バッグ bɑʔgɯ
+侮っ ɑnɑdoʔ
+一概に itʃigɑi ni
+絵画 kɑigɑ
+ちょき tʃo̞ki
+真上 mɑɯe
+思い当たる omoiɑtɑrɯ
+用意 jo̞ɯi
+込み上げ komiɑge
+致し方 itɑʃikɑtɑ
+カーテン kɑːten
+近代 kindɑi
+暴言 bo̞ɯgen
+フォロワー ɸoroɯaː
+ローリングストーンズ roːriŋgɯsɯtoːnzɯ
+バウハウス baɯhaɯsɯ
+依存 izo̞n
+驚く o̞do̞ro̞kɯ
+づくり zɯkɯri
+慰め nagɯzame
+話し合っ hɑnɑʃiɑʔ
+ディフェンシブ diɸe̞nʃibɯ
+埼玉 sɑitɑmɑ
+全壊 zenkɑi
+霆 ?
+さじ加減 sɑikɑgen
+次郎 dʒiro̞ɯ
+身内 miɯtʃi
+明かし ɑkɑʃi
+動かせ ɯɰᵝakase
+図画 zɯgɑ
+ニュー ɲɯː
+すぐ sɯɰᵝabɯ
+テカテカ tekɑtekɑ
+ユイ jɯi
+三崎口 misakikɯtʃi
+性 se̞ː
+日産自動車 niʔsɑ̃ʒidoːʃɑ
+テク te̞kɯ
+翼 tsɯbasa
+控除 ko̞ɯdʒo̞
+売り上げ ɯriɑge
+共有 kʲo̞ɯjɯː
+鍼灸師 ʃinkʲɯːʃi
+法曹界 hoːsoːkɑi
+ゼネ ze̞ ne̞
+トーナメント toːnɑmento
+残酷 zɑnkokɯ
+流し nɑgɑʃi
+栄冠 eːkɑn
+金魚鉢 kingʲo bɑtʃi
+ルール rɯːrɯ
+リハビリ rihɑbiri
+聴く kikɯ
+交差 koːsɑ
+交番 koːbɑn
+産経新聞 sankeːʃinbɯn
+部下 bɯkɑ
+動力 do̞ɯrʲo̞kɯ
+休場 kʲɯːdʒo̞ɯ
+役員 jɑkɯin
+共同 kʲo̞ɯdo̞ɯ
+車道 ʃɑdoː
+ハルタ hɑrɯ tɑ
+不注意 ɸɯtʃɯːi
+ケタ ketɑ
+つくれる t͡sɯkɯre̞rɯ
+ややこしい jɑjɑkoʃiː
+成果 seːkɑ
+殉教 dʒɯnkʲo̞ɯ
+留学 rʲɯːgɑkɯ
+話術 ɯadʒɯt͡sɯ
+聞き手 kikite̞
+へ e̞
+心性 ʃinse̞ː
+姉歯 ɑnehɑ
+暑う ɑt͡sɯː
+口先 kɯtʃisaki
+追求 t͡sɯikʲɯː
+推進 sɯiʃin
+浮かば ɯkɑbɑ
+そっ so̞ʔ
+備え sonɑe
+岩城 iɯaki
+手堅い tegɑtɑi
+ゆり jɯri
+総会 soːkɑj
+厳しく kibiʃikɯ
+間貸 mɑgɑʃi
+走 so̞ː
+自国 dʒiko̞kɯ
+調べる ʃirɑberɯ
+デモクラシー demokɯrɑʃiː
+閉まら ʃimɑrɑ
+極まりない kiɯamarinai
+空手 kɑrɑte
+右左 migihidɑri
+穀物 ko̞kɯmo̞t͡sɯ
+食卓 ʃokɯtɑkɯ
+立役者 rijjɑkɯʃɑ
+法学 hoːgɑkɯ
+不動 ɸɯdo̞ɯ
+飛び抜け to̞binɯke
+みなさん minɑsɑn
+ガラス gaɾasɯ
+試み ko̞ko̞ro̞mi
+危険 kike̞n
+神父 ʃinpɯ
+デバイス debaisɯ
+直し nɑoʃi
+たゆま tɑjɯmɑ
+奔走 ho̞nso̞ː
+怪しま ɑjɑʃimɑ
+促し ɯnɑgɑʃi
+みせる miseɾɯ
+すっぱく sɯʔpakɯ
+思え o̞mo̞e
+ボクサー bokɯsaː
+裕福 jɯːɸɯkɯ
+言語 gengo̞
+一昔 hitomɯkɑʃi
+栄養士 eːjo̞ːʃi
+カギ kɑgi
+だっ dɑʔ
+作戦 sakɯsen
+ジャマ dʒɑmɑ
+菓子 kɑʃi
+やま jɑmɑ
+拡充 kɑkɯdʒɯː
+証券 ʃo̞ɯken
+日暮れ higɯre̞
+つきあわせ tsɯkiawase
+踏ん切り ɸɯngiri
+テント tento̞
+何気なく nɑnge nɑkɯ
+虎ノ門 torɑnomon
+無神経 mɯʃinke̞ː
+創作 soːsakɯ
+打ち込ん ɯtʃiko̞n
+自習 dʒiʃɯː
+大泉 o̞ːizɯmi
+小野 o̞no̞
+辛く t͡sɯrɑkɯ
+恩師 o̞nʃi
+順次 dʒɯndʒi
+規則正しい kisokɯtadaʃiː
+四角形 ʃikɑkɯgɑtɑ
+限っ kɑgiʔ
+哲雄 tet͡sɯo̞
+探っ saɰᵝabɯʔ
+海抜 kɑibɑt͡sɯ
+凱旋 gɑisen
+縛ら ʃibɑrɑ
+焦がさ koɰᵝɑsɑ
+利用 rijo̞ɯ
+二兎 nito̞
+注視 tʃɯːʃi
+岩手 iɯate
+オートバイ oːtobɑi
+光沢 koːtɑkɯ
+個々 ko̞ko̞
+諦め ɑkirɑme
+イケメン ike̞me̞n
+あちら ɑtʃirɑ
+進ん sɯsɯn
+ほそりゃ hosoɾjɑ
+平和 heːɯa
+る rɯ
+デー de̞ː
+固定 ko̞teː
+呼ん jo̞n
+踏み台 ɸɯmidɑi
+ほめ ho̞me
+白百合学園 ʃirɑjɯri gɑkɯen
+建築 ke̞ntʃikɯ
+観る mirɯ
+急速 kjɯːsokɯ
+レーサー reːsɑː
+幼い osɑnɑi
+工作 koːsakɯ
+張本人 tʃo̞ɯho̞nnin
+つるし t͡sɯrɯʃi
+ブローカー bɯroːkɑː
+締結 te̞ːke̞t͡sɯ
+トス tosɯ
+身長 ʃintʃo̞ɯ
+味わっ adʒiɯaʔ
+やる気 jɑrɯki
+軍隊 gɯntɑi
+ひかり hikɑri
+ゴヤ gojɑ
+がらり gɑrɑri
+お忙しい oisoɡɑʃiː
+および o̞jo̞bi
+しかも ʃikɑmo
+館長 kɑntʃoː
+昨今 sɑʔkon
+積もっ t͡sɯmo̞ʔ
+得点 to̞kɯten
+仙人 se̞nnɪn
+敷き ʃiki
+右 migi
+車体 ʃɑtɑi
+目上 me̞ɯe̞
+ソフト sofɯto
+仲良 nɑkɑ jo
+日 hi
+オベンチャラ obentʃɑrɑ
+礼砲 reːho̞ː
+小さな tʃiːsɑnɑ
+転校生 tenko̞ːseː
+立ちション tɑtʃiʃon
+しまい ʃimɑi
+氣 ki
+合コン go̞ɯko̞n
+下痢 ge̞ri
+則子 no̞riko̞
+晩 bɑn
+フットワーク ɸɯʔtoɯaːkɯ
+点検 te̞nke̞n
+クルーズ kɯrɯːzɯ
+探し sɑgɑʃi
+ヨガッ jogɑ
+戦っ tɑtɑkɑʔ
+妖怪 joːkɑi
+頭痛 zɯt͡sɯː
+賢者 kendʒɑ
+グリル gɯrirɯ
+差異 sɑi
+いも imo̞
+路上 ro̞dʒo̞ɯ
+言及 ge̞nkʲɯː
+魚沼 ɯonɯmɑ
+殴り合い nɑgɯriɑi
+引き続き hikit͡sɯzɯki
+予防 jo̞bo̞ɯ
+包ま t͡sɯt͡sɯmɑ
+余る ɑmɑrɯ
+役所広司 jɑkɯʃo koːdʒi
+とか to kɑ
+閲覧 et͡sɯrɑn
+いじめ idʒime̞
+かさむ kasamɯ
+頼み tɑnomi
+変われ kaɯare
+わたっ ɯataʔ
+確信 kɑkɯʃin
+オープン o̞ːpɯn
+受けとめ ɯketo̞me
+入学 ɲɯːgɑkɯ
+懲戒 tʃoːkɑi
+ラッシュ ɾɯsʃɯ
+書ける kɑkerɯ
+寝返っ negɑeʔ
+売人 bɑinin
+予選 jo̞sen
+流せる nagaserɯ
+強制 kjo̞ːseː
+はまら hɑmɑrɑ
+奢ら ogorɑ
+注目 tʃɯːmo̞kɯ
+モーター moːtɑː
+話 hɑnɑʃi
+ソディック sodikekɯ
+雲 kɯmo̞
+フーフー ɸɯːɸɯː
+取り上げる toriɑgerɯ
+寿命 dʒɯmʲo̞ɯ
+走り出 hɑʃiride
+ロイ ro̞i
+ひときわ hitokiɯa
+へん he̞n
+ギャラリー gʲɑrɑriː
+激励 ge̞kire̞ː
+吊り t͡sɯri
+豊か jɯtɑkɑ
+とことん to̞ko̞to̞n
+株式 kɑbɯʃiki
+温か ɑtɑtɑkɑ
+カラー kɑrɑː
+飛ぶ to̞bɯ
+示さ ʃimesɑ
+どうせ doːse
+山形 jɑmɑgɑtɑ
+地球儀 tʃikʲɯːgi
+レトルト reto̞rɯto̞
+検索 kensakɯ
+風力 ɸɯːrʲo̞kɯ
+タイプ tɑipɯ
+ぎこちな gikotʃinɑ
+川 kaɯa
+暢気 no̞nki
+氣持 kimo̞tʃi
+占う ɯrɑnɑɯ
+交換 koːkɑn
+占める ʃime̞rɯ
+誘っ sɑsoʔ
+スバル sɯbaɾɯ
+美女 bidʒo̞
+保谷 hojɑ
+沈ん ʃizɯn
+もろ mo̞ro̞
+しめし ʃime̞ʃi
+疎ん ɯto̞n
+薄々 ɯsɯɯsɯ
+詠ん jo̞n
+回 kɑi
+優に jɯːni
+始末 ʃimɑt͡sɯ
+希少 kiʃo̞ɯ
+手土産 temijɑge
+攻め se̞me̞
+は ɯa
+たいてい tɑiteː
+座っ sɯwaʔ
+支出 ʃiʃɯt͡sɯ
+頂けれ itɑdɑkere
+抗争 ko̞ːso̞ː
+安けれ jasɯkeɾe
+バンク bɑnkɯ
+緩め jɯrɯme̞
+中立 tʃɯːrit͡sɯ
+民営 mĩe̞ː
+おかげ okɑge
+様々 sɑmɑzɑmɑ
+専務 senmɯ
+サマラ sɑmɑɾɑ
+プカプカ pɯkɑpɯkɑ
+恐れ o̞so̞ɾe
+アルゼンチン ɑrɯzentʃin
+異名 imʲo̞ɯ
+妊婦 ninpɯ
+談 dɑn
+佐佐木 sɑsɑki
+葬儀 so̞ːɡi
+ゆえん jɯe̞n
+乙女 o̞to̞me
+黙ら dɑmɑrɑ
+和書 ɯaʃo
+だからこそ dɑkɑrɑkoso
+住所 dʒɯːʃo̞
+連帯 rentɑi
+急い iso̞j
+凡人 bo̞ndʒin
+スキーム sɯkiːmɯ
+幕 mɑkɯ
+通り抜け to̞ːrinɯke
+ウィンナー innɑː
+丸沼 mɑrɯnɯmɑ
+思うつぼ o̞mo̞ɯ t͡sɯbo̞
+うま ɯmɑ
+泊 tomɑri
+体協 tɑikʲoː
+チョイス tʃoisɯ
+駐車 tʃɯːʃɑ
+鈍っ nibɯʔ
+抱える kɑkɑerɯ
+乗車 dʒoːʃɑ
+がらん gɑrɑn
+カーナビ kɑːnɑbi
+鏡 kɑgɑmi
+ベイトソン bəito̞so̞n
+起業 kigʲo̞ɯ
+クラブハウス kɯɾabɯhaɯsɯ
+ジューシー dʒɯːʃii
+マスト masɯto
+カーク kɑːkɯ
+気がつい ki gɑ t͡sɯi
+プライス pɯɾaisɯ
+倒す taosɯ
+過多 kɑtɑ
+預かり ɑzɯkɑri
+濃度 no̞ɯdo̞
+接待 setɑi
+墓地 bo̞tʃi
+習字 ʃɯːdʒi
+出る de̞rɯ
+言い出す iidasɯ
+追いかけ oikɑke
+主夫 ʃɯɸɯ
+太 ɸɯto̞ʃi
+脇役 ɯakijakɯ
+女 onnɑ
+ミステリー misɯteriː
+一定 iʔte̞ː
+数奇 sɯːki
+お節介 oseʔkɑi
+いまさら imɑsɑɾɑ
+カタール kɑtɑːrɯ
+同列 do̞ɯret͡sɯ
+去就 kʲo̞ʃɯː
+紗 ʃɑ
+遊川氏 jɯkaɯaʃi
+ドラ dorɑ
+禁止 kinʃi
+またい mɑ tɑi
+刑期 ke̞ːki
+相変わらず aikaɯarazɯ
+凍結 to̞ɯket͡sɯ
+国有 ko̞kɯjɯː
+悲しい kɑnɑʃiː
+ホーム ho̞ːmɯ
+中途半端 tʃɯːto hɑnpɑ
+私刑 ʃike̞ː
+保証 ho̞ʃo̞ɯ
+育て sodɑte
+三盆 sɑnbon
+缶 kɑn
+雨天 ɯte̞n
+中世 tʃɯːseː
+公民 ko̞ɯmin
+過程 kɑteː
+父親 tʃitʃiojɑ
+頼る tɑjorɯ
+記入 kiɲɯː
+パートナー pɑːtonɑː
+マツ mɑt͡sɯ
+大河ドラマ tɑigɑ dorɑmɑ
+大火 tɑikɑ
+よぎる jo̞girɯ
+使途 ʃito̞
+移 ?
+デジタル dedʒitɑrɯ
+フライパン ɸɯrɑipɑn
+天然 te̞nne̞n
+地獄 dʒigo̞kɯ
+マネジメント mɑnedʒimento
+そば sobɑ
+兄妹 kʲoːdɑi
+悪口 ɯarɯgɯtʃi
+脱力 dɑt͡sɯrʲokɯ
+板尾 itɑo
+なおす naosɯ
+瀬沼 senɯma
+有馬 ɑrimɑ
+互助 go̞dʒo̞
+うろつい ɯro̞t͡sɯi
+失恋 ʃit͡sɯre̞n
+法廷 ho̞ːteː
+青空 ɑozorɑ
+水 mizɯ
+どう do̞ɯ
+シビル ʃibirɯ
+行動 ko̞ɯdo̞ɯ
+取り込み to̞riko̞mi
+試し tɑmeʃi
+バイアス bajasɯ
+衝突 ʃo̞ɯto̞t͡sɯ
+コストコ kosɯtoko
+どちら dotʃirɑ
+とれ to̞re
+警告 keːko̞kɯ
+自宅 dʒitɑkɯ
+素人 ʃiro̞ɯto̞
+愛 ɑi
+隠居 inkʲo̞
+文学 bɯngɑkɯ
+軒 no̞ki
+非公開 hikoːkɑi
+有名人 jɯːme̞ːdʒin
+まことに mɑkoto ni
+怖がら koɯagara
+僕たち bokɯtɑtʃi
+加護 kɑgo
+松原 mɑt͡sɯbɑrɑ
+資本 ʃiho̞n
+対戦 tɑisen
+需要 dʒɯjo̞ɯ
+届け出 to̞do̞kede
+防戦 bɑʊ̯sən
+うらら ɯrɑrɑ
+楽園 rɑkɯen
+キアヌ kiɑnɯ
+武蔵大学 mɯsaʃidaigakɯ
+画家 gɑkɑ
+花開く hɑnɑ hirɑkɯ
+必ず kɑnɑrɑzɯ
+ソーシャルメディア soːʃarɯmedia
+統治 to̞ɯtʃi
+帝 mikɑdo
+大津 o̞ːt͡sɯ
+はした金 hɑʃitɑgɑne
+ふう ɸɯː
+斷 ?
+フーンフーンフーン fɯːnfɯːnfɯːn
+アレクシス aɾekɯʃisɯ
+想像 so̞ːzo̞ː
+業 go̞ɯ
+集会 ʃɯːkɑi
+経営 ke̞ːe̞ː
+表情 ço̞ɯdʒo̞ɯ
+買う kɑɯ
+ニュアンス ɲɯansɯ
+福利 ɸɯkɯri
+事前 dʒize̞n
+結ば mɯsɯba
+鳴尾浜 nɑrɯo hɑmɑ
+両国 rʲo̞ɯko̞kɯ
+偏向 henko̞ɯ
+鈍感 donkɑn
+濃厚 no̞ɯko̞ɯ
+バカン bɑkɑn
+片付い kɑtɑzɯi
+バイク bɑikɯ
+力 tʃikɑrɑ
+皇居 ko̞ɯkʲo̞
+愛好 ɑikoː
+思わしくなかっ omoɯaʃikɯ nakaʔ
+恥じらい hɑdʒirɑi
+代金 dɑikin
+縦断 dʒɯːdɑn
+なくっ nɑkɯʔ
+イルミネーション irɯmineːʃo̞n
+鋭かっ sɯɾɯdokaʔ
+オシャレ oʃɑre
+育っ sodɑʔ
+発想 hɑsoː
+右往左往 ɯoːsaoː
+銀 gin
+カズ kɑzɯ
+どんでん返し dondengɑeʃi
+フェース feːsɯ
+いしだ iʃidɑ
+賄っ mɑkɑnɑʔ
+伺い ɯkɑgɑi
+曇天 do̞nten
+どんなに donnɑ ni
+分解 bɯnkɑi
+水柱 sɯitʃɯː
+はあ hɑː
+鎮圧 tʃinatsɯɯ
+半ば nɑkɑbɑ
+投球 to̞ɯkʲɯː
+抜け出 nɯke̞de̞
+大して tɑiʃite
+ピンはね pinhɑne
+経緯 ke̞ːi
+近づく tʃikɑzɯkɯ
+同伴 doːhɑn
+非常 hidʒo̞ɯ
+ざわり zaɯari
+円楽 enrɑkɯ
+横ばい jokobɑi
+とい to̞i
+手厳しい te̞kibiʃiː
+延焼 enʃo̞ɯ
+真っ赤 mɑʔkɑ
+財産 sɑisɑn
+票 ço̞ɯ
+奴隷 do̞reː
+お返し okɑeʃi
+報酬 ho̞ɯʃɯː
+ループ rɯːpɯ
+ホスピタリティ hosɯpitariti
+形態 keːtɑi
+混沌 ko̞nto̞n
+明るく ɑkɑrɯkɯ
+外し hɑzɯʃi
+にこやか nikojɑkɑ
+福田 ɸɯkɯdɑ
+殴り nɑgɯri
+ヌード nɯːdo̞
+道のり mitʃino̞ri
+曖昧 ɑimɑi
+花咲 hɑnɑsɑkɑ
+以外 igɑi
+バスケ basɯke
+払う hɑrɑɯ
+全部 ze̞nbɯ
+ガウディ gɑɯdi
+ラーメン rɑːmen
+動かし ɯgokɑʃi
+メモリ memo̞ri
+美佳 mikɑ
+学際 gakɯsai
+上場 dʒo̞ɯdʒo̞ɯ
+店 mise̞
+戻ら modorɑ
+稽古 keːko̞
+メニュー me̞ɲɯː
+相野 ɑino
+トースト toːsɯto
+仕掛 ʃikɑke
+シンナー ʃinnɑː
+上ろ no̞bo̞ro̞
+ジュリー dʒɯriː
+つくっ t͡sɯkɯʔ
+食欲 ʃo̞kɯjo̞kɯ
+追随 t͡sɯizɯi
+その他 sonotɑ
+浪人 ro̞ɯnin
+未然 mize̞n
+素晴らしい sɯbaraʃiː
+寺沢 teɾasaɯa
+白鳥 hɑkɯtʃoː
+付き添い tsɯkisoj
+北村 kitɑmɯrɑ
+とらえる torɑerɯ
+アナリスト anaɾisɯto
+ツガミ t͡sɯ gɑ mi
+税額 zeːgɑkɯ
+黒幕 kɯromɑkɯ
+巡っ me̞gɯʔ
+とっとと to̞ʔto̞to̞
+勝っ kɑʔ
+ついてる t͡sɯite̞rɯ
+始動 ʃido̞ɯ
+ゃんええとこや n eːto kojɑ
+長短 tʃoːtɑn
+舎 ʃɑ
+組ん kɯn
+儀式 giʃiki
+ロールケーキ ro̞ːrɯ keːki
+伝えれ t͡sɯtɑere
+お伝え ot͡sɯtɑe
+叩き tɑtɑki
+焼 jɑke
+和 ɯa
+訪米 ho̞ːbeː
+代 dɑi
+悪徳 ɑkɯtokɯ
+上位 dʒo̞ɯi
+年数 nensɯː
+博多 hɑkɑtɑ
+動員 do̞ɯin
+ネコ neko̞
+君たち kimitɑtʃi
+余波 johɑ
+言うまでもなく iɯ mɑde mo nɑkɯ
+マズ mɑzɯ
+年月 to̞ʃit͡sɯki
+礼 re̞ː
+早い hɑjɑi
+部分 bɯbɯn
+的確 tekikɑkɯ
+選出 senshɯtsɯ
+ガソリンエンジン gɑsoɾineɴdʒin
+税 ze̞ː
+里 sɑto
+欧米 o̞ːbeː
+改正 kɑiseː
+日弁連 nitʃibe̞nre̞n
+敢闘 kɑntoː
+クソ kɯso
+待ち合わせ matʃiaɯase
+消しゴム keʃi go̞mɯ
+インクリボン inkɯ ribo̞n
+フレンゾ ɸɯren zo̞
+流派 rʲɯːhɑ
+計算 kɑisɑn
+攻撃 ko̞ɯgeki
+ナラ nɑrɑ
+弁護 bengo̞
+被さっ kabɯsaʔ
+手がける tegɑkerɯ
+洗っ ɑrɑʔ
+横 jo̞ko̞
+メグ me̞gɯ
+鑑別 kɑnbet͡sɯ
+けが kegɑ
+初夏 ʃokɑ
+規制 kise̞ː
+応募 o̞ɯbo̞
+思い出 o̞mo̞ide
+正直 ʃo̞ɯdʒiki
+蝕ん mɯʃibɑn
+アディダス adʑidasɯ
+個別 ko̞bet͡sɯ
+被曝 hibɑkɯ
+癒着 jɯtʃɑkɯ
+松坂 mɑt͡sɯzɑkɑ
+懲り ko̞ri
+ご go̞
+コント ko̞nto̞
+単純 tɑndʒɯn
+女装 ʒo̞so̞ː
+き ki
+青色 ɑoiro
+ショート ʃo̞ːto̞
+コマツ komɑt͡sɯ
+衣 ko̞ro̞mo̞
+突き飛ばさ tsɯkitobasa
+ぬぐい nɯgɯi
+触る savaɾɯ
+園内 ennɑi
+都合 t͡sɯgo̞ɯ
+断 dɑn
+脚 ɑʃi
+本稿 ho̞nko̞ɯ
+浴 jo̞kɯ
+猛威 mo̞ɯi
+包装 ho̞ːso̞ː
+木材 mokɯzɑi
+ナマ nɑmɑ
+餅 mo̞tʃi
+加藤 kɑtoː
+企業 kigʲo̞ɯ
+久保 kɯbo̞
+否定 hite̞ː
+戻そ mo̞do̞so̞
+振り乱し ɸɯrimidɑʃi
+理数 risɯː
+刺身 sɑʃimi
+露呈 ro̞teː
+中 nɑkɑ
+玉の輿 tɑmɑnokoʃi
+づまりを zɯmari ɯo
+司る tsɯkasadorɯ
+同一 do̞ɯit͡sɯ
+声 ko̞e
+ザラッ zɑrɑ
+試さ misɑ
+媒体 bɑitɑi
+松浦 mɑt͡sɯːrɑ
+近道 tʃikɑmitʃi
+懐かしい nɑt͡sɯkɑʃiː
+マルタ mɑrɯtɑ
+床 jɯkɑ
+常連 dʒo̞ɯren
+健全 ke̞nze̞n
+安東 ɑndoː
+江沢民 koː tɑkɯmin
+文庫 bɯnko̞
+主題歌 ʃɯdɑikɑ
+門出 kɑdode
+注意深く tʃɯːi ɸɯkɑkɯ
+土産 mijɑge
+解析 kɑiseki
+タスク tasɯkɯ
+闘志 to̞ɯʃi
+唇 kɯtʃibirɯ
+返さ kɑesɑ
+せいぜい se̞ːze̞ː
+検証 kenʃo̞ɯ
+漆 ɯrɯʃi
+比率 hirit͡sɯ
+効果 koːkɑ
+ウニョク ɯ ɲo̞ kɯ
+ぜひとも zehi to̞ mo̞
+猛烈 mo̞ɯret͡sɯ
+マラソン mɑɾɑson
+知己 tʃiki
+ヤナセ jɑnɑse
+薄 ɯsɯ
+片や kɑtɑ jɑ
+房総半島 bɯzɯɕɑ̃tɯ
+出過ぎ desɯgi
+繰り返せ kɯɾikaeze
+痛々しい itɑitɑʃiː
+乗馬 dʒoːbɑ
+シングハル ʃingɯ hɑrɯ
+山登り jɑmɑnobori
+来 ki
+近づい tʃikɑzɯi
+みつける mit͡sɯke̞rɯ
+部門 bɯmo̞n
+無意識 mɯiʃiki
+値打ち ne̞ɯtʃi
+チームワーク tʃiːmɯɯaːkɯ
+取り下げ toɾisɑʒe
+スッピン sɯʔpin
+取引 to̞rihiki
+一里塚 itʃi ri t͡sɯkɑ
+価 ɑtɑi
+割り切る ɯarikirɯ
+月末 get͡sɯmɑt͡sɯ
+受け入れる ɯke̞ire̞rɯ
+なし nɑʃi
+一年中 itʃi ne̞ntʃɯː
+トルコ to̞rɯko̞
+整頓 seːto̞n
+デリカフーズ derikɑ ɸɯːzɯ
+稼業 kɑgʲoː
+小鳥 ko̞to̞ri
+爆撃 bɑkɯgeki
+漂う tɑdɑjoɯ
+見事 migo̞to̞
+餌食 e̞dʒiki
+聖書 seːʃo̞
+同省 do̞ɯʃo̞ɯ
+唐突 to̞ɯto̞t͡sɯ
+西尾 niʃio̞
+措置 so̞tʃi
+兼務 ke̞nmɯ
+ナチュラル nɑtʃɯrɑrɯ
+返品 he̞npin
+弱く joɯakɯ
+事情 dʒidʒo̞ɯ
+摘出 te̞kiʃɯt͡sɯ
+闘将 to̞ɯʃo̞ɯ
+ペイ pe̞i
+控えめ hikɑeme
+そうですね so̞ːdesnə
+めい me̞ː
+驚天動地 kʲo̞ɯten do̞ɯtʃi
+良妻 ɾjoːsɑi
+おこす okosɯ
+原料 genrʲo̞ɯ
+引き算 hikizɑn
+キチン kitʃin
+壊す kovasɯ
+正午 ʃo̞ɯgo̞
+史上 ʃidʒo̞ɯ
+動け ɯgo̞ke
+書籍 ʃo̞seki
+閣僚 kɑkɯrʲoː
+セッション sɛsɕɔn
+確定 kɑkɯteː
+さん sɑn
+見栄え mibɑe
+信仰 ʃinko̞ɯ
+エノケン eno̞ken
+あふれ ɑɸɯre
+総菜 soːzɑj
+乗り回し norimaɯaʃi
+をめぐって ɯo megɯʔte
+債 sɑi
+ツワリ t͡sɯɯari
+伝える t͡sɯtɑerɯ
+気づく kizɯkɯ
+エア eɑ
+昨季 sakɯki
+ひねっ hine̞ʔ
+カウンター kɑɯntɑː
+貯める tɑmerɯ
+小笠原 oɰᵝasaɯaɾa
+對辞 tɑi dʒi
+捜し sɑgɑʃi
+アキバ ɑkibɑ
+芳文社 hoːbɯn ʃɑ
+それとも so̞ɾeto̞mo̞
+海賊 kɑizokɯ
+持ち歩く motʃiɑrɯkɯ
+なき nɑki
+突っ込ん t͡sɯʔko̞n
+部位 bɯi
+罰 bɑt͡sɯ
+山本 jɑmɑmoto
+逮捕 tɑiho
+ゴンドラ gondorɑ
+儲かっ moːkɑʔ
+彼 kɑre
+エコノミスト ekonomisɯto
+高値 tɑkɑne
+トレイン to̞rein
+単月 tɑnget͡sɯ
+べ be̞
+ハヤシライス hajaʃizaisɯ
+誠実 se̞ːdʒitsɯ
+評判 çoːbɑn
+がっ gɑʔ
+去る saɾɯ
+着る kirɯ
+かなえ kɑnɑe
+マネー mɑneː
+極める kiɯamerɯ
+売価 bɑikɑ
+多彩 tɑsɑi
+弾圧 danatsɯɯ
+初年度 hɑt͡sɯ nendo
+素早く sɯbajakɯ
+救っ sɯkɯʔ
+石川 iʃikaɯa
+決める kime̞rɯ
+手かせ tekɑse
+腹立ち hɑrɑdɑtʃi
+辛い t͡sɯrɑi
+遣い t͡sɯkɑi
+八 hɑtʃi
+好ま konomɑ
+光ろ hikɑro
+おもちゃ omotʃɑ
+録音 ro̞kɯo̞n
+動かせる ɯgokaserɯ
+原資 ge̞nʃi
+ごくごく go̞kɯgo̞kɯ
+ローン ro̞ːn
+利する ɾisɯɾɯ
+大阪 osɑkɑ
+火葬 kɑsoː
+従量制 dʒɯːrjoːseː
+サイフ saifɯ
+太い ɸɯto̞i
+他国 tɑkokɯ
+合わせ ɑvɑze
+電圧 denatsɯɯ
+境目 sɑkɑime
+浅野 ɑsɑno
+伏せ字 fusedʒi
+針 hɑri
+朝鮮半島 tʃo̞ːsɛnhɑ̃to̞ː
+しまっ ʃimɑʔ
+スクリーン sɯkɯriːn
+抜か nɯkɑ
+雨 ɑme
+お気に入り oki ni hɑiri
+贈与 zo̞ɯjo̞
+取組め to̞rikɯme
+クローズド kɯro̞ːzɯdo̞
+姿勢 ʃise̞ː
+ボリューミー bo̞rʲɯːmiː
+颯爽 sɑʔsoː
+受け付ける ɯke̞t͡sɯke̞rɯ
+ろう ro̞ɯ
+歓声 kɑnseː
+デザイン dezɑin
+黒死病 ko̞kɯʃibʲo̞ɯ
+引き続い hikit͡sɯzɯi
+いこ iko̞
+唄 ɯtɑ
+心臓 ʃinzo̞ɯ
+矛先 hokosɑki
+サラウンド saɾaɯndo
+過ごそ sɯɣoso
+ダム dɑmɯ
+伴い tomonɑi
+不服 ɸɯɸɯkɯ
+客 kʲɑkɯ
+労務 ro̞ɯmɯ
+出かけ dekɑke
+プラン pɯrɑn
+いかが ikɑgɑ
+磨か migɑkɑ
+こういう ko̞ɯ iɯ
+ザル zɑrɯ
+イベント ibento̞
+超える ko̞erɯ
+やれ jɑre
+終え o̞e
+ルーキーイヤー rɯːkiː ijɑː
+堀江 ho̞rie
+上告 dʒo̞ɯko̞kɯ
+鹿児島 kɑgoʃimɑ
+守備 ʃɯbi
+夜 jo̞rɯ
+スプーン sɯpɯːn
+智恵子 tʃieko̞
+うっすら ɯvsɯɾa
+散見 sɑŋkən
+美人 bidʒin
+正に mɑsɑni
+寂れる sabiɾeɾɯ
+オバマバッジ obɑmɑ bɑdʒdʒi
+機知 kitʃi
+返せ kɑeze
+雑用 zɑt͡sɯjoː
+中山 nɑkɑjɑmɑ
+加わる kɯɯaɯarɯ
+飲ん no̞n
+犯し okɑʃi
+見送ら miokɯrɑ
+世論 jo̞ro̞n
+授から dʒɯ kɑrɑ
+側面 sokɯmen
+互換 gokɑn
+木立 kodɑtʃi
+はまっ hɑmɑʔ
+椅子 isɯ
+フジテレビ ɸɯdʒi te̞re̞bi
+務め t͡sɯto̞me
+速く hɑjɑkɯ
+息子 mɯsɯko
+ライ rɑi
+肥満 himɑn
+露出 ro̞ʃɯt͡sɯ
+名刺 me̞ːʃi
+乗り場 noribɑ
+ブーティ bɯːti
+丸く mɑrɯkɯ
+興味津々 kʲo̞ɯmi ʃinʃin
+ウクライナ ɯkɯrɑinɑ
+残す nokosɯ
+アナ ɑnɑ
+補充 ho̞dʒɯː
+電化 denkɑ
+兄 ɑni
+意向 iko̞ɯ
+外需 gɑidʒɯ
+報告 ho̞ɯko̞kɯ
+後背 koːhɑi
+シニア ʃiniɑ
+友達 tomodɑtʃi
+弱る joɯarɯ
+揚がっ ɑgɑʔ
+身構え migɑmɑe
+開け ɑke
+その間 sonomɑ
+ゴルファー gorɯɸɑː
+幕開け mɑkɯɑke
+出 de̞
+たとえ tɑtoe
+試合 ʃiɑi
+郷里 kʲo̞ɯri
+ソックス sokekɯsɯ
+過ち ɑjɑmɑtʃi
+宇佐美 ɯsami
+少子 ʃo̞ɯʃi
+事業 dʒigʲo̞ɯ
+うなずい ɯnɑzɯi
+ダメージ dɑmeːdʒi
+ペリッ pe̞ri
+広げる hiro̞gerɯ
+しゅう ʃɯː
+証言 ʃo̞ɯgen
+サウンド saɯndo
+麗 re̞ː
+ハゲタカ hɑgetɑkɑ
+藤里 fɯxisato
+弱音 joɯane
+慣行 kɑnkoː
+春場所 hɑrɯ bɑʃo
+生き物 ikimo̞no̞
+シャイ ʃɑi
+東京都立大学 tokʲo torit͡sɯ dɑigɑkɯ
+深める ɸɯkɑmerɯ
+血液 ke̞t͡sɯe̞ki
+ええ e̞ː
+政府 seːfɯ
+拡張 kɑkɯtʃoː
+正座 seːzɑ
+渋谷 ʃibɯjɑ
+向こう mɯko̞ɯ
+かむ kɑmɯ
+ウエディング ɯe̞dingɯ
+系列 ke̞ːre̞t͡sɯ
+断片 dɑnpen
+波 nɑmi
+抜きんで nɯki n de̞
+太郎 tɑroː
+平成 he̞ːse̞ː
+伸ばし nobɑʃi
+素晴らし sɯbaɾaʃi
+供養 kɯjo̞ɯ
+あれこれ ɑrekore
+付きもの t͡sɯkimo̞no̞
+具 gɯ
+治っ nɑoʔ
+引き延ばす hikinobasɯ
+済ませ sɯmase
+にまつわる ni mat͡sɯɯarɯ
+操作 soːsɑ
+エコノミー eko̞no̞miː
+観戦 kɑnsɯn
+感想 kɑnsoː
+施行 ʃiko̞ɯ
+壊れ koɯare
+うたかた ɯtɑkɑtɑ
+根底 ko̞nteː
+略称 rʲɑkɯʃoː
+サード sɑːdo
+訪ね tɑzɯne
+気持ちよく kimo̞tʃi jo̞kɯ
+変調 hentʃo̞ɯ
+川南 kaɯaminami
+興味深かっ kʲoːmi bɯkɑkɑʔ
+所以 jɯe̞n
+駆け込ん kɑkekon
+ハンデ hɑnde
+ゃんけん n ke̞n
+あきらめ ɑkirɑme
+搾取 sakɯʃɯ
+触れ合う ɸɯreɑɯ
+親友 ʃiɲjɯː
+かしこ kɑʃiko
+大室山 oːmɯro jɑmɑ
+一回り itʃi maɯari
+工学部 koːgɑkɯ bɯ
+ノイズ no̞izɯ
+慌てる aɯaterɯ
+反論 hɑnron
+大喜 dɑiki
+球面 kʲɯːme̞n
+憧れ ɑkogɑre
+序盤 dʒobɑn
+主義 ʃɯgi
+資質 ʃiʃit͡sɯ
+多田 tɑdɑ
+会場 kɑidʒoː
+極小 kʲo̞kɯʃo̞ɯ
+風下 kɑzɑʃimo
+山盛り jɑmɑmori
+送信 soːʃin
+ただし tɑdɑʃi
+臨め no̞zo̞me
+死ぬ ʃinɯ
+推測 sɯisokɯ
+潰瘍 kɑijoː
+土俵 do̞ço̞ɯ
+春秋 ʃɯndʒɯː
+書き出し kɑkidɑʃi
+軽率 keːsotsɯ
+家長 kɑtʃoː
+好き嫌い sɯkikiɾai
+衰退 sɥitɑi
+上村 ɯemɯrɑ
+難し mɯzɯkɑʃi
+格闘技 kɑkɯtoːgi
+懐かしむ nɑt͡sɯkɑʃimɯ
+からだ kɑrɑdɑ
+ウォーク o̞ːkɯ
+中団 tʃɯːdɑn
+安く jasɯkɯ
+幻聴 gentʃo̞ɯ
+詰まる t͡sɯmɑrɯ
+正 tɑdɑʃi
+タキシード tɑkiʃiːdo
+マドフ mɑdoɸɯ
+無上 mɯdʒo̞ɯ
+秘 hi
+ろくに ro̞kɯni
+家紋 kɑmon
+タイプラインストーンタイプ taipɯrainsɯtoːntaipɯ
+自尊心 dʒizo̞nʃin
+浜町 hɑmɑtʃoː
+辰巳 tɑt͡sɯmi
+本場 honbɑ
+海軍 kɑigɯn
+伸し no̞ʃi
+泥棒 do̞ro̞bo̞ɯ
+嘔吐 o̞ɯto̞
+煎り iri
+従って ʃitɑgɑʔte
+定め sɑdɑme
+引き hiki
+破っ jɑbɯʔ
+え e̞
+企画 kikɑkɯ
+サラエボ sɑɾɑebo
+れる re̞rɯ
+温める ɑtɑtɑmerɯ
+悪影響 ɑkɯeːkʲoː
+付か t͡sɯkɑ
+活路 kɑt͡sɯro
+判明 hɑnmeː
+来夏 rɑikɑ
+行う okonɑɯ
+重かっ omokɑʔ
+数々 kɑzɯkɑzɯ
+怪しい ɑjɑʃiː
+思う o̞mo̞ɯ
+られ rɑre
+厳しい kibiʃiː
+予報 jo̞ho̞ɯ
+トリートメント to̞riːto̞mento̞
+毎度 mɑido
+パラダイム pɑrɑdɑimɯ
+モチベーション mo̞tʃibeːʃo̞n
+断罪 dɑnzɑi
+指す sasɯ
+全力 zenrʲo̞kɯ
+歯医者 hɑ iʃɑ
+肺 hɑi
+名曲 meːkʲo̞kɯ
+院 in
+おごっ o̞go̞ʔ
+確立 kɑkɯrit͡sɯ
+広島 hiroʃimɑ
+ボク bo̞kɯ
+幻想 genso̞ː
+常雄 t͡sɯneo̞
+ムード mɯːdo̞
+タフ tɑɸɯ
+フレーバー ɸɯreːbɑː
+痩せ jɑze
+花見 hɑnɑmi
+植木 ɯe̞ki
+放し hɑnɑʃi
+片手 kɑtɑte
+七 nɑnɑ
+すぐさま sɯɰᵝazama
+楽 rɑkɯ
+某 bo̞ɯ
+姉 ɑne
+芸事 geːgo̞to̞
+裁判 sɑibɑn
+黒っぽい kɯro̞ʔpo̞i
+天敵 te̞nte̞ki
+成し遂げ nɑʃitoge
+延々と ɛnɛnto̞
+陥る o̞tʃiirɯ
+ふるまえ ɸɯrɯmɑe
+帰還 kikɑn
+矢口 jɑgɯtʃi
+顔つき kɑot͡sɯki
+うなぎ ɯnɑgi
+冠 kɑn
+早急 saʔkjɯː
+牛乳 gʲɯːɲɯː
+ジェニファー dʒeniɸɑː
+エクセーヌ ekɯseːnɯ
+工夫 kɯɸɯː
+かかわら kakaɯara
+偽物 nisemo̞no̞
+美少女 bi ʃo̞ɯdʒo̞
+片方 kɑtɑhoː
+真実 ʃindʒit͡sɯ
+チアキ tʃiɑki
+問い詰める to̞it͡sɯmerɯ
+凍る ko̞ːrɯ
+しろ ʃiro̞
+設立 setsɯɾitsɯ
+穀倉 kokɯgɯrɑ
+刊行 kɑnkoː
+上手かっ ɯmɑkɑʔ
+喧騒 kenso̞ː
+冬 ɸɯjɯ
+ゴールデンウィーク goːrɯden ɯiːkɯ
+マリノス maɾinosɯ
+仲間 nɑkɑmɑ
+叩きつけ tɑtɑkit͡sɯke
+グラス gɯɾasɯ
+生徒 seːto̞
+閑散 kɑnsɑn
+敬遠 ke̞ːe̞n
+レイ re̞ː
+弁当 bento̞
+コーチ ko̞ːtʃi
+落とす otosɯ
+巡回 dʒɯnkɑi
+イラン irɑn
+ちゃんこ tʃɑnko
+知り合う ʃiriɑɯ
+バイオリン bɑiorin
+好 jo̞ʃimi
+下院 kɑin
+アマダイ ɑmɑdɑi
+問いかける toikɑkerɯ
+一文 kɑzɯɸɯmi
+明星 mʲo̞ɯdʒo̞ɯ
+引き離し hikihɑnɑʃi
+愛着 ɑitʃɑkɯ
+貧しい mɑzɯʃiː
+銃殺 jɯːsatsɯ
+にとって ni to̞ʔte
+シンクロ ʃinkɯro̞
+かたち kɑtɑtʃi
+騒然と so̞ːzɛnto̞
+食い込める kɯiko̞merɯ
+短時間 tɑndʒikɑn
+イギリス igiɾisɯ
+教え o̞ʃie
+遠距離 enkʲo̞ri
+たら tɑrɑ
+パクリ pɑkɯri
+バレンタインデー bɑrentɑin deː
+かぎ kɑgi
+しゃべる ʃɑberɯ
+味付け ɑdʒit͡sɯke
+膨らん ɸɯkɯrɑn
+参り mɑiri
+真紀子 mɑkiko
+指導 ʃido̞ɯ
+テポドン tepo̞do̞n
+濡れ手で粟 nɯrete de aɯa
+歩き ɑrɯki
+つぶやい t͡sɯbɯjɑi
+喩え tɑtoe
+キツネ kit͡sɯne̞
+なくなっ nɑkɯnɑʔ
+本多 hondɑ
+高く tɑkɑkɯ
+楽屋裏 gɑkɯjɑ ɯrɑ
+耳目 dʒimo̞kɯ
+入場 ɲɯːdʒo̞ɯ
+ソフトウェア sofɯtoea
+固まり kɑtɑmɑri
+地元 dʒimo̞to̞
+すんなり sɯnnaɾi
+耐用 tɑijoː
+首席 ʃɯseki
+欲しい ho̞ʃiː
+打ち合い ɯtʃiɑi
+分母 bɯnbo̞
+自動車 dʒidoːʃɑ
+人 hito̞
+山中 jɑmɑnɑkɑ
+同館 doːkɑn
+動揺 do̞ɯjo̞ɯ
+今宵 ko̞jo̞i
+インフレ inɸɯre̞
+充分 dʒɯːbɯn
+トラ torɑ
+平価 heːkɑ
+保護 ho̞go̞
+心がけ kokorogɑke
+ささい sɑsɑi
+ない nɑi
+万 mɑn
+もれる mo̞rerɯ
+しまえ ʃimɑe
+閉め ʃime̞
+ヘタクソ hetakɯso
+あ ɑ
+立てる tɑterɯ
+誘い sɑsoj
+我が ɯaga
+長引く nɑgɑbikɯ
+事実 dʒidʒit͡sɯ
+減少 genʃo̞ɯ
+甘受 kɑndʒɯ
+ばれる bɑrerɯ
+併せ ɑvɑze
+訪韓 hoːkɑn
+扶養 ɸɯjo̞ɯ
+進展 ʃinte̞n
+離婚 riko̞n
+ハードボイルド hɑːdo boirɯdo
+日枝 hiedɑ
+美肌 bihɑdɑ
+うわさ ɯwasa
+現実 ge̞ndʒit͡sɯ
+温 o̞n
+いする isɯɾɯ
+燃え尽き mo̞et͡sɯki
+かかと kɑkɑto
+取り付ける to̞rit͡sɯkerɯ
+人権 dʒinke̞n
+山菜 sɑ̃saj
+貸し切り kɑʃikiri
+大久保 o̞ːkɯbo̞
+幅広い hɑbɑhiroi
+ストーリー sɯtoːriː
+踏みきれ ɸɯmikire̞
+一皮剥け itʃi kaɯamɯke
+ぎりぎり girigiri
+で de̞
+重ね kɑsɑne
+インタゲ intɑge
+湧き ɯaki
+遠回し toːmaɯaʃi
+浮き浮き ɯkiɯki
+しょっちゅう ʃo̞ʔtʃɯː
+視線 ʃizən
+滲ん nidʒin
+エネルギッシュ eneɾɯɡisʃɯ
+生じ ʃo̞ɯdʒi
+ウケ ɯke̞
+保全 ho̞zen
+森本 mo̞rimo̞to̞
+オール o̞ːrɯ
+続け t͡sɯzɯke̞
+和装 ɯasoː
+ウキウキ ɯkiɯki
+寄り付き jo̞rit͡sɯki
+さらり sɑɾɑɾi
+高齢 ko̞ːreː
+新橋 ʃinbɑʃi
+本来 honrɑi
+ああ ɑː
+手すり tesɯɾi
+滅入ら meirɑ
+常駐 dʒo̞ɯtʃɯː
+長かっ nɑgɑkɑʔ
+難しい mɯzɯkɑʃiː
+バラ bɑrɑ
+供給 kʲo̞ɯkʲɯː
+湾 ɯan
+キャラクター kʲɑrɑkɯtɑː
+要注意 jo̞ɯtʃɯːi
+ツラ t͡sɯrɑ
+ベルト berɯto̞
+ワード ɯaːdo
+阪神 hɑnʃin
+打って出る ɯʔte̞ de̞rɯ
+あまた ɑ mɑtɑ
+材 zɑi
+かかん kɑkɑn
+コマーシャル komɑːʃɑrɯ
+とっても to̞ʔtemo̞
+ことし ko̞to̞ʃi
+出力 ʃɯt͡sɯrʲo̞kɯ
+マキカレ mɑki kɑre
+整っ to̞to̞no̞ʔ
+過半数 kahansɯː
+タレント tɑrento
+対 tɑi
+流す nagasɯ
+フィールド ɸiːrɯdo̞
+特番 tokɯbɑn
+ムーブメント mɯːbɯmento̞
+たわむれる taɯamɯrerɯ
+追加 t͡sɯikɑ
+投機 to̞ɯki
+癒さ ijɑsɑ
+いい iː
+小田嶋 odɑdʒimɑ
+皮膚 hiɸɯ
+切ない setsɯnɑi
+失明 ʃit͡sɯme̞ː
+王 o̞ɯ
+ソチ so̞ci
+選択 sentakɯ
+溜息 tɑmeiki
+道中 do̞ɯtʃɯː
+良好 rʲo̞ɯko̞ɯ
+病みつき jɑmit͡sɯki
+ウェルチ e̞rɯtʃi
+お詫び oɯabi
+みなとみらい minɑtomirɑi
+仁川 intʃo̞n
+丸 mɑrɯ
+おとなしい otonɑʃiː
+専用 senjo̞ː
+近況 kinkʲo̞ɯ
+厨川 kɯrijagaɯa
+起きる o̞kirɯ
+へた hetɑ
+溢れ ɑɸɯre
+香取 kɑtori
+印刷 insɑtsɯ
+ぞ zo̞
+加速 kasokɯ
+諏訪 sɯva
+貴乃花 tɑkɑnohɑnɑ
+フェイス feisɯ
+リズム rizɯmɯ
+終結 ʃɯːke̞t͡sɯ
+モンタ mon tɑ
+スター sɯtaɕi
+地検 tʃike̞n
+空振り kɑrɑbɯri
+なぜ nɑze
+ファウル ɸɑɯrɯ
+手がかり tegɑkɑri
+穴埋め ɑnɑɯme
+キーホルダー kiː horɯdɑː
+乗っ no̞ʔ
+輩 jɑkɑrɑ
+古墳 ko̞ɸɯn
+背負い seo̞j
+キュン kʲɯn
+そろえ so̞ɾo̞e
+パイオニア pɑioniɑ
+理想 riso̞ː
+かわいそう kaɯaisoː
+基調 kitʃo̞ɯ
+賞賛 ʃoːsan
+余所見 jo̞so̞mi
+要諦 jo̞ːteː
+メンタルトレーナー mentɑrɯ toreːnɑː
+毛糸 keːto̞
+創価学会 soːkagaʔkai
+ゴーサイン goːsɑin
+強み t͡sɯjo̞mi
+艦 kɑn
+本日 ho̞ndʒit͡sɯ
+ポシャっ poʃɑʔ
+市内 ʃinɑi
+程なく hodo nɑkɯ
+ごと go̞to̞
+歩い ɑrɯi
+大嫌い dɑikirɑi
+プレゼンター pɯrezentɑː
+下手 hetɑ
+廃人 hɑidʒin
+早々 hɑjɑbɑjɑ
+愛人 ɑidʒin
+心遣い kokorozɯkɑi
+好み ko̞no̞mi
+観察 kɑnsɑtsɯ
+居 i
+高級 ko̞ɯkʲɯː
+仲介 tʃɯːkɑi
+奇襲 kiʃɯː
+兄弟 kʲoːdɑi
+ブルー bɯrɯː
+咲き乱れ sɑkimiðɑɾe
+にわたって ni ɯataʔte
+あっという間 ɑʔ to iɯ mɑ
+驚き o̞do̞ro̞ki
+恥ずかし hɑzɯkɑʃi
+後世 ko̞ːseː
+石井 iʃii
+実名 dʒit͡sɯme̞ː
+未来 mirɑi
+衣料 irʲo̞ɯ
+シックス ʃikekɯsɯ
+どぎつい do̞git͡sɯi
+喜多 kitɑ
+逃げ nige̞
+宙 tʃɯː
+燃え mo̞e
+甘 kɑn
+臨場 rindʒo̞ɯ
+錦 niʃiki
+農場 no̞ɯdʒo̞ɯ
+アイデア ɑideɑ
+オススメ osɯsɯme
+前提 ze̞nte̞ː
+押さえる osaeɾɯ
+黄河 koːgɑ
+総論 so̞ːɾo̞n
+要し jo̞ɯʃi
+聞き覚え kikio̞bo̞e
+ベース beːsɯ
+小さく tʃiːsakɯ
+ゆるみ jɯrɯmi
+加筆 kɑhit͡sɯ
+至り itɑri
+生理 se̞ːɾi
+警部補 keːbɯho̞
+すうっ sɯɯʔ
+駅 e̞ki
+アイライン ɑi rɑin
+我が国 ɯaga kɯni
+杜 mo̞ri
+飲料 inrʲo̞ɯ
+バンダイコリア bɑndɑi koriɑ
+誘致 jɯːtʃi
+いやー ijɑː
+とりまとめ torimɑtome
+かつら kɑt͡sɯrɑ
+岩瀬 ivɑze
+会 kɑi
+民主 minʃɯ
+戻り mo̞do̞ri
+住 dʒɯː
+デビュー de̞bʲɯː
+小銭 ko̞zeni
+未納 mino̞ɯ
+下部 kɑbɯ
+説得 seʔtokɯ
+斬新 zɑnʃin
+雄弁 jɯːbe̞n
+他者 tɑʃɑ
+手元 temo̞to̞
+明記 me̞ːki
+疑っ ɯtɑgɑʔ
+レス rezɯ
+節電 se̞tsɯde̞n
+辞任 dʒinin
+ピロリ piro̞ri
+延命 e̞nme̞ː
+菓子パン kɑʃi pɑn
+底入れ so̞ko̞iɾe
+原点 ge̞nte̞n
+分かっ ɯakaʔ
+コツコツ ko̞t͡sɯko̞t͡sɯ
+配当落ち hɑitoː otʃi
+順位 dʒɯni
+身頃 migo̞ro̞
+支払っ ʃihɑrɑʔ
+押し上げ oʃiɑge
+雫石 ʃizɯkɯiʃi
+ポジショニング po̞dʒiʃo̞ningɯ
+おもむろに o̞mo̞mɯro̞ni
+写真 ʃɑʃin
+タンス tansɯ
+読めれ jo̞mere
+誤魔化し gomɑkɑʃi
+物資 bʊsʃi
+たばこ tɑbɑko
+デリヘル de̞rihe̞rɯ
+弁護士 bengo̞ʃi
+小島 kodʒimɑ
+伝蔵 denzo̞ɯ
+千秋 tʃiɑki
+カタチ kɑtɑtʃi
+迷彩 meːsɑi
+黙っ dɑmɑʔ
+嫌 ijɑ
+釜山 ɸɯzɑn
+メディカル medikɑrɯ
+その後 sonoɑto
+返る kɑerɯ
+一大 itʃidɑi
+向き合わ mɯkiaɯa
+肉体 nikɯtɑi
+不信 ɸɯʃin
+端子 tɑnʃi
+花 hɑnɑ
+クロ kɯro̞
+田家 tɑjɑ
+格別 kɑkɯbet͡sɯ
+眺め nɑgɑme
+将軍 ʃo̞ɯgɯn
+アイルランド ɑirɯrɑndo
+美味かろ ɯmɑkɑro
+巡 dʒɯn
+わが子 ɯaga ko
+高らか tɑkɑrɑkɑ
+合弁 go̞ɯben
+被っ kɑbɯʔ
+ボリューム bo̞rʲɯːmɯ
+蚊 kɑ
+襲来 ʃɯːrɑi
+残し no̞ko̞ʃi
+委託 itɑkɯ
+女の子 onnɑnoko
+慣れろ nɑrero
+モーツァルト moːt͡sɑrɯto
+感覚 kɑnkɑkɯ
+当分 to̞ɯbɯn
+そうなると soːnarɯto
+横浜 jokohɑmɑ
+申込 mo̞ɯʃiko̞mi
+勇気 jɯːki
+至れ itɑre
+ギリシャ giriʃɑ
+定員 te̞ːin
+南端 nɑntɑn
+末広 sɯehiɾo
+移入 iɲɯː
+課税 kɑzeː
+固まっ kɑtɑmɑʔ
+大変 tɑihen
+消去 ʃo̞ɯkʲo̞
+示談 dʒidɑn
+働い hɑtɑrɑi
+鋭い sɯɾɯdoj
+サクサク sakɯsakɯ
+論理 ro̞nri
+失 ʃit͡sɯ
+教室 kʲo̞ɯʃit͡sɯ
+カツ kɑt͡sɯ
+増加 zoːkɑ
+こわもて koɯamote
+絶ち tɑtʃi
+和名 ɯamʲoː
+ふざけ ɸɯzɑke
+苦しめる kɯrɯʃime̞rɯ
+アキ ɑki
+投げかけ nɑgekɑke
+叩か tɑtɑkɑ
+表示 ço̞ɯdʒi
+だ dɑ
+くすぶり kɯsɯbɯɾi
+ほうりだし hoːridɑʃi
+使わ t͡sɯkaɯa
+ジェピー dʒe̞piː
+早口 hɑjɑkɯtʃi
+検挙 kenkʲo̞
+阪神タイガース hanʃintaiɡaasɯ
+現象 genʃo̞ɯ
+がむしゃら gɑmɯʃɑrɑ
+フェニックス fenikekɯsɯ
+躍り出る o̞do̞riderɯ
+楽しま tɑnoʃimɑ
+南米 nɑnbeː
+墓所 bo̞ʃo̞
+瞬時 ʃɯndʒi
+保つ tɑmot͡sɯ
+練乳 re̞nɲɯː
+データ deːtɑ
+設え ʃit͡sɯrɑe
+赤十字 ɑkɑ dʒɯːdʒi
+宮田 mijɑtɑ
+ランチ rɑntʃi
+ミニカー minikɑː
+過密 kɑmit͡sɯ
+裁定 sɑiteː
+根っから neʔ kɑrɑ
+慣れ nɑre
+向かう mɯkɑɯ
+悪い ɯarɯi
+ふたり ɸɯtɑri
+響い hibii
+変換 henkɑn
+そういった soːiʔtɑ
+川澄 kavasɯmi
+立ち上がら tɑtʃiɑgɑrɑ
+ゆかり jɯkɑri
+提供 teːkʲo̞ː
+接着 setʃakɯ
+リブロ ribɯro̞
+たたずむ tɑtɑzɯmɯ
+難しく mɯzɯkɑʃikɯ
+男 o̞to̞ko̞
+黒色 kɯro̞iro̞
+時差 xisɑ
+販路 hɑnro
+中傷 tʃɯːʃo̞ɯ
+呼べる jo̞berɯ
+マスメディア masɯmedia
+全容 zeɲɯ
+栗栖 kɯɾisɯ
+朝礼 tʃo̞ɯreː
+アミノ酸 ɑminosɑn
+おける o̞kerɯ
+欠く kɑkɯ
+敬語 keːgo̞
+前期 ze̞nki
+若者 ɯakamono
+荒らさ ɑβɑɾɑsɑ
+通期 t͡sɯːki
+エクササイズ ekɯsasaizɯ
+報 ho̞ɯ
+悪女 ɑkɯdʒo
+償う t͡sɯgɯnɑɯ
+もしや moʃi jɑ
+ばれ bɑre
+降ろす oɾosɯ
+次回 dʒikɑi
+優れ sɯɣɯɾe
+お隣 otonɑri
+休刊 kʲɯːkɑn
+新潮社 ʃintʃoː ʃɑ
+晩酌 bɑnɑkɯ
+一種 iʔʃɯ
+蹴っ ke̞ʔ
+クライアント kɯrɑiɑnto
+当たら ɑtɑrɑ
+あけっぴろげ ɑkeʔpiroge
+ますます masɯmasɯ
+滑らす sɯbeɾasɯ
+返し kɑeʃi
+手作り te̞zɯkɯri
+自己 dʒiko̞
+珍しく mezɯrɑʃikɯ
+受益 dʒɯe̞ki
+群がる mɯrɑgɑrɯ
+人後 dʒingo̞
+引き立つ hikitɑt͡sɯ
+はぶく hɑbɯkɯ
+代替わり daigaɯari
+勤務 kinmɯ
+貯蔵 tʃo̞zo̞ɯ
+脱税 dɑt͡sɯzeː
+菜の花 nɑnohɑnɑ
+裏側 ɯragaɯa
+近江 o̞ɯmi
+ズレ zɯre̞
+飼い kɑi
+ハード hɑːdo
+究極 kʲɯːkʲo̞kɯ
+廃 hɑi
+遺志 iʃi
+絡ん kɑrɑn
+年代 nendɑi
+間際 magiɯa
+読む jo̞mɯ
+耕作 koːsakɯ
+上がら ɑgɑrɑ
+増量 zo̞ɯrʲo̞ɯ
+銃撃 dʒɯːge̞ki
+先週 senʃɯː
+口止め kɯtʃido̞me
+ハート hɑːto
+こじつけ ko̞dʒit͡sɯke
+前進 ze̞nʃin
+恐らく osoɾakɯ
+離脱 ridɑt͡sɯ
+日本赤十字社 niʔponsekijɯːdʒiʃa
+知恵袋 tʃie ɸɯkɯro̞
+藤間 ɸɯdʒimɑ
+単語 tɑngo
+ミサイル misaiɾɯ
+開始 kɑiʃi
+慣れる nɑrerɯ
+そのもの so̞no̞mo̞no̞
+謂 ii
+りん rin
+チャレンジング tʃɑrendʒingɯ
+撮る to̞rɯ
+イトイ ito̞i
+英子 eːko̞
+手頃 tego̞ro̞
+ウニ ɯni
+海賊版 kɑizokɯ bɑn
+けが人 kegɑnin
+売れれ ɯre̞re̞
+結ぶ mɯsɯbɯ
+座席 zɑseki
+武者 mɯʃɑ
+愛情 ɑidʒoː
+細分 saibɯn
+判る ɯakarɯ
+にわたる ni ɯatarɯ
+車 kɯrɯmɑ
+迷妄 meːmo̞ː
+住み替え sɯmikae
+もたせ motɑse
+冷やし hijɑʃi
+戒める imɑʃimerɯ
+特典 to̞kɯten
+のぞく no̞zo̞kɯ
+接し se̞sʃi
+冷え hie̞
+必ずや kɑnɑrɑzɯ jɑ
+おむつ o̞mɯt͡sɯ
+転勤 te̞nkin
+富士重工 ɸɯdʒi dʒɯːko̞ɯ
+それだけ soɾedɑke
+立ちすくん tatʃisɯkɯn
+マレーシア mɑreːʃiɑ
+落胆 rɑkɯtɑn
+出現 ʃɯt͡sɯge̞n
+矯正 kjo̞ːseː
+ジオラマ dʒiorɑmɑ
+ベニス benisɯ
+聞ける kike̞rɯ
+ちなん tʃinɑn
+座り sɯwaɾi
+歓談 kɑndɑn
+人生 dʒinseː
+筋肉 kinnikɯ
+瀬戸内海 setonɑikɑi
+騒音 so̞ːo̞n
+ハイライト hɑirɑito
+みりん mirin
+けさ kesɑ
+負っ o̞ʔ
+鳴り物入り nɑrimonoiri
+たわけ taɯake
+に対する nitaisɯɾɯ
+遂行 sɯikoː
+とっくり to̞ʔkɯri
+糸口 ito̞gɯtʃi
+英雄 e̞ːjɯː
+ペナルティ penɑrɯti
+未來 mirɑi
+東戸塚 higɑʃi tot͡sɯkɑ
+分譲 bɯndʒo̞ɯ
+撃退 gekitɑi
+引き継が hikit͡sɯgɑ
+長島 nɑgɑʃimɑ
+求める mo̞to̞merɯ
+同 do̞ɯ
+うんざり ɯnzɑri
+地銀 tʃigin
+ためらわ tameraɯa
+南アフリカ minɑmiɑɸɯrikɑ
+今朝 kesɑ
+シグマ ʃigɯmɑ
+モンテレイ mo̞nterei
+往生際 ɯʒɯsai
+使え t͡sɯkɑe
+勘違い kɑntʃigɑi
+ハズ hɑzɯ
+遺品 ihin
+額 gɑkɯ
+漂流 ço̞ɯrʲɯː
+喜劇 kige̞ki
+踊り場 odoribɑ
+リソース risoːsɯ
+たくましい tɑkɯmɑʃiː
+切り kiri
+たまる tɑmɑrɯ
+脅迫 kʲoːhɑkɯ
+同窓 do̞ːso̞ː
+年賀状 nengɑdʒoː
+数カ月 sɯːkagetsɯ
+美 bi
+平均 he̞ːkin
+ホワイトカラー hoɯaito karaː
+搭載 tozɑː
+院長 intʃo̞ɯ
+ベランダ berɑndɑ
+アフガン ɑɸɯgɑn
+演じ e̞ndʒi
+裏山 ɯrɑjɑmɑ
+右肩 migikɑtɑ
+付き合い t͡sɯkiɑi
+不毛 ɸɯmo̞ɯ
+ホームレス hoːmɯɾesɯ
+酒場 sɑkɑbɑ
+硬貨 koːkɑ
+ほう ho̞ɯ
+曲がっ mɑgɑʔ
+包ん t͡sɯt͡sɯn
+旨味 ɯmɑmi
+アドバイス adobaisɯ
+同意 do̞ɯi
+受け ɯke̞
+カラフル kɑrɑɸɯrɯ
+現場 genbɑ
+押しつけ o̞ʃit͡sɯke
+まやかし mɑjɑkɑʃi
+髪型 kɑmigɑtɑ
+帰宅 kitɑkɯ
+円城 endʒo̞ɯ
+よ jo̞
+音量 o̞nrʲo̞ɯ
+おぼしき o̞bo̞ʃiki
+両手 rʲo̞ɯte
+文化庁 bɯnkɑtʃoː
+つき t͡sɯki
+前足 mɑeɑʃi
+中でも nɑkɑ de mo
+このほど ko̞no̞ ho̞do̞
+また mɑtɑ
+誤差 gosɑ
+かみ合わ kamiaɯa
+申し分 mo̞ɯʃibɯn
+治癒 tʃijɯ
+たのしも tɑnoʃimo
+効い kii
+オンエアー ɔneɑ
+誤 go̞
+盛岡 moriokɑ
+わずか ɯazɯka
+募集 bo̞ʃɯː
+日々 hibi
+わかっ ɯakaʔ
+型 kɑtɑ
+望め no̞zo̞me
+ドア doɑ
+教習所 kʲo̞ɯʃɯːʃo̞
+遊ぶ asobɯ
+メンタルヘルス mentaɾɯheɾɯsɯ
+優作 jɯːsakɯ
+キャリア kʲɑriɑ
+力強 tʃikɑrɑzɯjo
+毛穴 keɑnɑ
+財務諸表 zɑimɯ ʃoçoː
+克也 kɑt͡sɯjɑ
+同時に do̞ɯdʒi ni
+うそうそ ɯsoɯso
+サイド sɑido
+油断 jɯdɑn
+運べ hɑkobe
+ラブソング rabɯsoŋgɯ
+記述 kidʒɯt͡sɯ
+物流 bɯt͡sɯrʲɯː
+見方 mikɑtɑ
+プレーヤー pɯreːjɑː
+厳しかっ kibiʃikɑʔ
+出遅れ de o̞kɯre
+戦い tɑtɑkɑi
+世 jo̞
+しかり ʃikɑri
+微動 bido̞ɯ
+務まり t͡sɯtomɑri
+朝倉 asakɯra
+優 jɯː
+商談 ʃoːdɑn
+桁 ketɑ
+生え hɑe
+ベリテ be̞ri te̞
+銅メダル doː medɑrɯ
+街頭 gɑitoː
+連 re̞n
+球 tɑmɑ
+お許し o̞jɯrɯʃi
+大槌 o̞ːt͡sɯtʃi
+いっ iʔ
+滴り ʃitɑtɑri
+代理 dɑiri
+ロレックス roɾeʔkɯsɯ
+キツ kit͡sɯ
+喜べ jo̞ro̞ko̞be
+直人 nɑoto
+不向き ɸɯmɯki
+大勝 dɑiʃoː
+フューチャリング ɸjɯːtʃɑringɯ
+お新香 o̞ʃinko̞
+キャラ kʲɑrɑ
+救命 kʲɯːme̞i
+料理 rʲo̞ːri
+まし mɑʃi
+伊勢丹 isetɑn
+面倒 mendo̞ɯ
+書簡 ʃokɑn
+明豊 meː jɯtɑkɑ
+集まろ ɑt͡sɯmɑro
+再審 sɑiʃin
+白羽 ʃirɑhɑ
+近隣 kinrin
+地道 dʒimitʃi
+スピリット sɯpiɾito
+イブ ibɯ
+通さ toːsɑ
+開業 kɑigʲoː
+和らげ jaɯarage
+意識 iʃiki
+息災 sokɯsai
+絶え tɑe
+可愛 kaɯai
+丁重 teitʃo̞ː
+今川 imagaɯa
+洗脳 senno̞ː
+投票 to̞ɯço̞ɯ
+穴蔵 ɑnɑgɯrɑ
+転居 tenkʲo̞
+に ni
+幸之助 koːnosɯke
+割り出し ɯaridaʃi
+スタイリスト sɯtaiɾisɯto
+ハーモニー hɑːmoniː
+まで mɑ de
+買っ kɑʔ
+プレー pɯre̞ː
+政争 seːso̞ː
+延び no̞bi
+開 kɑi
+整列 se̞ːɾe̞tsɯ
+磯 iso̞
+つく t͡sɯkɯ
+差し止め sɑʃitome
+アゲ ɑge
+気付い kizɯi
+金子 kɑneko
+速い hɑjɑi
+逃げ切っ nige̞ kiʔ
+大王 dɑioː
+家賃 jɑtʃin
+奇妙 kimʲo̞ɯ
+ショボ ʃo̞bo̞
+閣議 kɑkɯgi
+背丈 setɑke
+大国 tɑikokɯ
+世評 sehʲo̞ː
+津川 t͡sɯgaɯa
+無視 mɯʃi
+時刻 dʒiko̞kɯ
+挨拶 ɑisɑtsɯ
+平等 bʲo̞ɯdo̞ɯ
+普遍 ɸɯhe̞n
+カーラ kɑːrɑ
+石器 se̞ʔki
+混ぜ mɑze
+どれ do̞re
+何とか nɑn to kɑ
+さえ sɑe
+バロメーター bɑromeːtɑː
+任天堂 nintendo̞ɯ
+おまえ omɑe
+催す mojoosɯ
+別条 bet͡sɯdʒo̞ɯ
+おっとり o̞ʔto̞ri
+守る mɑmorɯ
+著しい itʃidʒirɯʃiː
+冷静 ɾe̞ːse̞ː
+懸け kɑke
+來 ?
+福竜丸 ɸɯkɯrʲɯːmɑrɯ
+一律 itʃirit͡sɯ
+受け身 ɯke̞mi
+燃費 ne̞npi
+煩う ɯazɯraɯ
+カヌー kɑnɯː
+気 ki
+最高 sɑikoː
+現役 ɡɛnɛki
+とよの to̞ jo̞ no̞
+星 ho̞ʃi
+がらみ gɑrɑmi
+遊び場 ɑsobibɑ
+練り直し nerinɑoʃi
+延坪 no̞bet͡sɯbo̞
+ベスト besɯto
+お o̞
+お昼 o̞hirɯ
+対決 tɑiket͡sɯ
+許容 kʲo̞jo̞ɯ
+業者 gʲoːʃɑ
+伝授 de̞ndʒɯ
+埋蔵 mɑizoː
+表皮 ço̞ɯhi
+ゴリー go̞ɾi
+澤村 sawamɯra
+特 to̞kɯ
+追究 t͡sɯikʲɯː
+スイッチャー sɯiʔtʃaː
+バイト bɑito
+俗物 zo̞kɯbɯt͡sɯ
+国語 ko̞kɯgo̞
+危惧 kigɯ
+靴下 kɯt͡sɯʃitɑ
+笑み e̞mi
+電子 de̞nʃi
+商標 ʃo̞ɯço̞ɯ
+継続 keːzo̞kɯ
+嬉しかっ ɯreʃikɑʔ
+勝ち kɑtʃi
+脱ぎ nɯgi
+音質 o̞nʃit͡sɯ
+ベータ beːtɑ
+満開 mɑnkɑi
+九州大学 kʲɯːʃɯː dɑigɑkɯ
+大衆 tɑiʃɯː
+泰弘 jasɯiɾo
+兵士 he̞ːʃi
+有史 jɯːʃi
+だとすれば datosɯreba
+ブーブー bɯːbɯː
+前金 mɑekin
+経る he̞rɯ
+特集 to̞kɯʃɯː
+相談 soːdɑn
+歌手 kɑʃɯ
+不便 ɸɯbe̞n
+指数 ʃisɯː
+大胆 dɑitɑn
+もってのほか moʔtenohokɑ
+ふつう ɸɯt͡sɯː
+選ん erɑn
+ニプロ ni pɯro̞
+あわて a ɯate
+浄水 ʒoːsɯi
+キャプテン kʲɑpɯten
+ポテト po̞teto̞
+降りる o̞rirɯ
+いずみ izɯmi
+政局 seːkjokɯ
+果たせ hɑtɑse
+服装 fɯkɯsoː
+巻き込む mɑkikomɯ
+真剣 ʃinke̞n
+恐喝 kʲoːkɑt͡sɯ
+当たり前 ɑtɑrimɑe
+臭い kɯsai
+快調 kɑitʃoː
+意図 ito̞
+レスリング resɯɾiᵑgɯ
+嫁 jo̞me
+税務署 zeːmɯʃo̞
+衆議院 ʃɯːgiin
+引く手あまた hikɯte ɑ mɑtɑ
+唖然 ɑzen
+求人 kʲɯːdʒin
+泊まれる tomɑrerɯ
+ひと息 hito̞iki
+同音 do̞ɯo̞n
+海水浴 kaisɯijokɯ
+お子さん okosɑn
+韓 hɑn
+福祉 ɸɯkɯʃi
+竜也 tɑt͡sɯjɑ
+祈り ino̞ri
+尽きる t͡sɯkirɯ
+決め手 kime̞te̞
+群れ mɯre̞
+忠信 tʃɯːʃin
+譲る jɯzɯrɯ
+惇 ɑt͡sɯʃi
+基本形 kiho̞nkeː
+毛布 mo̞ɯɸɯ
+真摯 ʃinʃi
+年間 nenkɑn
+空母 kɯːbo̞
+鍛える kitɑerɯ
+粗末 somatsɯɯ
+ほうれん草 ho̞ːɾenso̞ː
+を通じて ɯo t͡sɯːdʒite
+グローバル gɯroːbɑrɯ
+乾かす kawakasɯ
+積み上げ t͡sɯmiɑge
+かわいく kaɯaikɯ
+桶 o̞ke
+前半 zenhɑn
+ねー ne̞ː
+裸婦 rɑɸɯ
+じゃ dʒɑ
+触れ ɸɯre̞
+お参り omɑiri
+郡 gɯn
+ナイーブ nɑiːbɯ
+たく tɑkɯ
+競合 kʲo̞ɯgo̞ɯ
+収まる osamaɾɯ
+ホン ho̞n
+嫌悪 keno̞
+都道府県 to̞ do̞ɯ ɸɯ ken
+寛大 kɑndɑi
+貼り hɑri
+革 kaɯa
+与える ɑtɑerɯ
+ひねくれ hine̞kɯre̞
+おか okɑ
+れい子 reiko̞
+むろん mɯro̞n
+台詞 seɾifɯ
+参入 sɑ̃ɲɯː
+ボルドー bo̞rɯdo̞ː
+ときわ tokiɯa
+父 tʃitʃi
+絶た tɑtɑ
+眉 mɑjɯ
+簡易 kɑni
+受付 ɯke̞t͡sɯke̞
+なぜだか nɑze dɑ kɑ
+込み上げる komiɑgerɯ
+ダチョウ dɑtʃoː
+ドライブ dorɑibɯ
+ガスコンロ gasɯkonro
+スト sɯto
+ユアケー jɯɑkeː
+一眼 itʃigɑn
+真部 mɑnɑbe
+轟々 go̞ɯgo̞ɯ
+公式 ko̞ɯʃiki
+加えよ kɯɯaejo
+エピソード episo̞ːdo̞
+叩く tɑtɑkɯ
+知らず ʃirɑzɯ
+先頃 sɑkiɣoɾo
+寺島 terɑʃimɑ
+奇想天外 kisoːteᵑgɑi
+社民党 ʃɑmintoː
+動き出し ɯgo̞kideʃi
+心意気 ʃiniki
+心待ち kokoromɑtʃi
+梅 ɯme̞
+教材 kʲoːzɑi
+拠出 kʲo̞ʃɯt͡sɯ
+思える o̞mo̞erɯ
+杖 t͡sɯe̞
+山口 jɑmɑgɯtʃi
+鳥 to̞ri
+下回っ ʃitamaɯaʔ
+洗い流す aɾainaɣasɯ
+小泉 ko̞izɯmi
+カサカサ kɑsɑkɑsɑ
+弱み joɯami
+秋 ɑki
+メラノーマ merɑnoːmɑ
+ペン pe̞n
+上層 ʒo̞ːso̞ː
+気付く kizɯkɯ
+北西 hokɯseː
+みずほ mizɯho̞
+買え kɑe
+香り kɑori
+制し se̞ːʃi
+岡野 okɑno
+火元 himo̞to̞
+賑わっ nigiɯaʔ
+メートル meːto̞rɯ
+洋画 joːgɑ
+ラッキーアイテム rɑʔkiː ɑitemɯ
+身だしなみ midɑʃinɑmi
+朝食 tʃo̞ɯʃo̞kɯ
+励ん hɑgen
+何一つ nɑn hitot͡sɯ
+立春 ɾisɕɯn
+無実 mɯdʒit͡sɯ
+爆発 bɑkɯhɑt͡sɯ
+ウォルター orɯtɑː
+収め osɑme
+カツラ kɑt͡sɯrɑ
+飼料 ʃirʲo̞ɯ
+尚武 ʃo̞ɯbɯ
+反映 hɑneː
+対象 tɑiʃoː
+立て直す tatenaosɯ
+内分泌 nɑibɯnpit͡sɯ
+転化 tenkɑ
+職業 ʃo̞kɯgʲo̞ɯ
+ドクター dokɯtɑː
+コリドー ko̞rido̞ː
+上達 dʒoːtɑt͡sɯ
+ゴールデン go̞ːrɯden
+主導 ʃɯdo̞ɯ
+主力 ʃɯrʲo̞kɯ
+率いる hikiirɯ
+防災 boːzɑi
+大王製紙 dɑɪoːseːʃi
+朝帰り ɑsɑɣɑeɾi
+競り se̞ɾi
+セット seto̞
+特長 to̞kɯtʃo̞ɯ
+好演 ko̞ɯen
+コストパフォーマンス kosɯtopafoːmansɯ
+労働 ro̞ɯdo̞ɯ
+僕 bo̞kɯ
+目頭 megɑʃirɑ
+ベテラン beterɑn
+年度 nendo̞
+差損 sɑson
+統廃合 toːhɑigoː
+ケース keːsɯ
+印籠 inro̞ɯ
+ください kɯdasai
+各地 kɑkɯtʃi
+郵船 jɯːsen
+実弟 dʒiʔte̞ː
+式 ʃiki
+収益 ʃɯːe̞ki
+船長 sentʃo̞ː
+滴 ʃizɯkɯ
+薄型 ɯsɯɣata
+まもなく mɑ mo nɑkɯ
+囁か sɑsɑjɑkɑ
+妄想 mo̞ːso̞ː
+割 ɯari
+スピーディー sɯpiːdiː
+石田 iʃidɑ
+跳ね返っ hɑnekɑeʔ
+薬害 jɑkɯgɑi
+インタビュー intɑbʲɯː
+再犯 sɑihɑn
+授与 dʒɯjo̞
+資料 ʃirʲo̞ɯ
+雪辱 setsɯjokɯ
+メイク me̞ikɯ
+有難み jɯːgɑtɑmi
+容赦 joːʃɑ
+作者 sakɯʃa
+乱発 rɑnpɑt͡sɯ
+合意 go̞ɯi
+作れる t͡sɯkɯre̞rɯ
+曲がり mɑgɑri
+目次 mo̞kɯdʒi
+問いただす toitadasɯ
+バルサ baɾɯsa
+慶子 keːko̞
+下克上 geko̞kɯdʒo̞ɯ
+臨海 rinkɑi
+ただ今 tɑdɑ imɑ
+助詞 dʒo̞ʃi
+錯誤 sakɯɰᵝao
+飛ばし tobɑʃi
+ノネナール nonenɑːrɯ
+はずれ hɑzɯre
+輪 ɯa
+処遇 ʃo̞gɯː
+室内 ʃit͡sɯnɑi
+サンディエゴ sɑndieɡo
+空揚げ kɑrɑːge
+塗っ nɯʔ
+覚えれ o̞bo̞ere
+勇敢 jɯːkɑn
+勤め t͡sɯto̞me
+私服 ʃiɸɯkɯ
+目指す mezasɯ
+自ら mizɯkɑrɑ
+ラメ rɑme
+コナン konɑn
+アナタ ɑnɑtɑ
+かみ合っ kɑmiɑʔ
+和田 ɯada
+続き t͡sɯzɯki
+入れる ire̞rɯ
+歳出 saishɯtsɯ
+守 mɑmorɯ
+嘆い nɑgei
+握手 ɑkɯʃɯ
+枝豆 edɑmɑme
+排出 hɑiʃɯt͡sɯ
+当社 toːʃɑ
+水炊き mizɯtɑki
+オート o̞ːto̞
+職人 ʃo̞kɯnin
+絵本 eho̞n
+バビ bɑbi
+見極めれ mikiɯamere
+オフィス ofisɯ
+放映 ho̞ːeː
+厳戒 genkɑi
+かゆみ kɑjɯmi
+三顧 sɑnko
+亡くし nɑkɯʃi
+増やし ɸɯjɑʃi
+進ま sɯsɯma
+中印 tʃɯːin
+沸き ɯaki
+対外 tɑigɑi
+今日 kʲo̞ː
+尽くし t͡sɯkɯʃi
+いまどき imɑdoki
+久々 hisɑbisɑ
+はいっ hɑiʔ
+水先案内 mizɯsɑkiɑnɑi
+妹 imo̞ɯto̞
+双葉 ɸɯtɑbɑ
+ランドセル ɾandoseɾɯ
+悪魔 ɑkɯmɑ
+ログ ro̞gɯ
+加 kɑ
+実施 dʑiʔsi
+黄金 o̞ɯgo̞n
+ナイジェリア nɑidʒeriɑ
+こそ ko̞so̞
+想い o̞mo̞i
+廃物 hɑibɯt͡sɯ
+可能 kɑnoː
+休息 kjɯːsokɯ
+ハングリー hɑngɯriː
+競り勝つ seɾikatsɯ
+比べる kɯrɑberɯ
+健康 kenko̞ɯ
+健闘 kento̞ɯ
+回復 kɑiɸɯkɯ
+関門 kɑnmon
+見合っ miɑiʔ
+蹄 hizɯme̞
+窮地 kʲɯːtʃi
+書道 ʃo̞do̞ɯ
+番組 bɑngɯmi
+佐賀 sɑgɑ
+現に ge̞nni
+参事官 sɑndʒikɑn
+若年 dʒɑkɯnen
+内政 nɑiseː
+見守っ mimɑmoʔ
+慰謝 iʃɑ
+仕え t͡sɯkɑe
+成り nɑri
+素っ気 sɔʔke̞
+退社 tɑiʃɑ
+スイス sɯisɯ
+貧乏 binbo̞ɯ
+異なっ kotonɑʔ
+任じ nindʒi
+ドラえもん dorɑemon
+厳禁 ge̞nkin
+自分 dʒibɯn
+脅かす obijakasɯ
+やり直せる jaɾinaoseɾɯ
+後輩 koːhɑi
+よお jo̞ː
+モリー mo̞riː
+属する zokɯsɯɾɯ
+伸 ʃin
+オリジナリティ oridʒinɑriti
+いけ ike̞
+摂取 sesʃɯ
+業務 gʲo̞ɯmɯ
+買い取り kɑitori
+振り出し ɸɯridɑʃi
+伝達 dentɑt͡sɯ
+やむなく jɑmɯnɑkɯ
+プラモデル pɯrɑmoderɯ
+徹底的 te̞ʔte̞ːte̞ki
+ふり ɸɯri
+貰っ morɑʔ
+とろみ to̞ro̞mi
+注い so̞so̞j
+ロイヤル roijɑrɯ
+誇っ ho̞ko̞ʔ
+別状 bet͡sɯdʒo̞ɯ
+就任 ʃɯːnin
+開拓 kɑitɑkɯ
+作ろ t͡sɯkɯro̞
+ゲーム ge̞ːmɯ
+趣味 ʃɯmi
+命 ino̞tʃi
+特製 tokɯseː
+告示 ko̞kɯdʒi
+レンジ re̞ndʒi
+打ち合わせ ɯtʃiawase
+痩せる jaseɾɯ
+転じる te̞ndʒirɯ
+とくに to̞kɯni
+ノンフィクション no̞nɸikɯʃo̞n
+軒並み nokinɑmi
+場 bɑ
+ょ jo̞
+読み上げ jomiɑge
+爆弾 bɑkɯdɑn
+束ね tɑbɑne
+折り畳み oritɑtɑmi
+ハン hɑn
+盛岡大 moriokɑdɑi
+伸ばす nobasɯ
+カテナチオ kɑtenɑ tʃio
+入り iri
+平 tɑirɑ
+永尾 nɑgɑo
+手がけ tegɑke
+売掛金 ɯrikɑkekin
+ネイバージャパン neibɑː dʒɑpɑn
+坂本 sɑkɑmoto
+遅し o̞so̞ʃi
+定評 teːço̞ː
+やろ jɑro
+吉祥寺 kitʃidʒo̞ɯdʒi
+戦 ikɯsa
+ヤヤコシイ jɑjɑkoʃiː
+つぶやこ t͡sɯbɯjɑko
+淡々 tɑntɑn
+知っ ʃiʔ
+ベタつく betɑt͡sɯkɯ
+周期 ʃɯːki
+セ se̞
+あげれ ɑgere
+カネ kɑne
+文京 bɯnkʲo̞ɯ
+顔 kɑo
+仮面ライダー kɑmen rɑidɑː
+やけ jɑke
+多々 tɑtɑ
+なれる nɑrerɯ
+渡さ vɑtɑsɑ
+チークダンス tʃiːkɯdansɯ
+踏み切り ɸɯmikiri
+株価 kɑbɯkɑ
+樹脂 dʒɯʃi
+ひどかっ hidokɑʔ
+手配 tehɑi
+開花 kɑikɑ
+狙う nerɑɯ
+小栗 o̞gɯri
+悪材料 ɑkɯzɑirʲoː
+ギニア giniɑ
+犯す okasɯ
+芦崎 ɑʃisɑki
+伝え t͡sɯtɑe
+形成 kɑiseː
+プリント pɯrinto̞
+第三者 dɑizɑnʃɑ
+誤解 gokɑi
+満たす mitasɯ
+妻 t͡sɯmɑ
+判断 hɑndɑn
+古風 ko̞ɸɯː
+副業 ɸɯkɯgʲo̞ɯ
+選外 seᵝɑi
+冷却 reːkʲɑkɯ
+戻せ mo̞do̞ze
+原文 ge̞nbɯn
+混ん ko̞n
+インタビュアー intɑbʲɯɑː
+なぐら nɑgɯrɑ
+間違える mɑtʃigɑerɯ
+坪 t͡sɯbo̞
+もたらし motɑrɑʃi
+ひきかえ hikikɑe
+恵方 eho̞ɯ
+表れ araɯare
+スパイス sɯpaisɯ
+つながり t͡sɯnɑgɑri
+キューバ kʲɯːbɑ
+節目 ɸɯʃime̞
+ドレッシング doɾɛsʃiŋɡɯ
+込み ko̞mi
+武器 bɯki
+肝心 kɑndʒin
+咄嗟 tosɯɕɑ
+ヒジ hidʒi
+奇数 kisɯː
+西口 niʃigɯtʃi
+アラーム ɑrɑːmɯ
+不慣れ ɸɯnɑre
+署 ʃo̞
+街並み mɑtʃinɑmi
+寂しい sɑbiʃiː
+簗瀬 jɑnɑse
+メート meːto̞
+団長 dɑntʃoː
+忘年会 boːnen kɑi
+やりがい jɑrigɑi
+違和感 iɯa kan
+端 hɑʃi
+埋め込み ɯmeko̞mi
+投書 to̞ɯʃo̞
+クロス kɯɾosɯ
+なくし nɑkɯʃi
+傾げ kɑʃige
+一軍 itʃi gɯn
+了承 rʲo̞ɯʃo̞ɯ
+傾斜 keːʃɑ
+ボス bosɯ
+脱出 daʔʃɯt͡sɯ
+暴利 bo̞ɯri
+パスポート pasɯpoːto
+門限 mo̞ngen
+ステンレス sɯtenresɯ
+強固 kʲo̞ɯko̞
+番台 bɑndɑi
+党 to̞ɯ
+氷河期 çoːgɑ ki
+打ち出し ɯtʃidɑʃi
+満杯 mɑnpɑi
+ミツユビナマケモノ mit͡sɯjɯbinɑmɑkemono
+旋回 seŋkɑi
+窮屈 kʲɯːkɯt͡sɯ
+ごす gosɯ
+知的 tʃite̞ki
+私 ɯatakɯʃi
+紙面 ʃime̞n
+普段 ɸɯdɑn
+面前 me̞nze̞n
+寿司 sɯʃi
+虐殺 gɒkusatsu
+センセーショナル senseːʃonaɾɯ
+在日 zɑinitʃi
+本物 ho̞nmo̞no̞
+ほら horɑ
+名言 me̞ːge̞n
+茂 ʃige̞rɯ
+基本 kiho̞n
+売り出し ɯridɑʃi
+補っ oginɑʔ
+見抜い minɯi
+テレビ te̞re̞bi
+製作 seːsakɯ
+性器 se̞ːki
+多面 tɑmen
+産まれ ɯmɑre
+切り開い kirihirɑi
+堺 sɑkɑi
+下値 ʃitɑne
+中学 tʃɯːgɑkɯ
+スンニ sɯnni
+たたみ tɑtɑmi
+消石灰 ʃoːsekʔɑi
+進撃 ʃinge̞ki
+みすみす misɯmisɯ
+なにしろ nɑniʃiro
+羽幌 hɑboro
+乱れ midɑre
+シリーズ ʃiriːzɯ
+周囲 ʃɯːi
+両論 rʲo̞ɯro̞n
+毎年 mɑitoʃi
+飲み込み no̞miko̞mi
+シリアス ʃiɾiasɯ
+運輸 ɯɲjɯ
+終了 ʃɯːrʲo̞ɯ
+多重 tɑdʒɯː
+羨ましく ɯrɑjɑmɑʃikɯ
+ありふれ ɑriɸɯre
+様子 joːsɯ
+室長 ʃit͡sɯtʃo̞ɯ
+駆けつけ kɑket͡sɯke
+就職 ʃɯːʃo̞kɯ
+隠滅 inme̞t͡sɯ
+千 se̞n
+塩水 ensɯi
+同情 do̞ɯdʒo̞ɯ
+いわゆる iɯajɯrɯ
+機嫌 kige̞n
+窓 mɑdo
+君 kimi
+比べ kɯrɑbe
+チョキ tʃo̞ki
+ワールド ɯaːrɯdo
+コミュニケーション ko̞mʲɯnikeːʃo̞n
+咲い sɑi
+虚ろ ɯt͡sɯro̞
+魔性 mɑʃoː
+インフルエンザ inɸɯrɯenzɑ
+答志島 kotɑe ʃidʒimɑ
+ピチピチ pitʃipitʃi
+でしゃばら deʃɑbɑrɑ
+ぶつかる bɯt͡sɯkɑrɯ
+行い okonɑi
+上期 kɑmiki
+効き kiki
+かなっ kɑ nɑʔ
+お決まり okimɑri
+溜まり場 tɑmɑribɑ
+糸子 ito̞ko̞
+外科 gekɑ
+懸念 ke̞ne̞n
+混血 ko̞nket͡sɯ
+収まり osɑmɑɾi
+先制 se̞nse̞ː
+強権 kʲo̞ɯken
+行っ iʔ
+稿 ko̞ɯ
+動き出す ɯɡokidasɯ
+子持ち ko̞mo̞tʃi
+なう nɑɯ
+測る hɑkɑrɯ
+塔 to̞ɯ
+永 hisɑʃi
+リザルト rizɑrɯto
+東宮 to̞ɯgɯː
+朝飯 ɑsɑmeʃi
+なんぞ nɑnzo
+気分 kibɯn
+小腹 kobɑrɑ
+ドンキホーテ do̞nkiho̞ːte
+ツボ t͡sɯbo̞
+手間 temɑ
+済み sɯmi
+かずこ kɑzɯko
+手入れ te̞ire̞
+個 ko̞
+示す ʃimesɯ
+賑わし nigiɯaʃi
+ストロー sɯtoɾoː
+わかりやすさ wakaɾijasɯsa
+らし rɑʃi
+躊躇 tʃɯːtʃo̞
+生ん ɯn
+主治医 ʃɯdʒii
+従業 dʒɯːgʲo̞ɯ
+別 be̞t͡sɯ
+日系 niʔke̞ː
+たくましく tɑkɯmɑʃikɯ
+バラード bɑrɑːdo
+まとまる mɑtomɑrɯ
+頼ま tɑnomɑ
+語れる kɑtɑrerɯ
+アシスト aʃisɯto
+先発 senpatsɯɯ
+変 he̞n
+字 dʒi
+渡す vatasɯ
+四国 ʃiko̞kɯ
+協奏曲 kjoːsoːkjokɯ
+文字どおり mo̞dʒi do̞ːri
+上意下達 dʒoːi kɑtɑt͡sɯ
+東西 toːzɑi
+切り裂く kiɾisakɯ
+アヘン ɑhen
+ラウンダー rɑɯn dɑː
+前原 mɑehɑrɑ
+東洋 to̞ɯjo̞ɯ
+寝室 ʃinʃit͡sɯ
+墓 hɑkɑ
+生き残ら ikinokorɑ
+積み t͡sɯmi
+亡くなり nɑkɯnɑri
+群馬 gɯnmɑ
+介護 kɑigo
+お構い okɑmɑi
+自著 dʒitʃo̞
+金メダル kinmedɑrɯ
+療法 rʲo̞ɯho̞ɯ
+通信 t͡sɯːʃin
+使い切ら t͡sɯkɑi kirɑ
+大事 dɑidʒi
+一色 iʔʃokɯ
+うっ ɯʔ
+前途 zento̞
+催さ mojoːsɑ
+上げる ɑgerɯ
+せい se̞ː
+即 sokɯ
+置き場 okibɑ
+招く mɑnekɯ
+衣類 irɯi
+為替 kɑvɑze
+マウイ mɑɯi
+毅然 kize̞n
+紙袋 kɑmibɯkɯro
+バトル bɑtorɯ
+電工 denko̞ɯ
+晝休 hirɯ kʲɯː
+諸悪 ʃoɑkɯ
+いちど itʃi do̞
+けいこ keːko̞
+エンジ e̞ndʒi
+指名 ʃime̞ː
+メス mezɯ
+イメージ ime̞ːdʒi
+字幕 dʒimɑkɯ
+受付ける ɯke̞t͡sɯke̞rɯ
+漂わ tadajoɯa
+申 saɾɯ
+だから dɑkɑrɑ
+京田 kʲoːdɑ
+いつしか it͡sɯ ʃi kɑ
+ごちそう go̞tʃiso̞ː
+豊島 toʃimɑ
+粉飾 ɸɯnʃo̞kɯ
+ふさぎこん fɯsagikon
+録画 rokɯgɑ
+レミオロメン remio̞ro̞men
+ブレ bɯre̞
+なく nɑkɯ
+訪ねる tɑzɯnerɯ
+スヌーズ sɯnɯːzɯ
+飲み代 no̞miʃiro̞
+行政 goːseː
+朝鮮中央通信 tʃoːsentʃɯːoːtsɯːʃin
+ソウ so̞ː
+国対 kokɯtɑi
+下げよ sɑgejo
+そしり so̞ʃiɾi
+壁 kɑbe
+ひか hikɑ
+難色 nɑnʃokɯ
+送り込ま okɯrikomɑ
+切り替え kirikɑe
+質し tɑdɑʃi
+ヘタ hetɑ
+気丈 kidʒo̞ɯ
+爽茶 soːtʃɑ
+芝浦工業大学 ʃibɑɯrɑ koːgʲoː dɑigɑkɯ
+勝者 ʃoːʃɑ
+淑女 ʃɯkɯdʒo̞
+ミニスカート minisɯkaːto
+鉄道 tet͡sɯdo̞ɯ
+誇らしい hokorɑʃiː
+災い ɯazaɯai
+日の出 hino̞de
+病状 bʲo̞ɯdʒo̞ɯ
+祖母 so̞bo̞
+クーポン kɯːpo̞n
+めずらしい mezɯrɑʃiː
+ねじれ ne̞dʒire̞
+機関 kikɑn
+保阪 hosɑkɑ
+パフォーマー pɑɸoːmɑː
+磨い migɑi
+団地 dɑntʃi
+名跡 mjo̞ːseki
+撮 t͡sɯmɑmi
+誘惑 jɯːɯakɯ
+新聞紙 ʃinbɯnʃi
+齢 jowai
+にかけ ni kɑke
+香川 kagaɯa
+ヤバイ jɑbɑi
+チャーハン tʃɑːhɑn
+高之 tɑkɑjɯki
+津田 t͡sɯdɑ
+胸元 mɯnɑmoto
+息切れ ikigire̞
+間違っ mɑtʃigɑʔ
+漕い ko̞i
+后 ko̞ɯ
+答える kotɑerɯ
+気楽 kirɑkɯ
+ふっくら ɸɯʔkɯrɑ
+話芸 ɯageː
+劣後 ret͡sɯgo̞
+専門 sɛnmɔn
+シュトレーゼマン ʃɯtoreːzemɑn
+みたい mitɑi
+梅田 ɯmedɑ
+斜面 ʃɑmen
+想起 so̞ːki
+農地 no̞ɯtʃi
+リマンダー rimɑn dɑː
+作動 sɑdoː
+サービスクレンリネス saːbisɯkɯrenrinesɯ
+李 ri
+接続 setsɯzokɯ
+新種 ʃinʃɯ
+深呼吸 ʃinko̞kʲɯː
+俳優 hɑijɯː
+正規 se̞ːki
+治す naosɯ
+尾形 ogɑtɑ
+任さ mɑkɑsɑ
+損ね so̞ko̞ne
+町長 tʃo̞ɯtʃo̞ɯ
+ログイン ro̞gɯ in
+つら t͡sɯrɑ
+手続き te̞t͡sɯzɯki
+全量 zenrʲo̞ɯ
+会派 kɑihɑ
+潮田 ɯʃiodɑ
+咫烏 atakaɾasɯ
+マンション mɑnʃon
+大昔 oːmɯkɑʃi
+ケータイ keːtɑi
+食べ物 tɑbemono
+断わっ kotoɯaʔ
+善行 zenko̞ɯ
+電池 de̞ntʃi
+強まる t͡sɯjomɑrɯ
+賞金 ʃo̞ɯkin
+くわえ kɯɯae
+立ちはだかっ tɑtʃihɑdɑkɑʔ
+ワーキング ɯaːkingɯ
+その so̞no̞
+ひどく hido̞kɯ
+ハイペース haipeːsɯ
+継承 keːʃo̞ː
+合併 gɑʔpeː
+夜尿症 jɑɲoːʃoː
+止めれ to̞mere
+受身 ɯke̞mi
+責め se̞me̞
+ことさら kotosɑɾɑ
+オーストラリア oːsɯtoɾaɾja
+題字 dɑidʒi
+副 ɸɯkɯ
+水門 sɯimon
+何らかの nɑnrɑ kɑ no
+司法研修所 ʃiho̞ɯ kenʃɯːʃo̞
+イニシャル iniʃɑrɯ
+キティ kiti
+つ t͡sɯ
+酷評 ko̞kɯço̞ɯ
+首位 ʃɯi
+事後 dʒigo̞
+押す osɯ
+有害 jɯːgɑi
+法則 hoːsokɯ
+法務局 ho̞ɯmɯ kʲo̞kɯ
+崩し kɯzɯʃi
+選り好み erigo̞no̞mi
+意 i
+かから kɑkɑrɑ
+ヘモグロビン hemo̞gɯro̞bin
+取り上げ toriɑge
+豆腐 to̞ɯɸɯ
+ボランティア borɑntiɑ
+結ん mɯsɯn
+廃墟 hɑikʲo
+蘭子 rɑnko
+ベルルスコーニ bərɯrɯsɯkoːni
+気象庁 kiʃo̞ɯtʃo̞ɯ
+載っ no̞ʔ
+沈み ʃizɯmi
+くすり kɯsɯɾi
+挟ま hɑsɑmɑ
+音楽 ongɑkɯ
+エロイ ero̞i
+後 ɑto
+一室 iʔʃitsɯ
+失し ʃɪsʃi
+実生活 dʒiʔseːkatsɯ
+スライド sɯɾaiðo
+電気 de̞nki
+チョコ tʃo̞ko̞
+甘く ɑmɑkɯ
+公取委 ko̞ːto̞riː
+ぎろっと giro̞ʔ to̞
+ベレゾフスキー berezofɯsɯkiː
+フォロー ɸo̞ro̞ː
+買い kɑi
+それでは soɾedeɯa
+自治体 dʒitʃi tɑi
+嘆く nɑgekɯ
+振る舞わ ɸɯrɯmaɯa
+伝統 dento̞ɯ
+家政 kɑseː
+歩き回っ arɯkimaɯaʔ
+町議 tʃo̞ɯgi
+内野 nɑijɑ
+なんせ nɑnse
+脅かさ obijɑkɑsɑ
+成績 se̞ːse̞ki
+好む ko̞no̞mɯ
+コース koːsɯ
+励み hɑgemi
+保有 ho̞jɯː
+後で ɑto de
+奇手 kiʃɯ
+英 e̞ː
+解決 kɑiket͡sɯ
+ねた netɑ
+押し o̞ʃi
+沿岸 engɑn
+風車 kɑzɑgɯrɯmɑ
+バンザイ bɑnzɑi
+藤井 ɸɯdʒii
+鳴海 nɑrɯmi
+マテ mɑte
+素敵 sɯteki
+近かっ tʃikɑkɑʔ
+走っ hɑʃiʔ
+サーキット sɑːkiʔto
+落とし穴 otoʃiɑnɑ
+太刀打ち tɑtʃiɯtʃi
+曜日 jo̞ɯbi
+ゆとり jɯto̞ri
+みそ miso̞
+下 ʃitɑ
+悪し ɑʃi
+勝てる kɑterɯ
+響き hibiki
+春男 hɑrɯo
+人身 dʒinʃin
+底堅 sokoɰᵝɑtɑ
+ネーミング ne̞ːmingɯ
+みえ mie̞
+起用 kijo̞ɯ
+辺 he̞n
+ブラウン bɯrɑɯn
+糾弾 kʲɯːdɑn
+出産 ʃɯʔsan
+ミラー mirɑː
+うる ɯrɯ
+飲み no̞mi
+堂々巡り do̞ɯdo̞ɯ megɯri
+お出かけ odekɑke
+温かい ɑtɑtɑkɑi
+冬空 ɸɯjɯzorɑ
+片面 kɑtɑmen
+増す masɯ
+堅気 kɑtɑgi
+飛島 tobiʃimɑ
+帝人 te̞ːdʒin
+マドリー mɑdoriː
+クオリティ kɯo̞riti
+玩具 gɑngɯ
+暗黙 ɑnmokɯ
+星空 hoʃizorɑ
+短期 tɑnki
+規約 kijɑkɯ
+被爆 hibɑkɯ
+復路 ɸɯkɯro̞
+ミネラル minerɑrɯ
+熱かっ ɑt͡sɯkɑʔ
+通 t͡sɯː
+郷愁 kʲo̞ɯʃɯː
+続編 zo̞kɯhen
+嬉しゅう ɯre̞ʃɯː
+うらやましかっ ɯrɑjɑmɑʃikɑʔ
+任務 ninmɯ
+たち tɑtʃi
+気張っ kibɑʔ
+あの世 ɑnojo
+貼る hɑrɯ
+右手 migite̞
+空腹 kɯːɸɯkɯ
+促進 sokɯʃɪn
+読め jo̞me
+地球 tʃikʲɯː
+安売り jasɯɯri
+飼い主 kɑinɯʃi
+尽く ko̞to̞go̞to̞kɯ
+のっける no̞ʔkerɯ
+理路 riro̞
+スティーブ sɯtiːbɯ
+陽 hi
+不適切 fɯtekisetsɯ
+おまる omɑrɯ
+一層 iʔso̞ː
+冷たく t͡sɯmetɑkɯ
+祖父 sofɯ
+望み no̞zo̞mi
+フランス fɯɾansɯ
+過す sɯɣosɯ
+紫 mɯɾasaki
+皆様 minɑsɑmɑ
+恵み me̞gɯmi
+送れ o̞kɯre
+決断 ket͡sɯdɑn
+桜 sakɯra
+ですから desɯkaɾa
+解明 kɑimeː
+寄稿 kiko̞ɯ
+女のコ onnɑnoko
+まま mɑmɑ
+禁物 kinmo̞t͡sɯ
+山の手 jɑmɑnote
+給料 kʲɯːrʲo̞ɯ
+忘れ vasɯɾe
+結び mɯsɯbi
+悩め nɑjɑme
+しばしば ʃibɑʃibɑ
+リアリティ riɑriti
+主君 ʃɯkɯn
+とき to̞ki
+明治大学 meːdʒi dɑigɑkɯ
+話し合い hɑnɑʃiɑi
+話し hɑnɑʃi
+つながら t͡sɯnɑgɑrɑ
+てろ tero̞
+文書 bɯnʃo̞
+育児 ikɯdʒi
+侵入 ʃinɲɯː
+逃げ切る nige̞ kirɯ
+食材 ʃokɯzɑi
+営利 e̞ːri
+静か ʃizɯkɑ
+今さら imɑsɑɾɑ
+折角 seʔkakɯ
+ごみ go̞mi
+おいで o̞ide
+根治 ko̞ndʒi
+手 te̞
+上手 dʒo̞ɯzɯ
+仕上がる ʃiɑgɑrɯ
+くゆらせ kɯjɯɾase
+ワル ɯarɯ
+言お io̞
+共産党 kjoːsɑntoː
+一人ひとり hito̞rihito̞ri
+若 ɯaka
+底堅く sokogatakɯ
+絵文字 e mo̞dʒi
+くだら kɯdɑrɑ
+残高 zɑndɑkɑ
+セーラー服 seːraːfɯkɯ
+極み kiɯami
+許せる jɯɾɯseɾɯ
+着 ki
+関口 sekigɯtʃi
+穀 ko̞kɯ
+宿所 ʃɯkɯʃo̞
+分かれ ɯakare
+ともあれ tomoɑre
+亡霊 bo̞ːreː
+吸盤 kʲɯːbɑn
+通せる tooseɾɯ
+愛する aisɯɾɯ
+員 in
+精度 seːdo̞
+乗り越え no̞riko̞e
+バスト basɯto
+蚊帳 kɑjɑ
+閑 kɑn
+乗客 dʒoːkʲɑkɯ
+埋め立て ɯmetɑte
+履く hɑkɯ
+なる nɑrɯ
+キャスト kjasɯto
+混乱 konrɑn
+囲ま kɑkomɑ
+慎 ʃin
+静岡 ʃizɯokɑ
+三陸 sanrikɯ
+占い ɯrɑnɑi
+異なる kotonɑrɯ
+組閣 sokakɯ
+マインド mɑindo
+キルト kirɯto̞
+合し go̞ɯʃi
+岸 kiʃi
+自由が丘 dʒijɯːgɑokɑ
+安値 jasɯne
+建材 kenzɑi
+雑種 zasʃɯ
+橋本 hɑʃimoto
+クラスメート kɯɾasɯmeːto
+ド do̞
+終わる oɯarɯ
+スパイラル sɯpaiɾaɾɯ
+パトカー pɑtokɑː
+とりわけ toriɯake
+戻れ mo̞do̞re
+狂わす kɯɾɯwasɯ
+アマチュア ɑmɑtʃɯɑ
+配属 hɑizokɯ
+減衰 gensɯi
+商状 ʃo̞ɯdʒo̞ɯ
+低く hikɯkɯ
+目的 mo̞kɯteki
+開発元 kɑihɑt͡sɯ moto
+病気 bʲo̞ɯki
+ドロドロ do̞ro̞do̞ro̞
+名古屋大 nɑgojɑdɑi
+復讐 ɸɯkɯʃɯː
+掛け声 kɑkegoe
+いっぱい iʔpɑi
+ケン ke̞n
+館 jɑkɑtɑ
+リモコン rimo̞ko̞n
+弘道 ko̞ɯdo̞ɯ
+ヤクルト jɑkɯrɯto
+無罪 mɯzɑi
+睡眠 sɯimin
+予期 jo̞ki
+漬物 t͡sɯkemo̞no̞
+台 dɑi
+監督 kɑntokɯ
+タイム tɑimɯ
+我 ɯare
+春画 ʃɯngɑ
+習志野 nɑrɑʃino
+大勢 o̞ːzeː
+ざま zɑmɑ
+成り立た nɑritɑtɑ
+嬢 dʒo̞ɯ
+会う ɑɯ
+親指 ojɑjɯbi
+利便 ribe̞n
+スピード sɯpiːdo
+風物詩 ɸɯːbɯt͡sɯ ʃi
+写し ɯt͡sɯʃi
+手塚 tezɯkɑ
+動画 doːgɑ
+気くばり kikɯbɑri
+騒ぐ savaɠɯ
+箭内 jɑnɑi
+にとり ni to̞ri
+会員 kɑiin
+栄 e̞ː
+したがって ʃitɑgɑʔte
+単位 tɑni
+最小限 sɑiʃoːɡen
+草分け kɯsavake
+メッセージ mesʔeːdʒi
+取り扱い toriɑt͡sɯkɑi
+取り戻そ to̞ɾimo̞ðo̞so̞
+賃料 tʃinrʲo̞ɯ
+タコ tɑko
+作り話 t͡sɯkɯribɑnɑʃi
+ただならぬ tɑdɑ nɑrɑnɯ
+苦しい kɯrɯʃiː
+記録 kiro̞kɯ
+殿 to̞no̞
+悪気 ɯarɯgi
+木造 mo̞kɯzo̞ɯ
+景子 keːko̞
+コバンザメ kobɑn zɑme
+構想 ko̞ːso̞ː
+こき下ろす kokioɾosɯ
+おもい o̞mo̞i
+一文字 itʃi mo̞dʒi
+焦 ʃo̞ɯ
+佳彦 jo̞ʃihiko̞
+午後 go̞go̞
+十 dʒɯː
+覚ます samasɯ
+従兄弟 ito̞ko̞
+抑える osaeɾɯ
+寝苦しい ne̞gɯrɯʃiː
+精 se̞ː
+数字 suːdʒi
+約束 jakɯsokɯ
+喜ん jo̞ro̞ko̞n
+ツナ t͡sɯnɑ
+韓米 kɑnbeː
+人気者 ninkiʃɑ
+習氏 ??
+箸 hɑʃi
+埋める ɯme̞rɯ
+チタ tʃitɑ
+貝 kɑi
+寂しく sabiʃikɯ
+聯合 rengo̞ɯ
+お母さん ohɑhɑsɑn
+カルシファーサルマン kaɾʊʃifaːsaɾɯman
+閃い hirɑmei
+旨い ɯmɑi
+対抗 tɑikoː
+コンソールアプリ konsoːrɯapɯri
+はせる haseɾɯ
+シュタルク ʃɯtɑrɯkɯ
+教えろ o̞ʃiero̞
+堂 do̞ɯ
+けなげ kenɑge
+武富士 tɑkeɸɯdʒi
+どころか dokoro kɑ
+ぶれ bɯre̞
+繁 ʃige̞rɯ
+要望 jo̞ɯbo̞ɯ
+温かく ɑtɑtɑkɑkɯ
+見比べ mikɯrɑbe
+到着 toːtʃɑkɯ
+岡本 okɑmoto
+入ら hɑirɑ
+切り出す kiɾidasɯ
+サビ sɑbi
+ふさわしい fɯsawaʃiː
+かえる kɑerɯ
+手合わせ teaɯaze
+洗練 sɛnrɛn
+金曜日 kiɲɯhi
+母 hɑhɑ
+ことば kotobɑ
+貯め tɑme
+対面 tɑimen
+菜園 sɑien
+出来事 dekigo̞to̞
+比較的 hikɑkɯteki
+後者 koːʃɑ
+借り物 kɑrimono
+池袋 ikebɯkɯro̞
+訓練 kɯnre̞n
+応じ o̞ɯdʒi
+無用 mɯjo̞ɯ
+あっという間に ɑʔ to iɯ mɑ ni
+輝い kɑgɑjɑi
+やっと jɑʔto
+持ち mo̞tʃi
+同居 do̞ɯkʲo̞
+英会話 ei kaiɯa
+なきゃ nɑkʲɑ
+徒花 ɑdɑbɑnɑ
+負傷 ɸɯʃo̞ɯ
+殺し ko̞ro̞ʃi
+項目 ko̞ɯmo̞kɯ
+欺瞞 gimɑn
+ブリヂストン bɯɾizisɯton
+退か ʃirizokɑ
+値 ɑtɑi
+絵空事 esorɑgoto
+発言 hɑt͡sɯgen
+読書 do̞kɯʃo̞
+カンザス kanzasɯ
+国土 ko̞kɯdo̞
+ローソン ɾo̞ːso̞n
+頂け itɑdɑke
+撒き mɑki
+極力 kʲo̞kɯrʲo̞kɯ
+曲 kʲo̞kɯ
+あがる ɑgɑrɯ
+操れる ɑjɑt͡sɯrerɯ
+右京 ɯkʲo̞ɯ
+片づけ kɑtɑzɯke
+ルーフ rɯːɸɯ
+貸出 kɑʃidɑʃi
+パターン pɑtɑːn
+富 to̞mi
+苦言 kɯge̞n
+にわかに niɯaka ni
+基地 kitʃi
+作ら t͡sɯkɯrɑ
+面白み o̞mo̞ʃiro̞mi
+秘書 hiʃo̞
+見込む miko̞mɯ
+そいつ sojtsɯ
+表題 çoːdɑi
+建設 ke̞nse̞tsɯ
+平蔵 heːzo̞ː
+はっ hɑʔ
+あり ɑri
+大詰め o̞ːzɯme
+先行 senko̞ː
+留保 rʲɯːho̞
+くれぐれも kɯregɯre mo̞
+浮かび上がっ ɯkɑbiɑgɑʔ
+諸 mo̞ro̞
+扇風機 senpɯːki
+杉谷 sɯgitani
+東京ドーム to̞kʲo̞ do̞ːmɯ
+売買 bɑibɑi
+ウロコ ɯro̞ko̞
+捕っ to̞ʔ
+エンケ e̞n ke̞
+ユ jɯ
+肩身 kɑtɑmi
+ん n
+大役 tɑijɑkɯ
+くだけ kɯdɑke
+抱え kɑkɑe
+ほのか honokɑ
+ぶ bɯ
+応接 o̞ːsetsɯ
+ティー tiː
+龍之介 rjɯːnosɯke
+打ち切っ ɯtʃikiʔ
+仕事場 ʃigotobɑ
+薬湯 jɑkɯtoː
+消却 ʃoːkʲɑkɯ
+エリア eriɑ
+上がれ ɑgɑre
+さい sɑi
+道 mitʃi
+後始末 ɑto ʃimɑt͡sɯ
+思う存分 o̞mo̞ɯ zo̞nbɯn
+天ぷら tenpɯrɑ
+甘酒 ɑmɑzɑke
+カウンセラー kaɯnseɾaː
+ナチュラルメイクブーム nɑtʃɯrɑrɯ meikɯ bɯːmɯ
+見上げ miɑge
+古 ɸɯrɯ
+再婚 sɑikon
+頷く ɯnɑzɯkɯ
+嫌わ ija ɯa
+大谷 oːjɑ
+節度 setsɯðo
+プレミア pɯremiɑ
+お答え okotɑe
+食 ʃo̞kɯ
+文明 bɯnme̞ː
+演劇 e̞nge̞ki
+かみしめ kɑmiʃime
+飛散 hisɑn
+掘っ ho̞ʔ
+メンテナンス mentenansɯ
+初子 ɯigo̞
+派遣 hɑken
+豪快 goːkɑi
+アーカイブ ɑːkɑibɯ
+試験 ʃike̞n
+ず zɯ
+ペリエ pe̞rie̞
+直前 tʃo̞kɯzen
+ノミネート no̞mineːto̞
+平年 he̞ːne̞n
+幹雄 mikio̞
+アキ子 ɑkiko
+故意 ko̞i
+枕 mɑkɯrɑ
+ファン ɸɑn
+バリエーション bɑrieːʃon
+古閑 kogɑ
+はじめまして hɑdʒimemɑʃite
+つけれ t͡sɯke̞re̞
+橋 hɑʃi
+さすが sasɯɣa
+助け舟 tasɯkebɯne
+大 o̞ː
+通す toosɯ
+により ni jo̞ri
+実用 dʒit͡sɯjo̞ɯ
+怒り ikɑri
+放棄 ho̞ɯki
+脳天 no̞ɯten
+保夫 jasɯo
+取りとめる to̞rito̞merɯ
+ほっ ho̞ʔ
+損保 so̞npo̞
+出来る de̞kirɯ
+省令 ʃo̞ːreː
+飯田 iːdɑ
+ダイヤモンド dɑijɑmondo
+赤い ɑkɑi
+興奮 ko̞ɯɸɯn
+一輪車 itʃirinʃɑ
+移り気 ɯt͡sɯrigi
+虫 mɯʃi
+線描 senbjo̞ː
+お祈り o̞ino̞ri
+させ sɑze
+層 so̞ː
+頑固 gɑnko
+藤 ɸɯdʒi
+謙虚 kenkʲo̞
+小麦粉 komɯgi konɑ
+チャリティ tʃɑriti
+エロ ero̞
+鑑識 kɑnʃiki
+受賞 dʒɯʃo̞ɯ
+マガジン mɑgɑdʒin
+与党 jo̞to̞ɯ
+集結 ʃɯːke̞t͡sɯ
+むやみ mɯjɑmi
+原作 gensakɯ
+急落 kʲɯːrɑkɯ
+二流 nirʲɯː
+要約 joːjɑkɯ
+非常時 hidʒo̞ɯ dʒi
+取れ to̞re
+導入 do̞ɯɲɯː
+同時 do̞ɯdʒi
+胸板 mɯnɑitɑ
+森 mo̞ri
+ネットワーク neʔtoɯaːkɯ
+水彩 sɥizɑi
+ナミ nɑmi
+未亡人 mibo̞ɯdʒin
+反転 hɑnten
+天下 tenkɑ
+ポエム po̞emɯ
+隠れ kɑkɯre
+王手 o̞ɯte
+無茶 mɯtʃɑ
+プロレス pɯɾoɾesɯ
+ほんの ho̞nno̞
+是非 ze̞hi
+なっ nɑʔ
+二の句 nino̞kɯ
+木 ki
+いったい iʔtɑi
+然 sɑ
+大学生 daigakɯseː
+獣 kemo̞no̞
+あふれる ɑɸɯrerɯ
+冗談口 dʒoːdɑn kɯtʃi
+小首 ko̞kɯbi
+強める t͡sɯjo̞merɯ
+夫婦 ɸɯːɸɯ
+リザ rizɑ
+赤紙 ɑkɑgɑmi
+糸井 ito̞i
+向かお mɯkɑo
+舫米倉 mojɑi jonekɯrɑ
+人骨 dʒinko̞t͡sɯ
+貢献 ko̞ɯken
+レディ re̞di
+つまり t͡sɯmɑri
+イノベーション ino̞beːʃo̞n
+意味合い imiɑi
+レンジャーズ rendʒɑːzɯ
+指し示す saʃiʃimesɯ
+終ら oɯara
+憎め nikɯme̞
+湯水 jɯmizɯ
+オナニープレイ onɑniː pɯrei
+褒美 ho̞ɯbi
+踏み込め ɸɯmiko̞me
+味わう adʒiɯaɯ
+ヒーン hiː n
+ほっしん ho̞ʔʃin
+乏しく to̞bo̞ʃikɯ
+界 sɑkɑi
+自活 dʒikɑt͡sɯ
+生け簀 ikesɯ
+ドキッと do̞kit to̞
+取り締まり toriʃimɑri
+成長 seːtʃoː
+ちまっ tʃimɑʔ
+映る ɯt͡sɯrɯ
+和幸 kɑzɯjɯki
+毒舌 do̞kɯzet͡sɯ
+立ち戻ろ tɑtʃimodoro
+恋人 ko̞ibito̞
+南西 nɑnseː
+返礼 he̞nre̞ː
+ほどなく hodo nɑkɯ
+歳 to̞ʃi
+極め kiɯame
+妥協 dɑkʲoː
+面し me̞nʃi
+肝要 kɑnjoː
+つなぐ t͡sɯnɑgɯ
+毎夜 mɑijo
+見立て mitɑte
+渋々 ʃibɯʃibɯ
+重視 dʒɯːʃi
+くら kɯrɑ
+大会 tɑikɑi
+九 kʲɯː
+鷲掴み ɯaʃizɯkami
+永井 nɑgɑi
+ダニ dɑni
+帰る kɑerɯ
+ポン酢 po̞nzɯ
+ずるい zɯrɯi
+ウナギ ɯnɑgi
+期日 kidʒit͡sɯ
+実在 dʒit͡sɯzɑi
+猛 tɑkeʃi
+仕切っ ʃikiʔ
+同感 doːkɑn
+走ら hɑʃirɑ
+ドキドキバクバク dokidoki bɑkɯbɑkɯ
+説 setsɯzɯ
+内部 nɑibɯ
+ユニ jɯni
+成り立ち nɑritɑtʃi
+出来栄え dekibɑe
+弊社 heːʃɑ
+先着 sentʃakɯ
+ズラ zɯrɑ
+国籍 kokɯseki
+一言 hito̞ko̞to̞
+ばっち bɑʔtʃi
+ライフ rɑiɸɯ
+祖父母 sofɯbo
+面倒くさい mendoːkɯsai
+京葉 keːjo̞ː
+さりとて sɑɾitote
+突か t͡sɯkɑ
+地酒 dʒizɑke
+ゴールイン go̞ːrɯ in
+手掛かり tegɑkɑri
+胴 do̞ɯ
+明るい ɑkɑrɯi
+そりゃ soɾjɑ
+ついて t͡sɯite̞
+捧げる sasagerɯ
+ノート no̞ːto̞
+ちなみに tʃinɑmi ni
+地所 dʒiʃo̞
+パノラマ pɑnorɑmɑ
+紆余曲折 ɯjokjokɯsetsɯ
+儲け mo̞ɯke
+歩幅 hohɑbɑ
+せよ sejo̞
+じつは dʒit͡sɯ ɯa
+極端 kʲokɯtɑn
+ビリー biriː
+チャート tʃɑːto
+正しい tɑdɑʃiː
+苦い nigɑi
+覚える o̞bo̞erɯ
+デーブ de̞ːbɯ
+スタイル sɯtaiɾɯ
+ショーマン ʃoː mɑn
+マスク masɯkɯ
+簡略 kɑnrʲɑkɯ
+消す kesɯ
+関与 kɑɲo
+もとより mo̞to̞jo̞ri
+服 ɸɯkɯ
+別に be̞t͡sɯ ni
+硝 ?
+歯がゆかっ hɑgɑjɯkɑʔ
+引き込む hikiko̞mɯ
+佐野 sɑno
+キラー kirɑː
+罠 ɯana
+交え mɑdʒie
+洗い ɑrɑi
+帰っ kɑeʔ
+やす jasɯ
+ハウス haɯsɯ
+長続き nɑgɑt͡sɯzɯki
+収める osameɾɯ
+人寄せ hito̞jo̞se
+歌人 kɑdʒin
+三上 mikɑmi
+出来上がる dekiɑgɑrɯ
+天皇 tenno̞ɯ
+立たさ tɑtɑsɑ
+肩書き kɑtɑgɑki
+いたっ itɑʔ
+呈する teːsɯɾɯ
+川崎 kɑvɑsɑki
+直さ nɑosɑ
+肉 nikɯ
+登校 to̞ɯko̞ɯ
+モーニング mo̞ːningɯ
+お飾り okɑzɑri
+残っ no̞ko̞ʔ
+還暦 kɑnreki
+味わい adʒiɯai
+サヤ sɑjɑ
+アウディ ɑɯdi
+ちゃん tʃɑn
+かかれ kɑkɑre
+エレク e̞re̞kɯ
+忙し isoɡɑʃi
+婚前 ko̞nzen
+もともと mo̞to̞mo̞to̞
+アクシデント ɑkɯʃidento
+圧勝 ɑʔʃoː
+志 kokorozɑʃi
+奏でる kɑnɑderɯ
+各 kɑkɯ
+上地 dʒo̞ɯtʃi
+さりげ sɑɾixe
+福岡 ɸɯkɯokɑ
+後部 ko̞ɯbɯ
+青天の霹靂 seːtenno̞hekireki
+カバン kɑbɑn
+黒田 kɯrodɑ
+民心 minʃin
+長話 nɑgɑbɑnɑʃi
+セキュリティー sekjɯritiː
+パネル pɑnerɯ
+小数点 ʃoːsɯːten
+土鍋 donɑbe
+飾り付け kɑzɑrit͡sɯke
+弾き語り hikigɑtɑri
+煙たい kemɯtɑi
+本番 honbɑn
+中大 tʃɯːdɑi
+連想 ɾenso̞ː
+インテリア interiɑ
+見舞い mimɑi
+電灯 dento̞ɯ
+お寺 oterɑ
+書け kɑke
+問 mo̞n
+深く ɸɯkɑkɯ
+チン tʃin
+ゴーン go̞ːn
+捻出 ne̞nʃɯt͡sɯ
+接合 setsɯgo̞ː
+大震災 dɑiʃinsɑi
+結実 ke̞t͡sɯdʒit͡sɯ
+ツール t͡sɯːrɯ
+防 bo̞ɯ
+生かし ikɑʃi
+論争 ronsoː
+焼き魚 jɑkizɑkɑnɑ
+良し jo̞ʃi
+っ ʔ
+コーナー koːnɑː
+コーティング ko̞ːtingɯ
+傷つい kizɯt͡sɯi
+漫 mɑn
+木質 mo̞kɯʃit͡sɯ
+でる de̞rɯ
+意志 iʃi
+琴 ko̞to̞
+ラスト rasɯto
+軽自動車 keːdʒidoːʃɑ
+そうすると soːsɯrɯto
+はっきり hɑʔkiri
+目下 meʃitɑ
+殺 satsɯsɯ
+バオ bɑ o
+おかしな okɑʃinɑ
+騒動 so̞ːdo̞ː
+ギャング gʲɑngɯ
+光 hikɑri
+世話 sevɑ
+タテ tɑte
+いただける itɑdɑkerɯ
+さっぱり sɑʔpɑɾi
+存続 sonzokɯ
+奢っ o̞go̞ʔ
+コーナ koːnɑ
+サークル saːkɯɾɯ
+笑っ ɯaraʔ
+散る tʃirɯ
+提示 te̞ːdʒi
+以前 ize̞n
+とこ to̞ko̞
+宅配 tɑkɯhɑi
+見せ場 misebɑ
+つきあい t͡sɯkiɑi
+生死 se̞ːʃi
+絆 kizɯnɑ
+あさましい ɑsɑmɑʃiː
+あえて ɑete
+殺す koɾosɯ
+繊細 sensɑi
+きつい kit͡sɯi
+卵焼き tɑmɑgojɑki
+おもろい o̞mo̞ro̞i
+渋み ʃibɯmi
+怒る o̞ko̞rɯ
+労相 ro̞ɯʃo̞ɯ
+スイッチ sɯitʃi
+慈善 dʒize̞n
+リズボン ri zɯbo̞n
+攻防 ko̞ɯbo̞ɯ
+傷口 kizɯgɯtʃi
+部屋 hejɑ
+国民 ko̞kɯmin
+ロビン ro̞bin
+少額 ʃoːgɑkɯ
+探し出し sɑgɑʃidɑʃi
+アイメイク ɑi meikɯ
+暮らし kɯrɑʃi
+飛び火 to̞bihi
+仲買 nɑkɑgɑi
+薄商い ɯsɯakinai
+モーグル mo̞ːgɯrɯ
+クリーン kɯriːn
+極意 go̞kɯi
+ほふく ho̞ɸɯkɯ
+出資 ʃɯsʃi
+鳴る nɑrɯ
+信じれ ʃindʒire̞
+修学旅行 ʃɯːgɑkɯ rʲokoː
+能 no̞ɯ
+他社 tɑʃɑ
+解放 kɑihoː
+ぶっ飛ばす bɯʔtobasɯ
+眠りこけ nemɯriko̞ke
+上京 dʒo̞ɯkʲo̞ɯ
+静電気 se̞ːde̞nki
+段ボール dɑn boːrɯ
+きちんと kitʃinto̞
+得策 tokɯsakɯ
+君臨 kɯnrin
+準決勝 dʑɯŋkɛʔʃoː
+修士 ʃɯːʃi
+都築 t͡sɯzɯki
+新入 ʃinɲɯː
+刻ん kizɑn
+敵 te̞ki
+吉原 joʃiɯara
+多い o̞ːi
+夕張 jɯːbɑri
+誤っ ɑjɑmɑʔ
+細身 ho̞so̞mi
+確か tɑʃikɑ
+町内 tʃoːnɑi
+うつ病 ɯt͡sɯbʲo̞ɯ
+打撃 dɑgeki
+台地 dɑitʃi
+節減 sɛtsɯgɛn
+はたち hɑtɑtʃi
+ご存じ go̞zo̞ndʒi
+貞治 dʒo̞ɯdʒi
+究めよ kiɯamejo
+何より nɑn jori
+釧路 kɯʃiro̞
+心配 ʃinpɑi
+火蓋 hibɯtɑ
+二度と ni do̞ to̞
+茶の間 tʃɑnomɑ
+覗か nerɑi kɑ
+コメ ko̞me
+年初来 nenʃorɑi
+獲物 emo̞no̞
+刻み kizɑmi
+採掘 saikɯtsɯ
+強迫 kʲoːhɑkɯ
+失い nɑi
+厚み ɑt͡sɯmi
+大した tɑiʃitɑ
+好物 ko̞ɯbɯt͡sɯ
+ヤーマン jɑːmɑn
+団 dɑn
+報道 ho̞ɯdo̞ɯ
+余裕 jo̞jɯː
+けっこう keʔko̞ɯ
+値上がり neɑgɑri
+美空 misoɾɑ
+コンテンツ ko̞ntent͡sɯ
+尊厳 so̞ndʑɯən
+目指し mezɑʃi
+ポワッ poɯaʔ
+回遊 kɑijɯː
+落選 ɾɐkɯzɯn
+辞書 dʒiʃo̞
+幸平 ko̞ːheː
+双子 ɸɯtɑgo
+来よ ko̞jo̞
+肩車 kɑtɑgɯrɯmɑ
+ロシア roʃiɑ
+勢 ikio̞i
+多数 tasɯː
+生き延びる ikino̞birɯ
+将 ʃo̞ɯ
+宿敵 ʃɯkɯte̞ki
+水路 sɯiɾo
+アルバム ɑrɯbɑmɯ
+著しく itʃidʒirɯʃikɯ
+洗い場 ɑrɑibɑ
+マグマ mɑgɯmɑ
+ツインタワービル t͡sɯin taɯaː birɯ
+ピアニズム piɑnizɯmɯ
+ナス nasɯ
+呼ぶ jo̞bɯ
+南北 nɑnbokɯ
+むかっ mɯkɑʔ
+付け t͡sɯke̞
+至る itɑrɯ
+娘 mɯsɯme
+朝 ɑsɑ
+品川 ʃinagaɯa
+吉野家 jo̞ʃino̞ ie
+蓄電池 tʃikɯde̞ntʃi
+副産物 fɯkɯsanbɯtsɯ
+超越 tʃo̞ɯet͡sɯ
+取り崩し to̞rikɯzɯʃi
+すら sɯɾa
+器官 kikɑn
+チャレンジ tʃɑrendʒi
+大リーガー dɑiriːgɑː
+簡単 kɑntɑn
+伊勢神宮 iseʒiᵑgɯː
+反省 hɑnseː
+懲役 tʃo̞ɯeki
+尚 nɑo
+貧困 hinko̞n
+ツムラ t͡sɯmɯrɑ
+徹夜 tet͡sɯjɑ
+とともに to̞ to̞mo̞ ni
+が gɑ
+旭硝子 asahigarasɯ
+ワカサギ ɯakasagi
+浦和 ɯraɯa
+煙草 tɑbɑko
+こなせる konaseɾɯ
+など nɑdo
+東京女子大学 tokʲo dʒoʃi dɑigɑkɯ
+楽しみ tɑnoʃimi
+一線 ise̞n
+与し kɯmiʃi
+チカ tʃikɑ
+凝らし korɑʃi
+八つ jo̞ɯt͡sɯ
+おずおず o̞zɯo̞zɯ
+苦しん kɯrɯʃin
+因みに tʃinɑmi ni
+みな minɑ
+いわば iɯaba
+買わ kaɯa
+自信 dʒiʃin
+広がる hirogɑrɯ
+目線 me̞ze̞n
+楽しも tɑnoʃimo
+情景 dʒo̞ɯkeː
+すさまじい susamadʒiː
+整う to̞to̞no̞ɯ
+一 itʃi
+外海 gɑikɑi
+空間 kɯːkɑn
+飛行機 hiko̞ɯ ki
+官 kɑn
+海水 kaisɯi
+悪質 ɑkɯʃit͡sɯ
+遠く to̞ːkɯ
+人的 dʒinte̞ki
+与えよ ɑtɑejo
+犯罪 hɑnzɑi
+ツケ t͡sɯke̞
+高まる tɑkɑmɑrɯ
+停電 te̞ːde̞n
+思い切っ o̞mo̞ikiʔ
+認可 ninkɑ
+坊主 bo̞ɯzɯ
+恋しく ko̞iʃikɯ
+アクセサリー akɯsesariː
+採点 sɑiten
+思い出せ omoidɑse
+御仁 go̞dʒin
+フィルタ ɸirɯtɑ
+フェス fezɯ
+配管 hɑikɑn
+古田 ɸɯrɯtɑ
+ホーマー hoːmɑː
+地区 tʃikɯ
+ボーナス boːnasɯ
+展 te̞n
+ソ連 so̞ɾen
+洋菓子 joː kɑʃi
+わき役 ɯakijakɯ
+仕組 ʃikɯmi
+所在 ʃozɑi
+まき mɑki
+つて t͡sɯte̞
+超過 tʃoːkɑ
+タンチョウ tɑntʃoː
+パスワード pasɯwaːdo
+戸田 todɑ
+港町 minɑtomɑtʃi
+拒否 kʲo̞hi
+スポーティ sɯpoːti
+すれ違い sɯɾetʃiɣai
+浮かべ ɯkɑbe
+落ち込み o̞tʃiko̞mi
+代償 dɑiʃoː
+卒業 sotsɯgjoː
+個室 ko̞ʃit͡sɯ
+ワゴン ɯagon
+隆史 tɑkɑʃi
+隆志 tɑkɑʃi
+避難 hinɑn
+帰り kɑeri
+険しい keɯaʃiː
+リテラシー riterɑʃiː
+宛て ɑte
+装飾 soːshokɯ
+精緻 se̞ːki
+ピン pin
+こだわり kodaɯari
+イトカワ ito kaɯa
+天井 tendʒo̞ɯ
+大成 tɑiseː
+のぞい no̞ zo̞i
+共助 kʲo̞ɯdʒo̞
+関心 kɑnʃin
+置か okɑ
+迎え mɯkɑe
+眼 me̞
+勘 kɑn
+注射 tʃɯːʃɑ
+ドライヤー dorɑijɑː
+カナダ kɑnɑdɑ
+動 do̞ɯ
+王者 oːdʒɑ
+大川 oːkaɯa
+泣き nɑki
+つぶせ tsɯbɯze
+やって来 jɑʔte ki
+悲しむ kɑnɑʃimɯ
+親子 ojɑko
+サワー saɯaː
+送り込む o̞kɯriko̞mɯ
+遺体 itɑi
+飛び回る tobimaɯarɯ
+動く ɯgo̞kɯ
+挙げれ ɑgere
+朝日新聞 asahiʃinbɯn
+上回っ ɯɯamaɯaʔ
+用件 jo̞ɯken
+青森 ɑomori
+忙しい isoɡɑʃiː
+配達 hɑitɑt͡sɯ
+株 kɑbɯ
+洋子 jo̞ɯko̞
+当地 to̞ɯtʃi
+たとえば tɑtoebɑ
+蓮 re̞n
+去年 kʲo̞nen
+神社 dʒindʒɑ
+エレベーター erebeːtɑː
+総意 so̞ːi
+森川 morikaɯa
+胃袋 ibɯkɯro̞
+煤 sɯsɯ
+吹き抜ける ɸɯkinɯke̞rɯ
+付箋 fɯzen
+インフラ inɸɯrɑ
+嫌がり ijɑgɑri
+奉仕 ho̞ɯʃi
+合算 gɑsɑn
+無念 mɯne̞n
+シャワー ʃaɯaː
+過度 kɑdo
+耐え tɑe
+待ち望ま mɑtʃinozomɑ
+コラム korɑmɯ
+/> mɑ
+コンセント ko̞nsento̞
+いわき iɯaki
+リング ringɯ
+読み取り jo̞mito̞ri
+奈々 nɑnɑ
+もたらす motaɾasɯ
+再考 sɑikoː
+ホゴ ho̞go̞
+伺っ ɯkɑgɑʔ
+八木 jɑgi
+学べる mɑnɑberɯ
+炉 ro̞
+喋れ ʃɑbere
+機動 kido̞ɯ
+つかみ t͡sɯkɑmi
+豊田 tojodɑ
+須賀 sɯka
+断る kotoɯarɯ
+竹内 tɑkeɯtʃi
+ファンデーション ɸɑndeːʃon
+細かっ hosokɑʔ
+サイクロン saikɯɾon
+前回 zenkɑi
+付近 ɸɯkin
+編集 he̞nʃɯː
+女川原子力発電所 onagaɯagenʃirʲokɯhat͡sɯdenʃo
+とっ to̞ʔ
+窓際 madogiɯa
+教わっ osoɯaʔ
+最善 sɑizɯn
+連中 re̞ntʃɯː
+配っ kɯbɑʔ
+街灯 gɑitoː
+トーク to̞ːkɯ
+飲み物 no̞mimo̞no̞
+鮮烈 se̞nre̞tsɯ
+シューズ ʃɯːzɯ
+安穏 ɑnnon
+お客様 okjakɯsama
+領土 rʲo̞ɯdo̞
+離せ hɑnɑse
+敗戦 hɑisen
+転ぶ ko̞ro̞bɯ
+いらだつ irɑdɑt͡sɯ
+まあ mɑː
+無敵 mɯte̞ki
+司令塔 ʃirei to̞ː
+おっ o̞ʔ
+ずつ zɯt͡sɯ
+一明 itʃime̞ː
+ヘコ he ko̞
+予告編 jo̞ko̞kɯ hen
+原稿 genko̞ɯ
+宅急便 tɑkɯ kʲɯːbin
+夜中 jonɑkɑ
+澤田 saɯada
+叱ら ʃikɑrɑ
+連絡 renrɑkɯ
+居心地 igo̞ko̞tʃi
+極東 kʲo̞kɯto̞ɯ
+西部 seːbɯ
+出口 de̞gɯtʃi
+役人 jɑkɯnin
+異論 iro̞n
+痛し itɑʃi
+ぼんくら bonkɯrɑ
+小高い kodɑkɑi
+敬服 ke̞ːɸɯkɯ
+豪華 goːkɑ
+何故か nɑze kɑ
+駆けつける kɑket͡sɯkerɯ
+ごちそうさま gotʃisoːsɑmɑ
+リーズナブル riːzɯnɑbɯrɯ
+センサー sensɑː
+美術館 bidʒɯt͡sɯkɑn
+シングル ʃingɯrɯ
+彈 ?
+于文革 ɯ bɯnkɑkɯ
+聲 ?
+心得 ko̞ko̞ro̞e
+プーチン pɯːtʃin
+小向 komɯkɑi
+銅像 do̞ɯzo̞ɯ
+信彦 no̞bɯhiko̞
+悲しみ kɑnɑʃimi
+金属 kinzo̞kɯ
+掛ける kɑkerɯ
+浮き立つ ɯkitɑt͡sɯ
+コメンテーター komenteːtɑː
+スープ sɯːpɯ
+欄 rɑn
+おき o̞ki
+吸い出す sɯidasɯ
+パーソン pɑːson
+親御 ojɑgo
+露天商 ro̞ten ʃo̞ɯ
+筆 ɸɯde̞
+キュッ kʲɯ
+称する ʃoːsɯɾɯ
+遥か hɑrɯkɑ
+容易 jo̞ɯi
+させる saseɾɯ
+余念 jo̞nen
+宇宙船 ɯtʃɯːsen
+いつ it͡sɯ
+ネクタイ nekɯtɑi
+表明 ço̞ɯme
+つらい t͡sɯrɑi
+精機 se̞ːki
+ともう to̞ mo̞ɯ
+ハイテク hɑitekɯ
+踊ら odorɑ
+製 se̞ː
+植民 ʃo̞kɯmin
+ストライク sɯtoɾaikɯ
+陽太郎 joːtɑroː
+菜箸 sɑibɑʃi
+洋 jo̞ɯ
+レアケース reakeːsɯ
+無線 mɯsen
+体系 tɑikeː
+挽回 bɑnkɑi
+網 ɑmi
+債券 sɑiken
+しっとり ʃiʔto̞ri
+散歩 sɑnpo
+長身 tʃo̞ɯʃin
+つぐ t͡sɯgɯ
+權限 ke̞nge̞n
+支社 ʃiʃɑ
+議院 giin
+フリーター ɸɯriːtɑː
+過酷 kɑkokɯ
+別れる ɯakarerɯ
+全然 ze̞nze̞n
+沈静 tʃinse̞ː
+疑い ɯtɑgɑi
+罪 t͡sɯmi
+変えれ kɑere
+横山 jokojɑmɑ
+無愛想 mɯaiso
+知覚 tʃikɑkɯ
+病弱 bʲoːdʒɑkɯ
+星野 ho̞ʃino̞
+類 rɯi
+斑目 mɑdɑrɑme
+いつのまにか it͡sɯ no mɑ ni kɑ
+いまだ imɑdɑ
+誓う tʃikɑɯ
+シャネル ʃɑnerɯ
+澤 sɑvɑ
+試練 ʃire̞n
+スタッフ sɯtaffɯ
+ます masɯ
+若く ɯakakɯ
+アバター ɑbɑtɑː
+ココ ko̞ko̞
+お前 omɑe
+提言 te̞ːge̞n
+絵かき ekɑki
+庭 niɯa
+五 go̞
+見逃さ minoɣɑsɑ
+ヒダカトオル hidɑkɑtoːrɯ
+コバ kobɑ
+ロス ɾozɯ
+委任 inin
+せりふ seɾifɯ
+立ち寄っ tɑtʃijoʔ
+巻き返し mɑkikɑeʃi
+下り坂 kɯdɑrizɑkɑ
+おれ o̞re
+戦争 senso̞ː
+躍ら odorɑ
+短命 tɑnmeː
+ビル birɯ
+記 ki
+やり jɑri
+早熟 soːdʒjɯkɯ
+真髄 ʃinzɯi
+競争 kjo̞ːso̞ː
+定義 te̞ːgi
+正日 ʃo̞ɯnitʃi
+飛び回っ tobimaɯaʔ
+つめ t͡sɯme̞
+取材 ʃɯzɑi
+出掛け dekɑke
+引き受け hikiɯke̞
+代わる kaɯarɯ
+住み sɯmi
+台風 tɑiɸɯː
+コロリ ko̞ro̞ri
+捨て sɯte
+早退 soːtɑi
+旅 tɑbi
+鳴り物 nɑrimono
+入院 ɲɯːin
+にあたる ni ɑtɑrɯ
+やせ jɑze
+資金 ʃikin
+よー jo̞ː
+思想 ʃiso̞ː
+ミツバ mit͡sɯbɑ
+鼻 hɑnɑ
+並ぶ nɑrɑbɯ
+組み込ん kɯmiko̞n
+楠本 kɯsɯmoto
+手厚い teɑt͡sɯi
+自縄自縛 dʒidʒoː dʒibɑkɯ
+働かせる hataɾakaseɾɯ
+ラスカル ɾasɯkaɾɯ
+涙腺 ɾɯisen
+施設 ʃise̞tsɯ
+スミ sɯmi
+揃う soɾoɯ
+染みる ʃimirɯ
+合 ɑvɑze
+自然 ʃize̞n
+偽造 gizo̞ɯ
+走り抜く hɑʃiri nɯkɯ
+再発 saihatsɯɯ
+振り返っ ɸɯrikɑeʔ
+つり t͡sɯri
+章 ʃo̞ɯ
+輝 ɑkirɑ
+スーパー sɯːpaː
+少量 ʃo̞ɯrʲo̞ɯ
+突っ走る t͡sɯʔpɑʃirɯ
+しょうじ ʃo̞ɯdʒi
+透明 to̞ːmeː
+アクセント akɯsento
+行列 gʲo̞ɯret͡sɯ
+買 kɑi
+堅い kɑtɑi
+嘆かわしい nagekaɯaʃiː
+元日 gɑndʒit͡sɯ
+おおむね o̞ːmɯne
+チームメイト tʃiːmɯmeito̞
+暮らす kɯɾasɯ
+姐さん neːsɑn
+概要 gɑijoː
+適当 tekito̞ɯ
+ゼラチン zerɑtʃin
+凡そ o̞jo̞so̞
+回さ maɯasa
+何で nɑn de
+ブツクサ bɯtsɯkɯsa
+端末 tɑnmɑt͡sɯ
+瀧 tɑki
+イヤー ijɑː
+大急ぎ o̞ːiso̞gi
+生き延び ikino̞bi
+なぎ倒す nagitaosɯ
+助言 dʒo̞gen
+増田 masɯða
+一塁 itʃirɯi
+有能 jɯːno̞ɯ
+逃げ道 nige̞mitʃi
+バタバタ bɑtɑbɑtɑ
+凍っ ko̞o̞ʔ
+場合 bɑːi
+選ぶ erɑbɯ
+爺 dʒiː
+風呂 ɸɯro̞
+体制 tɑiseː
+損失 sonshitsɯ
+酒蔵 sakagɯra
+喫煙 kit͡sɯe̞n
+デ de̞
+目白押し medʒiro̞ːʃi
+ユーロ jɯːro̞
+プレイヤー pɯreijɑː
+告げ t͡sɯge̞
+緊迫 kinpɑkɯ
+手放し tebɑnɑʃi
+反応 hɑnnoː
+放火 hoːkɑ
+腰 ko̞ʃi
+有り ɑri
+元久 ge̞nkʲɯː
+有楽町 jɯːrɑkɯtʃoː
+振り付け ɸɯrit͡sɯke̞
+困難 konnɑn
+魂胆 kontɑn
+アルミシフトノブ ɑrɯmi ʃiɸɯto nobɯ
+入り込む hɑirikomɯ
+モチーフ mo̞tʃiːɸɯ
+内装 nɑisoː
+ラム rɑmɯ
+玄 ge̞n
+早期 soːki
+持っ mo̞ʔ
+寒く samɯkɯ
+証明 ʃo̞ːmei
+カルシファー kɑ rɯʃiɸɑː
+暗殺 ɑnsɑtsɯ
+甘い ɑmɑi
+凄まじかっ susamadʒikaʔ
+湯浅 jɯasa
+電力 denrʲo̞kɯ
+金融 kiɲjɯː
+佐藤 sɑtoː
+機材 kizɑi
+今更 imɑsɑɾɑ
+初め hɑdʒime
+生きがい ikigɑi
+メシ me̞ʃi
+損壊 sonkɑi
+アンチ ɑntʃi
+歌姫 ɯtɑhime
+モーツアルト moːt͡sɯɑrɯto
+南部 nɑnbɯ
+似合い niɑi
+たまらない tɑmɑrɑnɑi
+済ん sɯn
+驗豊富 ʃirɯʃi ho̞ɯɸɯ
+後日 go̞dʒit͡sɯ
+赴任 ɸɯnin
+入れ知恵 ire̞ tʃie̞
+構成 ko̞ːseː
+淘汰 toːtɑ
+椀 ɯan
+あいだ ɑidɑ
+明彦 ɑkihiko
+順天堂大 dʒɯntendoːdɑi
+頼もしい tɑnomoʃiː
+通っ to̞o̞ʔ
+願っ negɑʔ
+ほしさ hoʃisɑ
+プレッシャー pɯɾesʃaː
+変わっ kaɯaʔ
+着実 tʃɑkɯdʒit͡sɯ
+彩子 ɑjɑko
+給付 kʲɯːɸɯ
+全編 ze̞npe̞n
+会っ kɑiʔ
+イチゴ itʃigo̞
+駒井 komɑi
+荷物 nimo̞t͡sɯ
+格差 kakɯsa
+まとめ mɑtome
+プログラム pɯrogɯrɑmɯ
+もつ mo̞t͡sɯ
+割安 waɾijasɯ
+感化 kɑnkɑ
+随時 zɯidʒi
+衝撃 ʃo̞ɯgeki
+釈放 ʃɑkɯhoː
+囚 ʃɯː
+ぐっと gɯʔ to̞
+アレコレ ɑrekore
+ふためい ɸɯtɑmei
+マーベラス maːbeɾasɯ
+迫ろ semɑɾo
+みせ mise̞
+食えれ kɯe̞re̞
+王子製紙 owdʒiseːʃi
+夜明け joɑke
+敏感 binkɑn
+投薬 toːjɑkɯ
+伴奏 bɑnsoː
+長持ち nɑgɑmotʃi
+見つめる mit͡sɯme̞rɯ
+転機 te̞nki
+ちらりと tʃirɑri to
+原始 ge̞nʃi
+オレ o̞re
+審理 ʃinri
+使っ t͡sɯkɑʔ
+殺意 satsɯi
+学院 gɑkɯin
+おっしゃる oʔʃaɾɯ
+愚痴 gɯtʃi
+ウォン o̞n
+前 mɑe
+上値 ɯɯane
+華子 hɑnɑko
+つかも t͡sɯ kɑ mo
+町 mɑtʃi
+有利 jɯːri
+前歯 mɑebɑ
+きなこ kinɑko
+平清盛 hirɑ kijomori
+組み合わせ kɯmiawase
+騒々し so̞ːzo̞ːʃi
+統一 to̞ɯit͡sɯ
+ギャグ gʲɑgɯ
+集める ɑt͡sɯmerɯ
+昼 hirɯ
+コール ko̞ːrɯ
+分析 bɯnseki
+ずくめ zɯkɯme̞
+強く t͡sɯjo̞kɯ
+謎 nɑzo
+史 ɸɯmi
+いろ iro̞
+本家 ho̞nke
+称える tɑtɑerɯ
+カン kɑn
+鯉のぼり ko̞ino̞bo̞ri
+ローマ roːmɑ
+ベストアンサー besɯtoansaː
+訪日 ho̞ɯnitʃi
+舞台 bɯtɑi
+飲ま nomɑ
+占え ɯrɑnɑe
+くる kɯrɯ
+音読 o̞ndo̞kɯ
+安泰 ɑntɑi
+本職 ho̞nʃo̞kɯ
+都内 tonɑi
+小室 ko̞mɯro̞
+足し tɑʃi
+勉強 benkʲo̞ɯ
+暑 ʃo̞
+放射能 hoːʃɑ noː
+ジェネラリ dʒene rɑri
+版 bɑn
+表し araɯaʃi
+付加 ɸɯkɑ
+威勢 ise̞ː
+不 ɸɯ
+杉本 sɯximoto
+頂い itɑdɑi
+みじめ midʒime̞
+機先 kise̞n
+寄せ jo̞ze
+膝 hizɑ
+氏 ʃi
+覚え o̞bo̞e
+こぎつける ko̞git͡sɯkerɯ
+遅刻 tʃiko̞kɯ
+粘る nebɑrɯ
+水野 mizɯno̞
+ホンネ ho̞nne
+すかさず sɯkasazɯ
+頭部 to̞ɯbɯ
+おせっかい oseʔkɑi
+懐かし nɑt͡sɯkɑʃi
+同性愛 doːseːɑi
+頼り tɑjori
+両省 rʲo̞ɯʃo̞ɯ
+干し ho̞ʃi
+出し入れ dɑʃiire
+適性 te̞kise̞ː
+商品 ʃo̞ɯhin
+検知 ke̞ntʃi
+きっと kiʔto̞
+** ɕi
+津村 t͡sɯmɯrɑ
+転職 tenʃo̞kɯ
+粉ミルク konɑ mirɯkɯ
+ときめき to̞kimeki
+戦意 se̞ni
+狩り kɑri
+会談 kɑidɑn
+そんなに sonnɑni
+幻 mɑboroʃi
+個人 ko̞dʒin
+封入 ɸɯːɲɯː
+西田 niʃidɑ
+着々 tʃɑkɯtʃɑkɯ
+奮起 ɸɯnki
+養成 jo̞ːseː
+特技 to̞kɯgi
+継ぎ t͡sɯgi
+うってつけ ɯʔte̞t͡sɯke̞
+常に t͡sɯne̞ ni
+ユーザー jɯːzɑː
+友情 jɯːdʒo̞ɯ
+植物 ʃo̞kɯbɯt͡sɯ
+当てる ɑterɯ
+根羽 nebɑ
+腹痛 ɸɯkɯt͡sɯː
+ころ ko̞ro̞
+ユニーク jɯniːkɯ
+あきれ ɑkire
+自律 dʒirit͡sɯ
+緩く jɯrɯkɯ
+満足 mɑnzokɯ
+健気 kenɑge
+棲み sɯmi
+お笑い oɯarai
+クリス kɯɾisɯ
+もし mo̞ʃi
+世襲 seshɯː
+システム ʃisɯtemɯ
+アルペン ɑrɯpen
+泣い nɑi
+間違え mɑtʃigɑe
+心変わり kokorogaɯari
+売り物 ɯrimo̞no̞
+もめ mo̞me
+重要 dʒɯːjo̞ɯ
+木内 kiɯtʃi
+林 hɑjɑʃi
+守ら mɑmorɑ
+ペンキ pe̞nki
+理 ri
+警察 keːsɑtsɯ
+坪内 t͡sɯbo̞ɯtʃi
+各種 kɑkɯʃɯ
+ながら nɑgɑrɑ
+終日 ʃɯːdʒit͡sɯ
+岩松 gɑnʃoː
+一理 itʃiri
+中位 tʃɯːi
+ハローデイ hɑroː dei
+誇る ho̞ko̞rɯ
+登場 to̞ɯdʒo̞ɯ
+やれる jɑrerɯ
+是 ze̞
+消費 ʃo̞ɯhi
+原型 ge̞nke̞ː
+ジメジメ dʒime̞dʒime̞
+銃 dʒɯː
+傷つける kizɯt͡sɯke̞rɯ
+味 ɑdʒi
+ブライアン bɯrɑiɑn
+討議 to̞ɯgi
+遺棄 iki
+只野 tɑdɑno
+気が付か ki gɑ t͡sɯkɑ
+ホンダブリヂストン hondabɯɾizisɯton
+壁紙 kɑbegɑmi
+出足 deɑʃi
+カリグラ kɑrigɯrɑ
+済ませる sɯmaseɾɯ
+ダイニング dɑiningɯ
+不快 ɸɯkɑi
+フランス語 fɯransɯgo
+演じる e̞ndʒirɯ
+ストーブ sɯto催すbɯ
+騒がしい sɑvɑɠɑʃiː
+応え kotɑe
+快走 kɑisoː
+ハッ hɑ
+明報 meːho̞ː
+による ni jo̞rɯ
+大きい o̞ːkii
+満期 mɑnki
+下着 ʃitɑgi
+ベーコン beːko̞n
+リビア ribiɑ
+やせる jaseɾɯ
+生き方 ikikɑtɑ
+パケ pɑke
+とても to̞temo̞
+合える ɑerɯ
+ホープ ho̞ːpɯ
+くし kɯʃi
+から kɑrɑ
+再編 sɑien
+クチバシ kɯtʃibɑʃi
+貫徹 kɑntet͡sɯ
+気遣わ kizɯkaɯa
+波紋 hɑmon
+ハウル hɑɯrɯ
+ほほ笑ん ho̞ho̞en
+購買 koːbɑi
+フン ɸɯn
+ちょっと tʃo̞ʔto̞
+日時 nitʃidʒi
+反感 hɑnkɑn
+いかにも ikɑni mo
+兆し kizɑʃi
+れ re̞
+さ sɑ
+見つから mit͡sɯkɑrɑ
+佐々木 sɑsɑki
+ロイター通信 roitɑː t͡sɯːʃin
+水増し mizɯmɑʃi
+ヒマラヤ himɑrɑjɑ
+ミクシィ mikɯʃi
+頻度 hindo̞
+羽田 hɑtɑ
+貯蓄 tʃo̞tʃikɯ
+タブー tɑbɯː
+ウエイトレス weitoɾesɯ
+中道 tʃɯːdo̞ɯ
+低い hikɯi
+晴れ渡っ hare ɯataʔ
+禁輸 kiɲjɯ
+葵 ɑoi
+石原 iʃihɑrɑ
+盛り mo̞ri
+金銭 kinsən
+地下 tʃikɑ
+意外 igɑi
+日帰り higɑeri
+聴こえ kiko̞e
+スポイル sɯpoiɾɯ
+ブルペン bɯrɯpe̞n
+視点 ʃite̞n
+県内 kennɑi
+佑 jɯː
+創意 so̞ːi
+黎明 re̞ːme̞ː
+がっかり gɑʔkɑri
+遺言 jɯigo̞n
+鴨川 kamogaɯa
+襲いかかる osoikakarɯ
+冷気 re̞ːki
+まわる maɯarɯ
+道具 do̞ɯgɯ
+括る kɯkɯrɯ
+高め tɑkɑme
+式場 ʃikidʒo̞ɯ
+弱者 dʒɑkɯʃɑ
+挙げる ɑgerɯ
+モナリザ monɑrizɑ
+アフガニスタン afɯɣanisɯtan
+無人島 mɯdʒinto̞ɯ
+チャイナ tʃɑinɑ
+落ちつい o̞tʃit͡sɯi
+川柳 senrjɯː
+死者 ʃiʃɑ
+猛然と mo̞ɯzen to̞
+見え隠れ miekɑkɯre
+導け mitʃibike̞
+適用 tekijo̞ɯ
+最大手 sɑioːte
+反発 hɑnpɑt͡sɯ
+なくなる nɑkɯnɑrɯ
+外相 gɑiʃoː
+県民 ke̞nmin
+リーフ riːɸɯ
+噴射 ɸɯnʃɑ
+ふみ ɸɯmi
+今週 ko̞nʃɯː
+残虐 zɑngʲɑkɯ
+共和党 kʲoːɯatoː
+外食 gɑiʃokɯ
+ライター rɑitɑː
+移民 imin
+配偶 hɑigɯː
+付属 ɸɯzo̞kɯ
+着地 tʃɑkɯtʃi
+解き ho̞do̞ki
+リンゼイ rinze̞ː
+熱く ɑt͡sɯkɯ
+内需 nɑidʒɯ
+ガレキ gɑreki
+繁栄 hɑneː
+ラフ rɑɸɯ
+いっち iʔtʃi
+間に合い mɑniɑi
+移籍 ise̞ki
+連邦 renpo̞ɯ
+のぼせ no̞bo̞ze
+クリ kɯri
+ハーバード大 hɑːbɑːdodɑi
+抑え osɑe
+兵役 he̞ːe̞ki
+特有 to̞kɯjɯː
+野茂 no̞mo̞
+注ぎ込ん so̞so̞giko̞n
+各自 kɑkɯdʒi
+但し tɑdɑʃi
+腹立つ hɑrɑdɑt͡sɯ
+堰 se̞ki
+うろうろ ɯro̞ɯro̞
+請け負い ɯkeo̞i
+払える hɑrɑerɯ
+観覧 kɑnrɑn
+引きずら hikizɯrɑ
+ばっか bɑʔkɑ
+遠のい to̞ːno̞i
+引か hikɑ
+施さ hodokosɑ
+下水道 gəsɯidoː
+バージョン bɑːdʒon
+ラーソン rɑːson
+お年 o̞to̞ʃi
+マルチ mɑrɯtʃi
+思い描き omoiegɑki
+完成 kɑnseː
+妓生 kiːsɑn
+もらっ morɑʔ
+若松 ɯakamat͡sɯ
+冷え性 hieʃo̞ɯ
+嘲笑 tʃo̞ɯʃo̞ɯ
+参拝 sɑ̃paj
+目出度い medetɑi
+選挙 senkjo̞
+考えろ kɑngɑero
+汚染 o̞sen
+あろ ɑro
+落とさ otosɑ
+古巣 fɯɾɯsɯ
+水がめ mizɯgɑme
+甘み ɑmɑmi
+役所 jɑkɯʃo
+すっぱり sɯʔpaɾi
+ガム gɑmɯ
+画質 gɑʃit͡sɯ
+そこで so̞ko̞de
+悩ん nɑjɑn
+山林 sɑnrin
+落合 otʃiɑi
+鈴木 sɯzɯki
+しきたり ʃikitɑri
+土下座 do gezɑ
+中田 nɑkɑtɑ
+脱毛 dɑt͡sɯmoː
+そちら sotʃiɾɑ
+革新 kɑkɯʃin
+小説 ʃoːsetsɯ
+あらためて ɑrɑtɑmete
+外形 gɑikeː
+本態 hontɑi
+見抜か minɯkɑ
+湯郷 jɯno̞go̞ɯ
+福永 ɸɯkɯnɑgɑ
+荒木 ɑrɑki
+言い方 iikɑtɑ
+腫れ hɑre
+歯科 ʃikɑ
+かおり kɑori
+上品 dʒo̞ɯbo̞n
+直せる naoseɾɯ
+自殺 dʒisatsɯsu
+設備 setsɯbi
+ファイト ɸɑito
+両石湾 rʲoːiʃi ɯan
+肩書 kɑtɑgɑki
+ヘッジファンド hedʒdʒi ɸɑndo
+メイン me̞in
+候補 ko̞ɯho̞
+末路 mɑt͡sɯro
+スキーヤー sɯkiijaa
+危ない ɑbɯnɑi
+各紙 kɑkɯʃi
+新年 ʃinne̞n
+蘇ら jomigɑerɑ
+角 kɑkɯ
+府庁 ɸɯtʃo̞ɯ
+好転 ko̞ɯten
+仆 ?
+笑え ɯarae
+比較 hikɑkɯ
+アクアライン ɑkɯɑ rɑin
+書評 ʃo̞ço̞ɯ
+小言 ko̞go̞to̞
+日医 nitʃii
+内側 ɯtʃigaɯa
+奪い合い ɯbɑiɑi
+厳し kibiʃi
+くさい kɯsai
+遠 to̞ː
+英語 e催すgo̞
+じり高 dʒiridɑkɑ
+専攻 senko̞ː
+リーダー riːdɑː
+委ね jɯdɑne
+排除 hɑidʒo
+後ろ ɯʃiro̞
+野心 jɑʃin
+指紋 ʃimo̞n
+駄文 dɑbɯn
+着物 kimo̞no̞
+三国志 mikɯniʃi
+ひっくり返し hiʔkɯrikɑeʃi
+次男 dʒinɑn
+見せ付け mise̞tsɯke̞
+無論 mɯro̞n
+不変 ɸɯhe̞n
+傾げる kɑʃigerɯ
+テンキー te̞n kiː
+スギ sɯgi
+市 ʃi
+北上 ho̞kɯdʒo̞ɯ
+努め t͡sɯto̞me
+会見 kɑiken
+黒柳 kɯrojɑnɑgi
+平壌放送 pjonjɑnhoːsoː
+返事 he̞ndʒi
+訪露 ho̞ɯro̞
+見回す mimawasɯ
+というのも to̞ iɯ no̞ mo̞
+メア meɑ
+ローブ ro̞ːbɯ
+岐阜 giɸɯ
+綱 t͡sɯnɑ
+通過 t͡sɯːkɑ
+度 do̞
+こんなに konnɑ ni
+ジェンキンス jɯnkinsɯ
+守れ mɑmore
+吹き ɸɯki
+世帯 setɑi
+直輸入 dʒikɑ jɯɲɯː
+倫理 rinri
+滑り落ちる sɯbeɾiokiɾɯ
+いえる ie̞rɯ
+後前 ɯʃiromɑe
+やわらか jaɯaraka
+社会保険庁 ʃɑkɑi hokentʃoː
+淵 ɸɯtʃi
+分配 bɯnpɑi
+邊 ?
+後ろ姿 ɯʃiɾosɯɣata
+今や imɑ jɑ
+佇む tɑtɑzɯmɯ
+接種 sesʃɯ
+別々 be̞t͡sɯbe̞t͡sɯ
+胡 ko̞
+炭 sɯmi
+現状 gendʒo̞ɯ
+災害 sɑiɣɑi
+太平洋戦争 tɑiheːjoːsɛnsoː
+例外 reːgɑi
+システムメンテナンススクリプト ʃisɯtemɯmentenansɯsɯkɯripɯto
+書き込み kɑkikomi
+請わ koɯa
+ワクワク ɯakɯɯakɯ
+ルーム rɯːmɯ
+田 tɑ
+ミル mirɯ
+薄く ɯsɯkɯ
+若気 ɯakage
+家 ie̞
+肩こり kɑtɑkori
+崩す kɯzɯsɯ
+生々しい nɑmɑnɑmɑʃiː
+治安 tʃiɑn
+備蓄 bitʃikɯ
+刺さっ sɑsɑʔ
+郵送 jɯːsoː
+両者 rʲoːʃɑ
+言い放っ iihɑnɑʔ
+終値 oɯarine
+降り立っ oritɑʔ
+修了 ʃɯːrʲo̞ɯ
+戦術 seɲʒɯt͡sɯ
+図書 to̞ʃo̞
+お腹 onɑkɑ
+大晦日 oːmisokɑ
+モンスター monsɯtaː
+真下 mɑʃitɑ
+給与 kʲɯːjo̞
+今 imɑ
+くれ kɯre̞
+一路 itʃiro̞
+未遂 misɯi
+冷やかし hijɑkɑʃi
+刺激 ʃige̞ki
+どっしり do̞ʔsiɾi
+組織 so̞ʃiki
+幹部 kɑnbɯ
+最大限 sɑidɑigen
+切り替わる kirikaɯarɯ
+甲乙 ko̞ɯo̞t͡sɯ
+量販 rʲoːhɑn
+ビズリーチ bizɯ riːtʃi
+人物 dʒinbɯt͡sɯ
+お世話 osevɑ
+忠実 tʃɯːdʒit͡sɯ
+突如 to̞t͡sɯdʒo̞
+鋭く sɯɾɯdokɯ
+新鮮 ʃinsən
+不思議 ɸɯʃigi
+世の中 jononɑkɑ
+増殖 zo̞ɯʃo̞kɯ
+トレガラケー toregɑrɑkeː
+全巻 zenkɑn
+新書 ʃinʃo̞
+梶浦 kɑdʒiɯrɑ
+慣用 kɑ̃jo̞ː
+例年 re̞ːne̞n
+センス sensɯ
+ベビー服 be̞biːɸɯkɯ
+関 se̞ki
+ひもとい himo̞to̞i
+返っ kɑeʔ
+顕著 kentʃo̞
+チャネル tʃɑnerɯ
+語る kɑtɑrɯ
+性能 seːno̞ː
+広域 ko̞ɯiki
+捕らえ torɑe
+助け出し tasɯkedaʃi
+常任 dʒo̞ɯnin
+祖国 sokokɯ
+カリブ海 kɑribɯkɑi
+打て ɯte̞
+着き t͡sɯki
+かたや kɑtɑ jɑ
+いわ iɯa
+マセラティ mɑseɾɑti
+ガラパゴス gaɾapaɣosɯ
+社長 ʃɑtʃoː
+ポルトガル porɯtogɑrɯ
+ぐらい gɯrɑi
+メヂカラ mezikɑrɑ
+殺める ɑjɑmerɯ
+切れる kire̞rɯ
+協定 kʲo̞ːteː
+方面 ho̞ɯmen
+変革 henkɑkɯ
+直接 tʃokɯsetsɯ
+甚大 dʒindɑi
+穏やか odɑjɑkɑ
+タブ tɑbɯ
+除隊 dʒotɑi
+宇宙 ɯtʃɯː
+くつろぎ kɯt͡sɯro̞gi
+クラフト kɯrɑɸɯto
+没入 bo̞t͡sɯɲɯː
+メトロ meto̞ro̞
+精力 seːɾjokɯ
+途中 to̞tʃɯː
+高安 takajasɯ
+振る舞う ɸɯrɯmɑɯ
+かた kɑtɑ
+性癖 se̞ːhe̞ki
+フツー ɸɯt͡sɯː
+よい jo̞i
+公選法 koːsenhoː
+はがし hɑgɑʃi
+見つかり mit͡sɯkɑri
+大正 tɑiʃoː
+尾行 biko̞ɯ
+トータル toːtɑrɯ
+出来上がり dekiɑgɑri
+槿恵元 ?? mo̞to̞
+応答 o̞ɯto̞ɯ
+無性に mɯʃo̞ɯ ni
+滑り sɯbeɾi
+観点 kɑnten
+語りかける kɑtɑrikɑkerɯ
+ご愁傷さま goʃɯːʃoːsama
+守門 sɯmon
+ストレート sɯtoɾeːto
+行き過ぎ ikisɯgi
+選 se̞n
+訳読 jɑkɯdokɯ
+研究所 kenkʲɯːʃo̞
+変貌 henbo̞ɯ
+答案 toːɑn
+痺れ ʃibire̞
+エール e̞ːrɯ
+鎮め ʃizɯme̞
+真意 ʃini
+後れ o̞kɯre
+てっきり te̞ʔkiri
+長男 tʃoːnɑn
+購読 ko̞ɯdo̞kɯ
+一切 isɑi
+嫌味 ijɑmi
+回収 kɑiʃɯː
+プルクラユ pɯrɯkɯrɑjɯ
+文句 mo̞nkɯ
+酔っぱらい joʔpɑrɑi
+ミ mi
+アンリ ɑnri
+かね kɑ ne
+肥料 hirʲo̞ɯ
+ボード bo̞ːdo̞
+発祥 hɑʃoː
+利益 rie̞ki
+陣羽織 dʒin hɑori
+盛ん sɑkɑn
+おご o̞ go̞
+幸せ ʃiaɯaze
+リゾート rizo̞ːto̞
+巻い mɑi
+出社 ʃɯsʃɐ
+づらい zɯrɑi
+節約 setsɯjakɯ
+人事考課 dʒindʒi koːkɑ
+オト o̞to̞
+寡黙 kɑmokɯ
+平常 heːdʒo̞ː
+録 ro̞kɯ
+俵 taɯara
+マネジャー mɑnedʒɑː
+佳境 kɑkʲoː
+傾け kɑtɑmɯke
+手相 teso̞ː
+取り組み to̞rikɯmi
+見放し mihɑnɑʃi
+高かっ tɑkɑkɑʔ
+下位 kɑi
+篇 he̞n
+グロス gɯɾosɯ
+桑 kɯɯa
+切なく setsɯnakɯ
+紅白 koːhɑkɯ
+今に imɑ ni
+辛らつ ʃinrɑt͡sɯ
+小沢 ozaɯa
+世紀 se̞ːki
+三井 mit͡sɯi
+読み書き jomikɑki
+室根 mɯro̞ne
+とろろ to̞ro̞ro̞
+踏み込ん ɸɯmiko̞n
+和紙 ɯaʃi
+後場 gobɑ
+急かし sekɑʃi
+餃子 gʲoːzɑ
+磁石 dʒiʃɑkɯ
+制限 se̞ːge̞n
+おぼれ o̞bo̞re
+そのまま sonomɑmɑ
+感心 kɑnʃin
+できるだけ dekirɯ dɑke
+外部 gɑibɯ
+政党 seːtoː
+際立ち kiɯadatʃi
+責任 se̞kinɪn
+山形大学 jɑmɑgɑtɑ dɑigɑkɯ
+妙手 mʲo̞ɯʃɯ
+急騰 kʲɯːto̞ɯ
+題し dɑiʃi
+引き締める hikiʃime̞rɯ
+定説 te̞ːse̞tsɯ
+シャツ ʃɑt͡sɯ
+記さ ʃiɾɯsa
+焼き鳥 jɑkitori
+一川 itʃikaɯa
+低位 te̞ːi
+休業 kʲɯːgʲo̞ɯ
+有事 jɯːdʒi
+須 ʃɯ
+高橋 tɑkɑhɑʃi
+用いる mo̞tʃiirɯ
+瞬間 ʃɯnkɑn
+葉室 hɑʃit͡sɯ
+フェヒナー ɸehinɑː
+多岐 tɑki
+傍聴 bo̞ɯtʃo̞ɯ
+愛想 ɑiso
+解毒 gedo̞kɯ
+得てして e̞te̞ʃite̞
+コテ ko̞te
+失わ ɯʃinaɯa
+背け somɯke
+フリ ɸɯri
+稲船 inɑbɯne
+向かわ mɯkaɯa
+理学部 rigɑkɯ bɯ
+暫く ʃibɑrɑkɯ
+兼業 kengʲo̞ɯ
+ジュン dʒɯn
+螺旋 ɾɯzɯn
+石垣 iʃigɑki
+防疫 bo̞ɯeki
+沼宮内 nɯmɑkɯnɑi
+秘話 hiɯa
+縁 e̞n
+符号 ɸɯgo̞ɯ
+発症 hɑsoː
+珍 tʃin
+踏み切る ɸɯmikirɯ
+生き iki
+クリスティーン kɯrisɯtiːn
+行為 ko̞ɯi
+船室 senshitsɯ
+遭う ɑɯ
+それぞれ so̞ɾezo̞ɾe
+終点 ʃɯːte̞n
+火事 kɑdʒi
+スパイ sɯpai
+序章 dʒo̞ʃo̞ɯ
+かわいい kaɯaiː
+切っ kiʔ
+まくっ mɑkɯʔ
+実体験 dʒit͡sɯtɑiken
+閉める ʃime̞rɯ
+決まっ kimɑʔ
+宮城 mijɑgi
+奥山 okɯjɑmɑ
+常識 dʒo̞ɯʃiki
+階段 kɑidɑn
+神木 ʃinbo̞kɯ
+かえって kɑeʔte
+アラン ɑrɑn
+突き t͡sɯki
+悪者 ɯarɯmono
+創 hɑdʒime
+贅沢 zeːtɑkɯ
+腸閉塞 tʃoːheːsokɯ
+おさらば osɑrɑbɑ
+瀬戸 seto̞
+隆三 rʲɯːzo̞ɯ
+所得 ʃo̞to̞kɯ
+ズラリ zɯrɑri
+秋元 ɑkimoto
+悪筆 ɑkɯhit͡sɯ
+青物 ɑomono
+ものの mo̞no̞ no̞
+洋書 jo̞ɯʃo̞
+当てはまる ɑtehɑmɑrɯ
+ナゾ nɑzo
+つっこま t͡sɯʔkomɑ
+仲 nɑkɑ
+身元 mimo̞to̞
+暑く ɑt͡sɯkɯ
+遺書 iʃo̞
+小売 ko̞ɯri
+婆 bɑː
+つもり t͡sɯmo̞ri
+てん te̞n
+敬っ tɑkɑʃiʔ
+感性 kɑnseː
+もの mo̞no̞
+ふ ɸɯ
+弱気 joɯaki
+平気 he̞ːki
+楽しい tɑnoʃiː
+なろ nɑro
+惜しく o̞ʃikɯ
+ほろ苦い horonigɑi
+イオン io̞n
+路線 ɾo̞zɯn
+大波 oːnɑmi
+剛士 tɑkeʃi
+故に jɯe̞ ni
+捨てよ sɯtejo
+根拠 ko̞nkʲo̞
+勝ち取っ kɑtʃitoʔ
+正義 mɑsɑjoʃi
+奥義 o̞ɯgi
+または mata ɯa
+発電 hɑt͡sɯden
+サムライ samɯɾai
+撲滅 bo̞kɯmet͡sɯ
+落ちる o̞tʃirɯ
+ミスター misɯtaː
+昨日 kino̞ɯ
+居眠り ine̞mɯri
+トーンダウン toːn dɑɯn
+譲れ jɯzɯre̞
+春先 haɾɯsaki
+心地よい ko̞ko̞tʃijo̞i
+パロー pɑɾo
+正反対 seːhɑntɑi
+先人 se̞ɲdʒin
+語ら kɑtɑrɑ
+連動 rendo̞ɯ
+停泊 teːhɑkɯ
+赤字 ɑkɑdʒi
+行える okonɑerɯ
+ドイツ do̞it͡sɯ
+パート pɑːto
+笑顔 egɑo
+極めつけ kiɯamet͡sɯke
+投げる nɑgerɯ
+長打 tʃoːdɑ
+人前 hitomɑe
+ウェルカムページ erɯkɑmɯ peːdʒi
+すぎる sɯgiɾɯ
+だい dɑ i
+むくん mɯkɯn
+異変 ihe̞n
+きら kirɑ
+根っこ neʔko̞
+古典 ko̞ten
+取り組も to̞rikɯmo̞
+アクティブ ɑkɯtibɯ
+内 ɯtʃi
+騒然 so̞ːzen
+廃れる sɯtaɾeɾɯ
+挟ん hɑsɑn
+こぶし ko̞bɯʃi
+聴か kikɑ
+ロード ro̞ːdo̞
+タケシ tɑkeʃi
+ってな ʔte nɑ
+みなさま minɑsɑmɑ
+飲み食い no̞mikɯi
+反落 hɑnrɑkɯ
+タスポ tasɯpo
+根本 ko̞npo̞n
+和具 ɯa gɯ
+頑張れる gɑnbɑrerɯ
+事務職 dʒimɯ ʃo̞kɯ
+酔いつぶれ jo̞it͡sɯbɯre
+力走 ɾikiso̞ː
+学長 gɑkɯtʃoː
+躍る o̞do̞rɯ
+ヤツ jɑt͡sɯ
+途絶える todɑerɯ
+ラウンド rɑɯndo
+税制 zɑiseː
+月曜日 get͡sɯjo̞ɯ hi
+定数 teːsɯː
+あまり ɑmɑri
+乗り物 no̞rimo̞no̞
+かわり kaɯari
+片栗粉 kɑtɑkɯriko
+タダ tɑdɑ
+増税 zo̞ːzeː
+易く jasɯkɯ
+絶賛 zɛʔsɑn
+指標 ʃiço̞ɯ
+デキ de̞ki
+自体 dʒitɑi
+意気込み ikiko̞mi
+書か kɑkɑ
+裏庭 ɯraniɯa
+中毒 tʃɯːdo̞kɯ
+詰め込ん t͡sɯmeko̞n
+休む jasɯmɯ
+怒っ do̞ʔ
+ラツィオ rɑt͡sio
+とどまら todomɑrɑ
+寝る ne̞rɯ
+ジロジロ dʒiro̞dʒiro̞
+出せる daseɾɯ
+星野リゾート ho̞ʃino̞ rizo̞ːto̞
+うかがい ɯkɑgɑi
+健治 ke̞ndʒi
+工学科 koːgɑkɯkɑ
+資産 ʃisɑn
+舐め nɑme
+アール ɑːrɯ
+神さま kɑmisɑmɑ
+ありとあらゆる ɑri to ɑrɑjɯrɯ
+象 zo̞ɯ
+拗ね sɯne
+いたちごっこ itɑtʃigoʔko
+穏健 o̞nken
+滅ぶ ho̞ro̞bɯ
+特徴 to̞kɯtʃo̞ɯ
+松本 mɑt͡sɯmoto
+淹 e̞n
+博士 hɑkɑse
+負ける mɑkerɯ
+省エネ ʃo̞ɯene
+水泳 sɯjej
+死蔵 ʃizo̞ɯ
+見渡せ miɯatase
+輝かしい kɑgɑjɑkɑʃiː
+保 ho̞
+講義 ko̞ɯgi
+こぎつけ ko̞git͡sɯke
+脳 no̞ɯ
+食料 ʃo̞kɯrʲo̞ɯ
+王朝 o̞ɯtʃo̞ɯ
+具合 gɯɑi
+走行 so̞ːko̞ː
+電機 de̞nki
+鬼畜 kitʃikɯ
+給電 kʲɯːde̞n
+外来 gɑirɑi
+かも kɑ mo
+販促 hansokɯ
+高倉 tɑkɑkɯrɑ
+農 no̞ɯ
+金元 kɑnemoto
+はたして hɑtɑʃite
+神妙 ʃinmʲo̞ɯ
+雄輔 jɯːsɯke
+逆らう sakaɾaɯ
+ゅ jw
+緩和 kanɯa
+ソソ so̞so̞
+ガソリン gɑsoɾin
+アイテム ɑitemɯ
+遊べる asobeɾɯ
+市立 ʃirit͡sɯ
+朋 to̞mo̞ː
+づけ zɯke̞
+手袋 tebɯkɯro̞
+明確 meːkɑkɯ
+開放 kɑihoː
+少なく sɯkɯnakɯ
+エルニーニョ erɯniːɲo̞
+環境 kɑnkʲoː
+方式 ho̞ɯʃiki
+競走 kjo̞ːso̞ː
+戻っ mo̞do̞ʔ
+補う oginɑɯ
+列車 ɾɛsʃɑ
+読み jo̞mi
+ハケン hɑken
+新 ʃin
+華道 kɑdoː
+吹き込む ɸɯkiko̞mɯ
+罪滅ぼし t͡sɯmiho̞ro̞bo̞ʃi
+攻略 koːrʲɑkɯ
+すでに sɯdeni
+競争心 kjo̞ːso̞ːʃin
+野菜 jɑsɑi
+黒カビ kɯrokɑbi
+見いだせ miidɑse
+失業 ʃit͡sɯgʲo̞ɯ
+箱根 hɑkone
+減らす heɾasɯ
+思い浮かべる omoiɯkɑberɯ
+蔵書 zo̞ɯʃo̞
+踏切 ɸɯmikiri
+見張り mihɑri
+ヤマ jɑmɑ
+朗読 ro̞ɯdo̞kɯ
+いれ ire̞
+チェック tʃe̞ʔkɯ
+ロッククライミング roʔkɯ kɯrɑimingɯ
+熱気 ne̞ʔki
+喫緊 kiʔkin
+一貫 iʔkɑn
+ノック no̞ʔkɯ
+ダイナミック dɑinɑmiʔkɯ
+クック kɯʔkɯ
+テック te̞ʔkɯ
+ブラック bɯrɑʔkɯ
+結果 keʔkɑ
+血管 keʔkɑn
+別館 beʔkɑn
+罰金 bɑʔkin
+タック tɑʔkɯ
+刻々 ko̞ʔko̞kɯ
+ショッキング ʃo̞ʔkingɯ
+仏教 bɯʔkʲo̞ɯ
+裂肛 reʔko̞ɯ
+別海 beʔkɑi
+キックオフ kiʔkɯo̞ɸɯ
+一刻 iʔko̞kɯ
+湿気 ʃiʔke̞
+仏教徒 bɯʔkʲo̞ɯ to̞
+学級 gɑʔkʲɯː
+別件 be̞ʔke̞n
+落下 rɑʔkɑ
+サイドバック saidobaʔkɯ
+真っ暗 mɑʔkɯrɑʔ
+恰好 kɑʔko
+日刊 niʔkɑn
+ブックガイド bɯʔkɯ gɑido
+バック bɑʔkɯ
+一過 iʔkɑ
+確固たる kɑʔko tɑrɯ
+出血 ʃɯʔke̞t͡sɯ
+ムック mɯʔkɯ
+一行 iʔko̞ɯ
+百貨店 çɑʔkɑten
+厄介 jɑʔkɑi
+ブラックホール bɯrɑʔkɯ hoːrɯ
+鉄橋 teʔkʲo̞ɯ
+一挙一動 iʔkʲo̞ itʃido̞ɯ
+楽観 rɑʔkɑn
+ブック bɯʔkɯ
+白金 hɑʔkin
+コマツファナック komɑt͡sɯ ɸɑnɑʔkɯ
+一家 iʔkɑ
+チェックポイント tʃeʔkɯpo̞into̞
+発見 hɑʔken
+国庫 ko̞ʔko̞
+撤回 teʔkɑi
+躍起 jɑʔki
+激昂 geʔko̞ɯ
+キヤノンパナソニック kijanonpanasoniʔkɯ
+直近 tʃo̞ʔkin
+属国 zo̞ʔko̞kɯ
+一角 iʔkɑkɯ
+ロック ro̞ʔkɯ
+出荷 ʃɯʔkɑ
+ロジック ro̞dʒiʔkɯ
+食器 ʃo̞ʔki
+特攻 to̞ʔko̞ɯ
+結婚式 keʔko̞n ʃiki
+逆境 gʲɑʔkʲoː
+パッケージ pɑʔkeːdʒi
+雑感 zɑʔkɑn
+ライフハック rɑiɸɯ hɑʔkɯ
+ロディック ro̞diʔkɯ
+パニック pɑniʔkɯ
+一気に iʔki ni
+北海道 hoʔkɑidoː
+国交 ko̞ʔko̞ɯ
+学校 gɑʔkoː
+立候補 riʔko̞ɯho̞
+若干 dʒɑʔkɑn
+ルック rɯʔkɯ
+復帰 ɸɯʔki
+発光 hɑʔkoː
+ブラックボックス bɯraʔkɯboʔkɯsɯ
+着工 tʃɑʔkoː
+発酵 hɑʔkoː
+グラフィック gɯrɑɸiʔkɯ
+月間 geʔkɑn
+トピック to̞piʔkɯ
+骨格 koʔkɑkɯ
+小学校 ʃoːgɑʔkoː
+絶句 ze̞ʔkɯ
+クイックローディングシステム kɯiʔkɯroːdiŋɡɯʃisɯtemɯ
+一見 iʔke̞n
+北海 hoʔkɑi
+ハッキリ hɑʔkiri
+学科 gɑʔkɑ
+パブリック pɑbɯriʔkɯ
+日記 niʔki
+ブックバンドルーラー bɯʔkɯ bɑndo rɯːrɑː
+脳溢血 no̞ɯ iʔket͡sɯ
+イッキ iʔki
+実感 dʒiʔkɑn
+絶好 zeʔko̞ɯ
+ビギナーズラック biginɑːzɯ rɑʔkɯ
+カッコ kɑʔko
+ショック ʃo̞ʔkɯ
+クリック kɯriʔkɯ
+直結 tʃo̞ʔket͡sɯ
+引越 hiʔko̞ʃi
+スイッチバック sɯitʃibaʔkɯ
+直角 tʃoʔkɑkɯ
+熱狂 neʔkʲo̞ɯ
+悪化 ɑʔkɑ
+ファナックトヨタ ɸɑnɑʔkɯ tojotɑ
+学会 gɑʔkɑi
+モロッコ mo̞ro̞ʔko̞
+テクニック te̞kɯniʔkɯ
+活気づく kɑʔki zɯkɯ
+読解 doʔkɑi
+北海道電力 hoʔkɑidoː denrʲokɯ
+バイオメトリック bɑio metoriʔkɯ
+ブロック bɯro̞ʔkɯ
+ピッコロ piʔko̞ro̞
+特会 toʔkɑi
+北海道庁 hoʔkɑidoːtʃoː
+卓球 tɑʔkʲɯː
+マッコリ mɑʔkori
+復権 ɸɯʔke̞n
+パック pɑʔkɯ
+特許 to̞ʔkʲo̞
+絶好調 zeʔko̞ɯtʃo̞ɯ
+列挙 reʔkʲo̞
+ファナックコマツ ɸɑnɑʔkɯ komɑt͡sɯ
+楽曲 gɑʔkʲokɯ
+オートマチック oːtomɑtʃiʔkɯ
+出火 ʃɯʔkɑ
+中学校 tʃɯːgɑʔkoː
+雑貨 zɑʔkɑ
+客観 kʲɑʔkɑn
+牧歌 boʔkɑ
+ジャック dʒɑʔkɯ
+膝下 ʃiʔkɑ
+復活 ɸɯʔkɑt͡sɯ
+ラック rɑʔkɯ
+チェックイン tʃe̞ʔkɯ in
+ツッコ t͡sɯʔko̞
+日光 niʔko̞ɯ
+国家 koʔkɑ
+血行 keʔko̞ɯ
+ロッカー roʔkɑː
+復旧 ɸɯʔkʲɯː
+失格 ʃiʔkɑkɯ
+鉄鋼 teʔko̞ɯ
+各国 kɑʔkokɯ
+結構 keʔko̞ɯ
+括弧 kɑʔko
+復興 ɸɯʔko̞ɯ
+発給 hɑʔkʲɯː
+ペッカ peʔkɑ
+借金 ʃɑʔkin
+薬局 jɑʔkʲokɯ
+ベーシック be̞ːʃiʔkɯ
+特権 to̞ʔken
+北極圏 ho̞ʔkʲo̞kɯken
+発行 hɑʔkoː
+実行 dʒiʔko̞ɯ
+国境 ko̞ʔkʲo̞ɯ
+国会 koʔkɑi
+続行 zo̞ʔko̞ɯ
+出勤 ʃɯʔkin
+ホッケー ho̞ʔkeː
+特区 to̞ʔkɯ
+ハッカー hɑʔkɑː
+北海道大学 hoʔkɑidoː dɑigɑkɯ
+一環 iʔkɑn
+執行 ʃiʔko̞ɯ
+発掘 hɑʔkɯt͡sɯ
+日興 niʔko̞ɯ
+ブルドック bɯrɯdo̞ʔkɯ
+決行 keʔko̞ɯ
+アッコ ɑʔko
+物件 bɯʔke̞n
+ザッケローニ zɑʔkeroːni
+結局 keʔkʲo̞kɯ
+ネック ne̞ʔkɯ
+エルゴノミック erɯgo̞no̞miʔkɯ
+発覚 hɑʔkɑkɯ
+コロッケ ko̞ro̞ʔke
+末期 mɑʔki
+発揮 hɑʔki
+ザッカーバーグ zɑʔkɑːbɑːgɯ
+ファナック ɸɑnɑʔkɯ
+物価 bɯʔkɑ
+小中学校 ʃoːtʃɯːgɑʔkoː
+トラック torɑʔkɯ
+ザック zɑʔkɯ
+特訓 to̞ʔkɯn
+実家 dʒiʔkɑ
+菊花 kiʔkɑ
+活発 kɑʔpɑt͡sɯ
+日本航空 niʔpo̞n ko̞ɯkɯː
+アップ ɑʔpɯ
+吉報 kiʔpo̞ɯ
+鉄壁 te̞ʔpe̞ki
+ハッピーエンド hɑʔpiːendo
+日本語 niʔpo̞n go̞
+イチキュッパ itʃi kʲɯʔpɑ
+パワーアップ paɯaː aʔpɯ
+ポテトチップス potetociʔpɯsɯ
+バージョンアップ bɑːdʒon ɑʔpɯ
+絶壁 ze̞ʔpe̞ki
+切符 kiʔpɯ
+滅法 meʔpo̞ɯ
+アップデート ɑʔpɯdeːto
+圧迫 ɑʔpɑkɯ
+日本 niʔpo̞n
+タップ tɑʔpɯ
+スナップ sɯnaʔpɯ
+日本一 niʔpo̞n itʃi
+物販 bɯʔpɑn
+スキンシップ sɯkinshɪʔpɯ
+東日本 higɑʃi niʔpon
+チューリップ tʃɯːriʔpɯ
+ラインナップ rɑinnɑʔpɯ
+屈服 kɯʔpɯkɯ
+執筆 ʃiʔpit͡sɯ
+一般人 iʔpɑnnin
+一般 iʔpɑn
+ステップ sɯteʔpɯ
+ニャッパゲ ɲɑʔpɑge
+失敗談 ʃiʔpɑi dɑn
+サッパリ sɑʔpɑɾi
+六本木 ro̞ʔpo̞ngi
+出発 ʃɯʔpɑt͡sɯ
+スワップ sɯwaʔpɯ
+チェンジアップ tʃendʒi ɑʔpɯ
+一報 iʔpo̞ɯ
+一方 iʔpo̞ɯ
+ヨーロッパ joːroʔpɑ
+ギャップ gʲɑʔpɯ
+欠片 ke̞ʔpe̞n
+クリップ kɯriʔpɯ
+トップ to̞ʔpɯ
+カップル kɑʔpɯrɯ
+絶品 ze̞ʔpin
+ワールドカップ ɯaːrɯdokaʔpɯ
+ストップ安 sɯtoʔpɯjasɯ
+突破 toʔpɑ
+日本人 niʔpo̞nnin
+一服 iʔpɯkɯ
+一変 iʔpe̞n
+アップル ɑʔpɯrɯ
+一辺倒 iʔpento̞ɯ
+タイムアップ tɑimɯ ɑʔpɯ
+ヒップ hiʔpɯ
+出版 ʃɯʔpɑn
+アッパ ɑʔpɑ
+失敗 ʃiʔpɑi
+発泡 hɑʔpoː
+日本銀行 niʔpo̞n ginko̞ɯ
+ショッピング ʃo̞ʔpingɯ
+ショップ ʃo̞ʔpɯ
+一端 iʔpɑʃi
+コップ ko̞ʔpɯ
+危機一髪 kiki iʔpɑt͡sɯ
+出費 ʃɯʔpi
+発表 hɑʔpʲoː
+手一杯 te iʔpɑi
+大雑把 oːzɑʔpɑ
+グリップ gɯriʔpɯ
+リップ riʔpɯ
+トップクラス toʔpɯkɯrasɯ
+絶版 zeʔpɑn
+デベロッパー deberoʔpɑː
+カップ kɑʔpɯ
+札幌 sɑʔpoɾo
+北方 ho̞ʔpo̞ɯ
+逸品 iʔpin
+立派 riʔpɑ
+潔癖 ke̞ʔpe̞ki
+出品 ʃɯʔpin
+ストップ高 sɯtoʔpɯdaka
+勃発 boʔpɑt͡sɯ
+バックアップ bɑʔkɯɑʔpɯ
+発色 haʔshokɯ
+末席 mɑʔseki
+抜粋 bɯʔsɯi
+脱水 daʔsɯi
+欠食 kɛʔshokɯ
+結束 keʔsokɯ
+合宿 gaʔshɯkɯ
+決する keʔsɯɾɯ
+生粋 kiʔsɯi
+喫茶 kiʔsɑ
+渇水 kaʔsɯi
+掘削 kɯʔsakɯ
+立身出世 ɾiʔʃinʃɯʔse
+一昨年 iʔsakɯnɛn
+実績 dʑiʔse̞ki
+率先 so̞ʔsen
+出席 ʃɯʔseki
+一睡 iʔsɯi
+圧縮 aʔshɯkɯ
+バッシング baʔshiŋgɯ
+早速 saʔsokɯ
+実質 dʑiʔshitsɯ
+幕張メッセ makɯharimeʔse
+エッチ e̞tʔʃi
+ターゲット tɑːgeʔto
+フットサル fɯʔtosaɾɯ
+最も mo̞ʔto̞mo̞
+血統書付き keʔto̞ɯʃo̞t͡sɯki
+ツイッター t͡sɯiʔtɑː
+カット kɑʔto
+パケット pɑkeʔto
+コックピット ko̞ʔkɯpiʔto̞
+マーケット mɑːkeʔto
+密着 miʔtʃɑkɯ
+タッチアンドトライコーナー tɑʔtʃi ɑndo torɑi koːnɑː
+ポット po̞ʔto̞
+ペット peʔto̞
+出張 ʃɯʔtʃo̞ɯ
+ハッタリ hɑʔtɑri
+バット bɑʔto
+出店 ʃɯʔte̞n
+撤退 teʔtɑi
+立体 riʔtɑi
+バッチリ bɑʔtʃiri
+ジェット dʒeʔto̞
+シャッター ʃɑʔtɑː
+筆頭 hiʔto̞ɯ
+絶対 zeʔtɑi
+訴える ɯʔtɑerɯ
+納得 nɑʔtokɯ
+フットインリフレ ɸɯʔto̞ in riɸɯre
+チャット tʃɑʔto
+滅多に meʔtɑ ni
+アウトプット ɑɯtopɯʔto
+ナッツ nɑʔt͡sɯ
+ネット neʔto̞
+実態 dʒiʔtɑi
+実体 dʒiʔtɑi
+インターネット intɑːneʔto
+キャッチフレーズ kʲɑʔtʃiɸɯreːzɯ
+モケット mo̞keʔto̞
+キャッチボール kʲɑʔtʃi boːrɯ
+ゲット geʔto̞
+甲冑 kɑʔtʃɯː
+スポット sɯpoʔto
+全く mɑʔtɑkɯ
+マット mɑʔto
+熱中 ne̞ʔtʃɯː
+ホットドッグ ho̞ʔto̞ do̞ʔgɯ
+バッテリ bɑʔteri
+没頭 bo̞ʔto̞ɯ
+抹茶 mɑʔtʃɑ
+ジャケット dʒɑkeʔto
+匹敵 hiʔte̞ki
+タマゴッチ tɑmɑ goʔtʃi
+勝手 kɑʔte
+別途 beʔto̞
+一転 iʔte̞n
+アットホーム ɑʔto hoːmɯ
+失点 ʃiʔte̞n
+三つ mitʔt͡sɯ
+発達 hɑʔtɑt͡sɯ
+発展 hɑʔten
+パイロット pɑiroʔto
+一手 iʔte̞
+決着 keʔtʃɑkɯ
+不一致 ɸɯiʔtʃi
+御法度 gohɑʔto
+マルチタッチ mɑrɯtʃi tɑʔtʃi
+スロット sɯɾoʔto
+チケット tʃikeʔto̞
+熱湯 neʔto̞ɯ
+カーペット kɑːpeʔto
+ベケット bekeʔto̞
+出展 ʃɯʔte̞n
+キャッチャー kʲɑʔtʃɑː
+ポッチャリ poʔtʃɑri
+立地 riʔtʃi
+一途 iʔto̞
+越冬 eʔto̞ɯ
+プラットフォーム pɯrɑʔtoɸoːmɯ
+キット kiʔto̞
+ポケット po̞keʔto̞
+ヨット jo̞ʔto̞
+失墜 ʃiʔt͡sɯi
+ハット hɑʔto
+一寸 tʃo̞ʔto̞
+鳥取 to̞ʔto̞ri
+発注 hɑʔtʃɯː
+エチケット etʃikeʔto̞
+キッチン kiʔtʃin
+ハーゲンダッツジャパン hɑːgendɑʔt͡sɯ dʒɑpɑn
+デュエット djɯeʔto̞
+夫 o̞ʔto̞
+タブレット tɑbɯreʔto
+ダイエット dɑieʔto
+列島 reʔto̞ɯ
+失調 ʃiʔtʃo̞ɯ
+日通 niʔt͡sɯː
+メリット meriʔto̞
+ピッチャー piʔtʃɑː
+ピット piʔto̞
+タッチ tɑʔtʃi
+マッチ mɑʔtʃi
+デメリット demeriʔto̞
+日東電工 niʔto̞ɯ denko̞ɯ
+新日鉄 ʃinniʔte̞t͡sɯ
+モットー mo̞ʔto̞ː
+嫉妬 ʃiʔto̞
+訴え ɯʔtɑe
+圧倒的 ɑʔtoːteki
+雪駄 seʔtɕɑ
+ピッタリ piʔtɑri
+ヒット hiʔto̞
+納豆 nɑʔtoː
+レッツ re̞ʔt͡sɯ
+シルエット ʃirɯeʔto̞
+使い勝手 t͡sɯkɑi kɑʔte
+ロボット ro̞bo̞ʔto̞
+発端 hoʔtɑn
+ゾッと zo̞ʔto̞
+スポットライト sɯpoʔtoɾaito
+沸騰 ɸɯʔto̞ɯ
+キャッチング kʲɑʔtʃingɯ
+一体 iʔtɑi
+タッチパネル tɑʔtʃi pɑnerɯ
+抜擢 bɑʔteki
+ホット ho̞ʔto̞
+マスコット masɯkoʔto
+アルファベット ɑrɯɸɑbeʔto
+ベビーシッター bebii ʃiʔtɑː
+ピッチ piʔtʃi
+バッター bɑʔtɑː
+ジェット機 dʒeʔto̞ ki
+タイムリミット tɑimɯ rimiʔto
+一致 iʔtʃi
+バッティング bɑʔtingɯ
+ベッテル be̞ʔte̞rɯ
+レッテル re̞ʔte̞rɯ
+リッチ riʔtʃi
+ユナイテッド jɯnɑiteʔdo
+グッドウィル gɯʔdo̞ irɯ
+ハイブリッド hɑibɯriʔdo
+ベッド beʔdo̞
+パッド pɑʔdo
+ハリウッド hɑriɯʔdo
+ヘッドホン heʔdo̞ho̞n
+ヘッド heʔdo̞
+ピラミッド pirɑmiʔdo
+レッド reʔdo̞
+テッド teʔdo̞
+ブラッド bɯrɑʔdo
+グッド gɯʔdo̞
+ウッド ɯʔdo̞
+ピクニック pikniʔk
+スマートフォン smɑːtofon
+親切 ʃinsetsɯ
diff --git a/tests/collections/asr/decoding/rnnt_alignments_check.py b/tests/collections/asr/decoding/rnnt_alignments_check.py
index d44f7f8fd985..ec0656cbce49 100644
--- a/tests/collections/asr/decoding/rnnt_alignments_check.py
+++ b/tests/collections/asr/decoding/rnnt_alignments_check.py
@@ -17,29 +17,66 @@
# these tests outside of the CI machines environment, where test data is
# stored
-import os
+from pathlib import Path
+from typing import Union
+
import pytest
+import torch.cuda
from examples.asr.transcribe_speech import TranscriptionConfig
from omegaconf import OmegaConf
-from nemo.collections.asr.parts.utils.transcribe_utils import prepare_audio_data, setup_model
+from nemo.collections.asr.models import EncDecRNNTBPEModel
+from nemo.collections.asr.parts.utils.manifest_utils import read_manifest, write_manifest
+from nemo.collections.asr.parts.utils.transcribe_utils import prepare_audio_data
+
+DEVICES = []
+
+if torch.cuda.is_available():
+ DEVICES.append('cuda')
-TEST_DATA_PATH = "/home/TestData/an4_dataset/an4_val.json"
-PRETRAINED_MODEL_NAME = "stt_en_conformer_transducer_small"
+@pytest.fixture(scope="module")
+def stt_en_conformer_transducer_small_model():
+ model = EncDecRNNTBPEModel.from_pretrained(model_name="stt_en_conformer_transducer_small", map_location="cpu")
+ return model
-def get_rnnt_alignments(strategy: str, loop_labels: bool = True, use_cuda_graph_decoder=False, location="cuda"):
- cfg = OmegaConf.structured(TranscriptionConfig(pretrained_name=PRETRAINED_MODEL_NAME))
+
+@pytest.fixture(scope="module")
+def an4_val_manifest_corrected(tmp_path_factory, test_data_dir):
+ """
+ Correct an4_val manifest audio filepaths, e.g.,
+ "tests/data/asr/test/an4/wav/an440-mjgm-b.wav" -> test_data_dir / "test/an4/wav/an440-mjgm-b.wav"
+ """
+ an4_val_manifest_orig_path = Path(test_data_dir) / "asr/an4_val.json"
+ an4_val_manifest_corrected_path = tmp_path_factory.mktemp("manifests") / "an4_val_corrected.json"
+ an4_val_records = read_manifest(an4_val_manifest_orig_path)
+ for record in an4_val_records:
+ record["audio_filepath"] = record["audio_filepath"].replace(
+ "tests/data/asr", str(an4_val_manifest_orig_path.resolve().parent)
+ )
+ write_manifest(an4_val_manifest_corrected_path, an4_val_records)
+ return an4_val_manifest_corrected_path
+
+
+def get_rnnt_alignments(
+ strategy: str,
+ manifest_path: Union[Path, str],
+ model: EncDecRNNTBPEModel,
+ loop_labels: bool = True,
+ use_cuda_graph_decoder=False,
+ device="cuda",
+):
+ cfg = OmegaConf.structured(TranscriptionConfig())
cfg.rnnt_decoding.confidence_cfg.preserve_frame_confidence = True
cfg.rnnt_decoding.preserve_alignments = True
cfg.rnnt_decoding.strategy = strategy
if cfg.rnnt_decoding.strategy == "greedy_batch":
cfg.rnnt_decoding.greedy.loop_labels = loop_labels
cfg.rnnt_decoding.greedy.use_cuda_graph_decoder = use_cuda_graph_decoder
- cfg.dataset_manifest = TEST_DATA_PATH
+ cfg.dataset_manifest = str(manifest_path)
filepaths = prepare_audio_data(cfg)[0][:10] # selecting 10 files only
- model = setup_model(cfg, map_location=location)[0]
+ model = model.to(device)
model.change_decoding_strategy(cfg.rnnt_decoding)
transcriptions = model.transcribe(
@@ -72,16 +109,35 @@ def cleanup_local_folder():
# TODO: add the same tests for multi-blank RNNT decoding
-@pytest.mark.skipif(not os.path.exists('/home/TestData'), reason='Not a Jenkins machine')
+@pytest.mark.parametrize("device", DEVICES)
@pytest.mark.parametrize("loop_labels", [True, False])
@pytest.mark.parametrize("use_cuda_graph_decoder", [True, False])
-def test_rnnt_alignments(loop_labels: bool, use_cuda_graph_decoder: bool):
+@pytest.mark.with_downloads
+def test_rnnt_alignments(
+ loop_labels: bool,
+ use_cuda_graph_decoder: bool,
+ device: str,
+ an4_val_manifest_corrected,
+ stt_en_conformer_transducer_small_model,
+):
+ if use_cuda_graph_decoder and device != "cuda":
+ pytest.skip("CUDA decoder works only with CUDA")
if not loop_labels and use_cuda_graph_decoder:
pytest.skip("Frame-Looping algorithm with CUDA graphs does not yet support alignments")
# using greedy as baseline and comparing all other configurations to it
- ref_transcriptions = get_rnnt_alignments("greedy")
+ ref_transcriptions = get_rnnt_alignments(
+ "greedy",
+ manifest_path=an4_val_manifest_corrected,
+ model=stt_en_conformer_transducer_small_model,
+ device=device,
+ )
transcriptions = get_rnnt_alignments(
- "greedy_batch", loop_labels=loop_labels, use_cuda_graph_decoder=use_cuda_graph_decoder
+ "greedy_batch",
+ loop_labels=loop_labels,
+ use_cuda_graph_decoder=use_cuda_graph_decoder,
+ manifest_path=an4_val_manifest_corrected,
+ model=stt_en_conformer_transducer_small_model,
+ device=device,
)
# comparing that label sequence in alignments is exactly the same
# we can't compare logits as well, because they are expected to be
diff --git a/tests/collections/audio/test_audio_metrics.py b/tests/collections/audio/test_audio_metrics.py
index 2d693bc4ab20..578b67fc2479 100644
--- a/tests/collections/audio/test_audio_metrics.py
+++ b/tests/collections/audio/test_audio_metrics.py
@@ -16,6 +16,14 @@
from torchmetrics.audio.snr import SignalNoiseRatio
from nemo.collections.audio.metrics.audio import AudioMetricWrapper
+from nemo.collections.audio.metrics.squim import SquimMOSMetric, SquimObjectiveMetric
+
+try:
+ import torchaudio
+
+ HAVE_TORCHAUDIO = True
+except ModuleNotFoundError:
+ HAVE_TORCHAUDIO = False
class TestAudioMetricWrapper:
@@ -140,3 +148,126 @@ def test_channel(self, channel):
ref_metric.reset()
wrapped_metric.reset()
+
+
+class TestSquimMetrics:
+ @pytest.mark.unit
+ @pytest.mark.parametrize('fs', [16000, 24000])
+ def test_squim_mos(self, fs: int):
+ """Test Squim MOS metric"""
+ if HAVE_TORCHAUDIO:
+ # Setup
+ num_batches = 4
+ batch_size = 4
+ atol = 1e-6
+
+ # UUT
+ squim_mos_metric = SquimMOSMetric(fs=fs)
+
+ # Helper function
+ resampler = torchaudio.transforms.Resample(
+ orig_freq=fs,
+ new_freq=16000,
+ lowpass_filter_width=64,
+ rolloff=0.9475937167399596,
+ resampling_method='sinc_interp_kaiser',
+ beta=14.769656459379492,
+ )
+ squim_mos_model = torchaudio.pipelines.SQUIM_SUBJECTIVE.get_model()
+
+ def calculate_squim_mos(preds: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
+ if fs != 16000:
+ preds = resampler(preds)
+ target = resampler(target)
+
+ # Calculate MOS
+ mos_batch = squim_mos_model(preds, target)
+ return mos_batch
+
+ # Test
+ mos_sum = torch.tensor(0.0)
+
+ for n in range(num_batches):
+ preds = torch.randn(batch_size, fs)
+ target = torch.randn(batch_size, fs)
+
+ # UUT forward
+ squim_mos_metric.update(preds=preds, target=target)
+
+ # Golden
+ mos_golden = calculate_squim_mos(preds=preds, target=target)
+ # Accumulate
+ mos_sum += mos_golden.sum()
+
+ # Check the final value of the metric
+ mos_golden_final = mos_sum / (num_batches * batch_size)
+ assert torch.allclose(squim_mos_metric.compute(), mos_golden_final, atol=atol), f'Comparison failed'
+
+ else:
+ with pytest.raises(ModuleNotFoundError):
+ SquimMOSMetric(fs=fs)
+
+ @pytest.mark.unit
+ @pytest.mark.parametrize('metric', ['stoi', 'pesq', 'si_sdr'])
+ @pytest.mark.parametrize('fs', [16000, 24000])
+ def test_squim_objective(self, metric: str, fs: int):
+ """Test Squim objective metric"""
+ if HAVE_TORCHAUDIO:
+ # Setup
+ num_batches = 4
+ batch_size = 4
+ atol = 1e-6
+
+ # UUT
+ squim_objective_metric = SquimObjectiveMetric(fs=fs, metric=metric)
+
+ # Helper function
+ resampler = torchaudio.transforms.Resample(
+ orig_freq=fs,
+ new_freq=16000,
+ lowpass_filter_width=64,
+ rolloff=0.9475937167399596,
+ resampling_method='sinc_interp_kaiser',
+ beta=14.769656459379492,
+ )
+ squim_objective_model = torchaudio.pipelines.SQUIM_OBJECTIVE.get_model()
+
+ def calculate_squim_objective(preds: torch.Tensor) -> torch.Tensor:
+ if fs != 16000:
+ preds = resampler(preds)
+
+ # Calculate metric
+ stoi_batch, pesq_batch, si_sdr_batch = squim_objective_model(preds)
+
+ if metric == 'stoi':
+ return stoi_batch
+ elif metric == 'pesq':
+ return pesq_batch
+ elif metric == 'si_sdr':
+ return si_sdr_batch
+ else:
+ raise ValueError(f'Unknown metric {metric}')
+
+ # Test
+ metric_sum = torch.tensor(0.0)
+
+ for n in range(num_batches):
+ preds = torch.randn(batch_size, fs)
+
+ # UUT forward
+ squim_objective_metric.update(preds=preds, target=None)
+
+ # Golden
+ metric_golden = calculate_squim_objective(preds=preds)
+ # Accumulate
+ metric_sum += metric_golden.sum()
+
+ # Check the final value of the metric
+ metric_golden_final = metric_sum / (num_batches * batch_size)
+ assert torch.allclose(
+ squim_objective_metric.compute(), metric_golden_final, atol=atol
+ ), f'Comparison failed'
+
+ else:
+ with pytest.raises(ModuleNotFoundError):
+ SquimObjectiveMetric(fs=fs, metric=metric)
diff --git a/tests/collections/audio/test_audio_transforms.py b/tests/collections/audio/test_audio_transforms.py
index 342bb16e5b14..6a2d736f4c3f 100644
--- a/tests/collections/audio/test_audio_transforms.py
+++ b/tests/collections/audio/test_audio_transforms.py
@@ -18,7 +18,12 @@
import pytest
import torch
-from nemo.collections.audio.modules.transforms import AudioToSpectrogram, SpectrogramToAudio
+from nemo.collections.audio.modules.transforms import (
+ AudioToSpectrogram,
+ AudioToSpectrogramTA,
+ SpectrogramToAudio,
+ SpectrogramToAudioTA,
+)
try:
importlib.import_module('torchaudio')
@@ -29,6 +34,226 @@
class TestAudioSpectrogram:
+ @pytest.mark.unit
+ @pytest.mark.parametrize('fft_length', [64, 512])
+ @pytest.mark.parametrize('num_channels', [1, 3])
+ def test_audio_to_spec(self, fft_length: int, num_channels: int):
+ """Test output length for audio to spectrogram.
+
+ Create signals of arbitrary length and check output
+ length is matching the actual transform length.
+ """
+ hop_lengths = [fft_length // 2, fft_length // 3, fft_length // 4]
+ batch_size = 4
+ num_examples = 20
+ random_seed = 42
+ atol = 1e-6
+
+ _rng = np.random.default_rng(seed=random_seed)
+
+ for n in range(num_examples):
+
+ # Generate time-domain examples with different length
+ input_length = _rng.integers(low=fft_length, high=100 * fft_length, size=batch_size) # in samples
+ x = _rng.normal(size=(batch_size, num_channels, np.max(input_length)))
+ x = torch.tensor(x)
+ for b in range(batch_size):
+ x[b, :, input_length[b] :] = 0
+
+ for hop_length in hop_lengths:
+ # Prepare transform
+ audio2spec = AudioToSpectrogram(fft_length=fft_length, hop_length=hop_length)
+
+ # Transform the whole batch
+ batch_spec, batch_spec_len = audio2spec(input=x, input_length=torch.tensor(input_length))
+
+ for b in range(batch_size):
+
+ # Transform just the current example
+ b_spec, b_spec_len = audio2spec(input=x[b : b + 1, :, : input_length[b]])
+ actual_len = b_spec.size(-1)
+
+ # Check lengths
+ assert (
+ actual_len == b_spec_len
+ ), f'Output length not matching for example ({n}, {b}) with length {input_length[n]} (hop_length={hop_length}): true {actual_len} vs calculated {b_spec_len}.'
+
+ assert (
+ actual_len == batch_spec_len[b]
+ ), f'Output length not matching for example ({n}, {b}) with length {input_length[n]} (hop_length={hop_length}): true {actual_len} vs calculated batch len {batch_spec_len[b]}.'
+
+ # Make sure transforming a batch is the same as transforming individual examples
+ assert torch.allclose(
+ batch_spec[b, ..., :actual_len], b_spec, atol=atol
+ ), f'Spectrograms not matching for example ({n}, {b}) with length {input_length[b]} (hop_length={hop_length})'
+
+ @pytest.mark.unit
+ @pytest.mark.parametrize('fft_length', [64, 512])
+ @pytest.mark.parametrize('num_channels', [1, 3])
+ def test_spec_to_audio(self, fft_length: int, num_channels: int):
+ """Test output length for spectrogram to audio.
+
+ Create signals of arbitrary length and check output
+ length is matching the actual transform length.
+ """
+ hop_lengths = [fft_length // 2, fft_length // 3, fft_length // 4]
+ batch_size = 4
+ num_examples = 20
+ random_seed = 42
+ atol = 1e-6
+
+ _rng = np.random.default_rng(seed=random_seed)
+
+ for n in range(num_examples):
+
+ # Generate spectrogram examples with different lengths
+ input_length = _rng.integers(low=10, high=100, size=batch_size) # in frames
+ input_shape = (batch_size, num_channels, fft_length // 2 + 1, np.max(input_length))
+ spec = _rng.normal(size=input_shape) + 1j * _rng.normal(size=input_shape)
+ spec = torch.tensor(spec)
+ spec[..., 0, :] = spec[..., 0, :].real
+ spec[..., -1, :] = spec[..., -1, :].real
+ for b in range(batch_size):
+ spec[b, ..., input_length[b] :] = 0
+
+ for hop_length in hop_lengths:
+ # Prepare transform
+ spec2audio = SpectrogramToAudio(fft_length=fft_length, hop_length=hop_length)
+
+ # Transform the whole batch
+ batch_x, batch_x_len = spec2audio(input=spec, input_length=torch.tensor(input_length))
+
+ for b in range(batch_size):
+
+ # Transform just the current example
+ b_x, b_x_len = spec2audio(input=spec[b : b + 1, ..., : input_length[b]])
+
+ actual_len = b_x.size(-1)
+
+ # Check lengths
+ assert (
+ b_x_len == actual_len
+ ), f'Output length not matching for example ({n}, {b}) with {input_length[b]} frames (hop_length={hop_length}): true {actual_len} vs calculated {b_x_len}.'
+
+ assert (
+ batch_x_len[b] == actual_len
+ ), f'Output length not matching for example ({n}, {b}) with {input_length[b]} frames (hop_length={hop_length}): true {actual_len} vs calculated batch {batch_x_len[b]}.'
+
+ # Make sure transforming a batch is the same as transforming individual examples
+ if input_length[b] < spec.size(-1):
+ # Discard the last bit of the signal which differs due to number of frames in batch (with zero padded frames) vs individual (only valid frames).
+ # The reason for this difference is normalization with `window_sumsquare` of the inverse STFT. More specifically,
+ # batched and non-batched transform are using on a different number of frames.
+ tail_length = max(fft_length // 2 - hop_length, 0)
+ else:
+ tail_length = 0
+ valid_len = actual_len - tail_length
+ batch_x_valid = batch_x[b, :, :valid_len]
+ b_x_valid = b_x[..., :valid_len]
+ assert torch.allclose(
+ batch_x_valid, b_x_valid, atol=atol
+ ), f'Signals not matching for example ({n}, {b}) with length {input_length[b]} (hop_length={hop_length}): max abs diff {torch.max(torch.abs(batch_x_valid-b_x_valid))} at {torch.argmax(torch.abs(batch_x_valid-b_x_valid))}'
+
+ @pytest.mark.unit
+ @pytest.mark.parametrize('fft_length', [128, 1024])
+ @pytest.mark.parametrize('num_channels', [1, 4])
+ @pytest.mark.parametrize('magnitude_power', [0.5, 1, 2])
+ @pytest.mark.parametrize('scale', [0.1, 1.0])
+ def test_audio_to_spectrogram_reconstruction(
+ self, fft_length: int, num_channels: int, magnitude_power: float, scale: float
+ ):
+ """Test analysis and synthesis transform result in a perfect reconstruction."""
+ batch_size = 4
+ num_samples = fft_length * 50
+ num_examples = 25
+ random_seed = 42
+ atol = 1e-6
+
+ _rng = np.random.default_rng(seed=random_seed)
+
+ hop_lengths = [fft_length // 2, fft_length // 4]
+
+ for hop_length in hop_lengths:
+ audio2spec = AudioToSpectrogram(
+ fft_length=fft_length, hop_length=hop_length, magnitude_power=magnitude_power, scale=scale
+ )
+ spec2audio = SpectrogramToAudio(
+ fft_length=fft_length, hop_length=hop_length, magnitude_power=magnitude_power, scale=scale
+ )
+
+ for n in range(num_examples):
+ x = _rng.normal(size=(batch_size, num_channels, num_samples))
+
+ x_spec, x_spec_length = audio2spec(input=torch.Tensor(x))
+ x_hat, x_hat_length = spec2audio(input=x_spec, input_length=x_spec_length)
+
+ assert np.allclose(
+ x_hat.cpu().detach().numpy(), x, atol=atol
+ ), f'Reconstructed not matching for example {n} (hop length {hop_length})'
+
+ @pytest.mark.unit
+ @pytest.mark.skipif(not HAVE_TORCHAUDIO, reason="Modules in this test require torchaudio")
+ @pytest.mark.parametrize('fft_length', [128, 1024])
+ @pytest.mark.parametrize('num_channels', [1, 4])
+ @pytest.mark.parametrize('magnitude_power', [0.5, 1, 2])
+ @pytest.mark.parametrize('scale', [0.1, 1.0])
+ def test_match_torchaudio(self, fft_length: int, num_channels: int, magnitude_power: float, scale: float):
+ """Test analysis and synthesis transforms match torchaudio implementation."""
+ batch_size = 4
+ num_samples = fft_length * 50
+ num_examples = 25
+ random_seed = 42
+ atol = 1e-6
+
+ _rng = np.random.default_rng(seed=random_seed)
+
+ hop_lengths = [fft_length // 2, fft_length // 4]
+
+ for hop_length in hop_lengths:
+ audio2spec = AudioToSpectrogram(
+ fft_length=fft_length, hop_length=hop_length, magnitude_power=magnitude_power, scale=scale
+ )
+ spec2audio = SpectrogramToAudio(
+ fft_length=fft_length, hop_length=hop_length, magnitude_power=magnitude_power, scale=scale
+ )
+
+ # Torchaudio versions
+ audio2spec_ta = AudioToSpectrogramTA(
+ fft_length=fft_length, hop_length=hop_length, magnitude_power=magnitude_power, scale=scale
+ )
+ spec2audio_ta = SpectrogramToAudioTA(
+ fft_length=fft_length, hop_length=hop_length, magnitude_power=magnitude_power, scale=scale
+ )
+
+ for n in range(num_examples):
+ x = _rng.normal(size=(batch_size, num_channels, num_samples))
+
+ # Test analysis
+ x_spec, x_spec_length = audio2spec(input=torch.Tensor(x))
+ x_spec_ta, x_spec_length_ta = audio2spec_ta(input=torch.Tensor(x))
+
+ assert torch.allclose(
+ x_spec, x_spec_ta, atol=atol
+ ), f'Analysis not matching for example {n} (hop length {hop_length})'
+
+ assert torch.equal(
+ x_spec_length, x_spec_length_ta
+ ), f'Analysis length not matching for example {n} (hop length {hop_length})'
+
+ # Test synthesis
+ x_hat, x_hat_length = spec2audio(input=x_spec, input_length=x_spec_length)
+ x_hat_ta, x_hat_length_ta = spec2audio_ta(input=x_spec_ta, input_length=x_spec_length_ta)
+
+ assert torch.allclose(
+ x_hat, x_hat_ta, atol=atol
+ ), f'Synthesis not matching for example {n} (hop length {hop_length})'
+
+ assert torch.equal(
+ x_hat_length, x_hat_length_ta
+ ), f'Synthesis length not matching for example {n} (hop length {hop_length})'
+
+
+class TestAudioSpectrogramTA:
@pytest.mark.unit
@pytest.mark.skipif(not HAVE_TORCHAUDIO, reason="Modules in this test require torchaudio")
@pytest.mark.parametrize('fft_length', [64, 512])
diff --git a/tests/collections/common/test_metrics.py b/tests/collections/common/test_metrics.py
index f9005232a017..931a3a2f2497 100644
--- a/tests/collections/common/test_metrics.py
+++ b/tests/collections/common/test_metrics.py
@@ -28,7 +28,9 @@
class TestCommonMetrics:
- top_k_logits = torch.tensor([[0.1, 0.3, 0.2, 0.0], [0.9, 0.6, 0.2, 0.3], [0.2, 0.1, 0.4, 0.3]],) # 1 # 0 # 2
+ top_k_logits = torch.tensor(
+ [[0.1, 0.3, 0.2, 0.0], [0.9, 0.6, 0.2, 0.3], [0.2, 0.1, 0.4, 0.3]],
+ ) # 1 # 0 # 2
@pytest.mark.unit
def test_top_1_accuracy(self):
@@ -130,7 +132,10 @@ def test_top_1_accuracy_distributed_uneven_batch(self):
class TestPerplexity(PerplexityTester):
def test_perplexity(self, ddp, dist_sync_on_step, probs, logits):
self.run_class_perplexity_test(
- ddp=ddp, probs=probs, logits=logits, dist_sync_on_step=dist_sync_on_step,
+ ddp=ddp,
+ probs=probs,
+ logits=logits,
+ dist_sync_on_step=dist_sync_on_step,
)
diff --git a/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py b/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py
index 2e2f9bdaaf36..c5bb6c905a9a 100644
--- a/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py
+++ b/tests/collections/common/tokenizers/text_to_speech/test_tts_tokenizers.py
@@ -20,9 +20,12 @@
GermanCharsTokenizer,
IPATokenizer,
ItalianCharsTokenizer,
+ JapanesePhonemeTokenizer,
SpanishCharsTokenizer,
+ VietnameseCharsTokenizer,
)
from nemo.collections.tts.g2p.models.i18n_ipa import IpaG2p
+from nemo.collections.tts.g2p.models.ja_jp_ipa import JapaneseG2p
class TestTTSTokenizers:
@@ -44,6 +47,10 @@ class TestTTSTokenizers:
"LE": ["lˈə-"],
"MONDE": ["mˈɔ̃d"],
}
+ PHONEME_DICT_JA = {
+ "ハロー": ["haɾoː"],
+ "ワールド": ["wa:ɾdo"],
+ }
@staticmethod
def _parse_text(tokenizer, text):
@@ -124,6 +131,18 @@ def test_spanish_chars_tokenizer(self):
assert chars == expected_output
assert len(tokens) == len(input_text)
+ @pytest.mark.run_only_on('CPU')
+ @pytest.mark.unit
+ def test_vietnamese_chars_tokenizer(self):
+ input_text = "Xin chào các bạn."
+ expected_output = "xin chào các bạn."
+
+ tokenizer = VietnameseCharsTokenizer()
+ chars, tokens = self._parse_text(tokenizer, input_text)
+
+ assert chars == expected_output
+ assert len(tokens) == len(input_text)
+
@pytest.mark.run_only_on('CPU')
@pytest.mark.unit
def test_french_chars_tokenizer(self):
@@ -252,3 +271,15 @@ def test_ipa_tokenizer_fixed_vocab(self):
chars, tokens = self._parse_text(tokenizer, "Hello, wound")
expected_output = "HELLO, ˈwund"
assert chars == expected_output
+
+ @pytest.mark.run_only_on('CPU')
+ @pytest.mark.unit
+ def test_japanese_phoneme_tokenizer(self):
+ input_text = "ハロー ワールド."
+ expected_output = "haɾoː wa:ɾdo."
+ g2p = JapaneseG2p(phoneme_dict=self.PHONEME_DICT_JA, word_segmenter="janome")
+
+ tokenizer = JapanesePhonemeTokenizer(g2p=g2p)
+ chars, tokens = self._parse_text(tokenizer, input_text)
+
+ assert chars == expected_output
diff --git a/tests/collections/llm/gpt/data/test_pre_training_data.py b/tests/collections/llm/gpt/data/test_pre_training_data.py
new file mode 100644
index 000000000000..c42434bbdf31
--- /dev/null
+++ b/tests/collections/llm/gpt/data/test_pre_training_data.py
@@ -0,0 +1,66 @@
+import pytest
+
+import nemo.lightning as nl
+from nemo.collections.llm.gpt.data.pre_training import PreTrainingDataModule
+from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer
+
+DATA_PATH = "/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document"
+VOCAB_PATH = "/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json"
+MERGES_PATH = "/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt"
+
+
+@pytest.fixture
+def tokenizer():
+ return get_nmt_tokenizer(
+ "megatron",
+ "GPT2BPETokenizer",
+ vocab_file=VOCAB_PATH,
+ merges_file=MERGES_PATH,
+ )
+
+
+@pytest.fixture
+def trainer():
+ return nl.Trainer(
+ accelerator="cpu",
+ max_steps=1,
+ )
+
+
+def test_single_data_distribution(tokenizer, trainer):
+
+ data = PreTrainingDataModule(
+ paths=[DATA_PATH],
+ seq_length=512,
+ micro_batch_size=2,
+ global_batch_size=2,
+ tokenizer=tokenizer,
+ )
+ data.trainer = trainer
+
+ ## AssertioneError because we are trying to do eval on the whole
+ ## dataset with just a single distribution
+ with pytest.raises(AssertionError):
+ data.setup(stage="dummy")
+
+ trainer.limit_val_batches = 5
+ ## this should succeed
+ data.setup(stage="dummy")
+
+
+def test_multiple_data_distributions(tokenizer, trainer):
+ data = PreTrainingDataModule(
+ paths={
+ "train": ['1', DATA_PATH],
+ "validation": [DATA_PATH, DATA_PATH],
+ "test": ['1', DATA_PATH],
+ },
+ seq_length=512,
+ micro_batch_size=2,
+ global_batch_size=2,
+ tokenizer=tokenizer,
+ )
+ data.trainer = trainer
+
+ ## this should succeed
+ data.setup(stage="dummy")
diff --git a/tests/core/test_save_restore.py b/tests/core/test_save_restore.py
index 57cbe94b60d7..394ced55a452 100644
--- a/tests/core/test_save_restore.py
+++ b/tests/core/test_save_restore.py
@@ -19,7 +19,6 @@
import pytest
import torch
-from huggingface_hub.hf_api import ModelFilter
from omegaconf import DictConfig, OmegaConf, open_dict
from nemo.collections.asr.models import EncDecCTCModel, EncDecCTCModelBPE
@@ -1324,8 +1323,8 @@ class MockModelV2(MockModel):
@pytest.mark.unit
def test_hf_model_filter(self):
filt = ModelPT.get_hf_model_filter()
- assert isinstance(filt, ModelFilter)
- assert filt.library == 'nemo'
+ assert isinstance(filt, dict)
+ assert filt['library'] == 'nemo'
@pytest.mark.with_downloads()
@pytest.mark.unit
@@ -1334,10 +1333,12 @@ def test_hf_model_info(self):
# check no override results
model_infos = ModelPT.search_huggingface_models(model_filter=None)
+ model_infos = [next(model_infos) for _ in range(5)]
assert len(model_infos) > 0
# check with default override results (should match above)
default_model_infos = ModelPT.search_huggingface_models(model_filter=filt)
+ default_model_infos = [next(default_model_infos) for _ in range(5)]
assert len(model_infos) == len(default_model_infos)
@pytest.mark.pleasefixme()
@@ -1348,13 +1349,12 @@ def test_hf_model_info_with_card_data(self):
# check no override results
model_infos = ModelPT.search_huggingface_models(model_filter=filt)
+ model_infos = [next(model_infos) for _ in range(5)]
assert len(model_infos) > 0
- assert not hasattr(model_infos[0], 'cardData')
# check overriden defaults
- filt.resolve_card_info = True
+ filt['cardData'] = True
model_infos = ModelPT.search_huggingface_models(model_filter=filt)
- assert len(model_infos) > 0
for info in model_infos:
if hasattr(info, 'cardData'):
@@ -1368,11 +1368,13 @@ def test_hf_model_info_with_limited_results(self):
# check no override results
model_infos = ModelPT.search_huggingface_models(model_filter=filt)
+ model_infos = [next(model_infos) for _ in range(6)]
assert len(model_infos) > 0
# check overriden defaults
- filt.limit_results = 5
+ filt['limit'] = 5
new_model_infos = ModelPT.search_huggingface_models(model_filter=filt)
+ new_model_infos = list(new_model_infos)
assert len(new_model_infos) <= 5
assert len(new_model_infos) < len(model_infos)
diff --git a/tests/core/test_straggler_det.py b/tests/core/test_straggler_det.py
index 53ba37ac28bb..ee5222854889 100644
--- a/tests/core/test_straggler_det.py
+++ b/tests/core/test_straggler_det.py
@@ -56,12 +56,12 @@ def on_train_start(self):
rank = torch.distributed.get_rank()
def train_dataloader(self):
- dataset = OnesDataset(128)
- return torch.utils.data.DataLoader(dataset, batch_size=2, num_workers=8)
+ dataset = OnesDataset(1024 * 1024)
+ return torch.utils.data.DataLoader(dataset, batch_size=2, num_workers=2)
def val_dataloader(self):
- dataset = OnesDataset(128)
- return torch.utils.data.DataLoader(dataset, batch_size=2, num_workers=8)
+ dataset = OnesDataset(128 * 1024)
+ return torch.utils.data.DataLoader(dataset, batch_size=2, num_workers=2)
def forward(self, batch):
output = self.l1(batch)
diff --git a/tests/export/run.sh b/tests/export/run.sh
index e534e4e87ee9..a2366f0634ea 100644
--- a/tests/export/run.sh
+++ b/tests/export/run.sh
@@ -36,12 +36,9 @@ python tests/export/nemo_export.py --model_name LLAMA2-13B-base-int4 --existing_
python tests/export/nemo_export.py --model_name LLAMA2-70B-base --existing_test_models --min_tps 2 --max_tps 8
python tests/export/nemo_export.py --model_name LLAMA2-70B-base-fp8 --existing_test_models --min_tps 8 --max_tps 8
python tests/export/nemo_export.py --model_name LLAMA2-70B-base-int4 --existing_test_models --min_tps 8 --max_tps 8
-python tests/export/nemo_export.py --model_name NV-GPT-8B-Base-4k --existing_test_models --min_tps 1 --max_tps 8
-python tests/export/nemo_export.py --model_name NV-GPT-8B-QA-4k --existing_test_models --min_tps 1 --max_tps 8
-python tests/export/nemo_export.py --model_name NV-GPT-8B-Chat-4k-SFT --existing_test_models --min_tps 1 --max_tps 8
-python tests/export/nemo_export.py --model_name NV-GPT-8B-Chat-4k-RLHF --existing_test_models --min_tps 1 --max_tps 8
-python tests/export/nemo_export.py --model_name NV-GPT-8B-Chat-4k-SteerLM --existing_test_models --min_tps 1 --max_tps 8
-python tests/export/nemo_export.py --model_name FALCON-7B-base --existing_test_models --min_tps 1 --max_tps 2
+python tests/export/nemo_export.py --model_name FALCON-7B-base --existing_test_models --min_tps 1 --max_tps 1
python tests/export/nemo_export.py --model_name FALCON-40B-base --existing_test_models --min_tps 2 --max_tps 8
python tests/export/nemo_export.py --model_name STARCODER1-15B-base --existing_test_models --min_tps 1 --max_tps 1
-python tests/export/nemo_export.py --model_name GEMMA-base --existing_test_models --min_tps 1 --max_tps 1
\ No newline at end of file
+python tests/export/nemo_export.py --model_name STARCODER2-15B-base --existing_test_models --min_tps 1 --max_tps 1
+python tests/export/nemo_export.py --model_name GEMMA-base --existing_test_models --min_tps 1 --max_tps 1
+python tests/export/nemo_export.py --model_name Nemotron3-22B-base-32k --existing_test_models --min_tps 2
\ No newline at end of file
diff --git a/tests/infer_data_path.py b/tests/infer_data_path.py
index 45850dcb366a..4125e77c0a1b 100644
--- a/tests/infer_data_path.py
+++ b/tests/infer_data_path.py
@@ -19,125 +19,22 @@
def get_infer_test_data():
test_data = {}
- test_data["NV-GPT-8B-Base-4k"] = {}
- test_data["NV-GPT-8B-Base-4k"]["model_type"] = "gptnext"
- test_data["NV-GPT-8B-Base-4k"]["min_tps"] = 1
- test_data["NV-GPT-8B-Base-4k"]["location"] = "Local"
- test_data["NV-GPT-8B-Base-4k"]["model_dir"] = "/tmp/NV-GPT-8B-Base-4k/nv-gpt-8b-base-4k_v1.0/"
- test_data["NV-GPT-8B-Base-4k"][
+ test_data["Nemotron3-22B-base-32k"] = {}
+ test_data["Nemotron3-22B-base-32k"]["model_type"] = "gptnext"
+ test_data["Nemotron3-22B-base-32k"]["min_tps"] = 2
+ test_data["Nemotron3-22B-base-32k"]["location"] = "Local"
+ test_data["Nemotron3-22B-base-32k"]["model_dir"] = "/tmp/Nemotron3-22B-base-32k/"
+ test_data["Nemotron3-22B-base-32k"][
"checkpoint"
- ] = "/opt/checkpoints/NV-GPT-8B-Base-4k/nv-gpt-8b-base-4k_v1.0/NV-GPT-8B-Base-4k.nemo"
- test_data["NV-GPT-8B-Base-4k"]["p_tuning_checkpoint"] = "/opt/checkpoints/NV-GPT-8B-PTuning/nv-gpt-8B-ptuning.nemo"
- test_data["NV-GPT-8B-Base-4k"]["prompt_template"] = [
+ ] = "/opt/checkpoints/nemotron-3-22b-base-32k_v1.0/mcore-gpt3-22b-3_8T-pi32k-3_5T-cont-10k.nemo"
+ test_data["Nemotron3-22B-base-32k"]["prompt_template"] = [
"The capital of France is",
"Largest animal in the sea is",
"Fastest animal in the world is",
]
- test_data["NV-GPT-8B-Base-4k"]["expected_keyword"] = ["Paris", "Whale", "Cheetah"]
- test_data["NV-GPT-8B-Base-4k"]["max_output_len"] = 128
- test_data["NV-GPT-8B-Base-4k"]["max_batch_size"] = 10
-
- test_data["NV-GPT-8B-Base-16k"] = {}
- test_data["NV-GPT-8B-Base-16k"]["model_type"] = "gptnext"
- test_data["NV-GPT-8B-Base-16k"]["min_tps"] = 1
- test_data["NV-GPT-8B-Base-16k"]["location"] = "Local"
- test_data["NV-GPT-8B-Base-16k"]["model_dir"] = "/tmp/NV-GPT-8B-Base-16k/nv-gpt-8b-base-16k_v1.0/"
- test_data["NV-GPT-8B-Base-16k"][
- "checkpoint"
- ] = "/opt/checkpoints/NV-GPT-8B-Base-16k/nv-gpt-8b-base-16k_v1.0/NV-GPT-8B-Base-16k.nemo"
- test_data["NV-GPT-8B-Base-16k"]["prompt_template"] = [
- "The capital of France is",
- "Largest animal in the sea is",
- "Fastest animal in the world is",
- ]
- test_data["NV-GPT-8B-Base-16k"]["expected_keyword"] = ["Paris", "Whale", "Cheetah"]
- test_data["NV-GPT-8B-Base-16k"]["max_output_len"] = 128
- test_data["NV-GPT-8B-Base-16k"]["max_batch_size"] = 20
-
- test_data["NV-GPT-8B-QA-4k"] = {}
- test_data["NV-GPT-8B-QA-4k"]["model_type"] = "gptnext"
- test_data["NV-GPT-8B-QA-4k"]["min_tps"] = 1
- test_data["NV-GPT-8B-QA-4k"]["location"] = "Local"
- test_data["NV-GPT-8B-QA-4k"]["model_dir"] = "/tmp/NV-GPT-8B-QA-4k/nv-gpt-8b-qa-4k_v1.0/"
- test_data["NV-GPT-8B-QA-4k"][
- "checkpoint"
- ] = "/opt/checkpoints/NV-GPT-8B-QA-4k/nv-gpt-8b-qa-4k_v1.0/NV-GPT-8B-QA-4k.nemo"
- test_data["NV-GPT-8B-QA-4k"]["prompt_template"] = [
- "What is the capital of France?",
- "What is the largest animal in the sea?",
- "What is the fastest animal in the world?",
- ]
- test_data["NV-GPT-8B-QA-4k"]["expected_keyword"] = ["Paris", "Whale", "Cheetah"]
- test_data["NV-GPT-8B-QA-4k"]["max_output_len"] = 96
- test_data["NV-GPT-8B-QA-4k"]["max_batch_size"] = 20
-
- test_data["NV-GPT-8B-Chat-4k-SFT"] = {}
- test_data["NV-GPT-8B-Chat-4k-SFT"]["model_type"] = "gptnext"
- test_data["NV-GPT-8B-Chat-4k-SFT"]["min_tps"] = 1
- test_data["NV-GPT-8B-Chat-4k-SFT"]["location"] = "Local"
- test_data["NV-GPT-8B-Chat-4k-SFT"]["model_dir"] = "/tmp/NV-GPT-8B-Chat-4k-SFT/nv-gpt-8b-chat-4k-sft_v1.0/"
- test_data["NV-GPT-8B-Chat-4k-SFT"][
- "checkpoint"
- ] = "/opt/checkpoints/NV-GPT-8B-Chat-4k-SFT/nv-gpt-8b-chat-4k-sft_v1.0/NV-GPT-8B-Chat-4k-SFT.nemo"
- test_data["NV-GPT-8B-Chat-4k-SFT"]["prompt_template"] = [
- "What is the capital of France?",
- "What is the largest animal in the sea?",
- "What is the fastest animal in the world?",
- ]
- test_data["NV-GPT-8B-Chat-4k-SFT"]["expected_keyword"] = ["Paris", "Whale", "Cheetah"]
- test_data["NV-GPT-8B-Chat-4k-SFT"]["max_output_len"] = 256
- test_data["NV-GPT-8B-Chat-4k-SFT"]["max_batch_size"] = 5
-
- test_data["NV-GPT-8B-Chat-4k-RLHF"] = {}
- test_data["NV-GPT-8B-Chat-4k-RLHF"]["model_type"] = "gptnext"
- test_data["NV-GPT-8B-Chat-4k-RLHF"]["min_tps"] = 1
- test_data["NV-GPT-8B-Chat-4k-RLHF"]["location"] = "Local"
- test_data["NV-GPT-8B-Chat-4k-RLHF"]["model_dir"] = "/tmp/NV-GPT-8B-Chat-4k-RLHF/nv-gpt-8b-chat-4k-rlhf_v1.0/"
- test_data["NV-GPT-8B-Chat-4k-RLHF"][
- "checkpoint"
- ] = "/opt/checkpoints/NV-GPT-8B-Chat-4k-RLHF/nv-gpt-8b-chat-4k-rlhf_v1.0/NV-GPT-8B-Chat-4k-RLHF.nemo"
- test_data["NV-GPT-8B-Chat-4k-RLHF"]["prompt_template"] = [
- "What is the capital of France?",
- "What is the largest animal in the sea?",
- "What is the fastest animal in the world?",
- ]
- test_data["NV-GPT-8B-Chat-4k-RLHF"]["expected_keyword"] = ["Paris", "Whale", "Cheetah"]
- test_data["NV-GPT-8B-Chat-4k-RLHF"]["max_output_len"] = 128
- test_data["NV-GPT-8B-Chat-4k-RLHF"]["max_batch_size"] = 10
-
- test_data["NV-GPT-8B-Chat-4k-SteerLM"] = {}
- test_data["NV-GPT-8B-Chat-4k-SteerLM"]["model_type"] = "gptnext"
- test_data["NV-GPT-8B-Chat-4k-SteerLM"]["min_tps"] = 1
- test_data["NV-GPT-8B-Chat-4k-SteerLM"]["location"] = "Local"
- test_data["NV-GPT-8B-Chat-4k-SteerLM"][
- "model_dir"
- ] = "/tmp/NV-GPT-8B-Chat-4k-SteerLM/nv-gpt-8b-chat-4k-steerlm_v1.0/"
- test_data["NV-GPT-8B-Chat-4k-SteerLM"][
- "checkpoint"
- ] = "/opt/checkpoints/NV-GPT-8B-Chat-4k-SteerLM/nv-gpt-8b-chat-4k-steerlm_v1.0/NV-GPT-8B-Chat-4k-SteerLM.nemo"
- test_data["NV-GPT-8B-Chat-4k-SteerLM"]["prompt_template"] = [
- "What is the capital of France?",
- "What is the largest animal in the sea?",
- "What is the fastest animal in the world?",
- ]
- test_data["NV-GPT-8B-Chat-4k-SteerLM"]["expected_keyword"] = ["Paris", "Whale", "Cheetah"]
- test_data["NV-GPT-8B-Chat-4k-SteerLM"]["max_output_len"] = 128
- test_data["NV-GPT-8B-Chat-4k-SteerLM"]["max_batch_size"] = 10
-
- test_data["GPT-43B-Base"] = {}
- test_data["GPT-43B-Base"]["model_type"] = "gptnext"
- test_data["GPT-43B-Base"]["min_tps"] = 2
- test_data["GPT-43B-Base"]["location"] = "Local"
- test_data["GPT-43B-Base"]["model_dir"] = "/tmp/GPT-43B-Base/gpt-43B-base/"
- test_data["GPT-43B-Base"]["checkpoint"] = "/opt/checkpoints/GPT-43B-Base/gpt-43B-base.nemo"
- test_data["GPT-43B-Base"]["prompt_template"] = [
- "The capital of France is",
- "Largest animal in the sea is",
- "Fastest animal in the world is",
- ]
- test_data["GPT-43B-Base"]["expected_keyword"] = ["Paris", "Whale", "Cheetah"]
- test_data["GPT-43B-Base"]["max_output_len"] = 128
- test_data["GPT-43B-Base"]["max_batch_size"] = 10
+ test_data["Nemotron3-22B-base-32k"]["expected_keyword"] = ["Paris", "Whale", "Cheetah"]
+ test_data["Nemotron3-22B-base-32k"]["max_output_len"] = 128
+ test_data["Nemotron3-22B-base-32k"]["max_batch_size"] = 10
test_data["LLAMA2-7B-base"] = {}
test_data["LLAMA2-7B-base"]["model_type"] = "llama"
@@ -367,6 +264,17 @@ def get_infer_test_data():
test_data["STARCODER1-15B-base"]["max_output_len"] = 128
test_data["STARCODER1-15B-base"]["max_batch_size"] = 5
+ test_data["STARCODER2-15B-base"] = {}
+ test_data["STARCODER2-15B-base"]["model_type"] = "starcoder"
+ test_data["STARCODER2-15B-base"]["min_tps"] = 1
+ test_data["STARCODER2-15B-base"]["location"] = "Local"
+ test_data["STARCODER2-15B-base"]["model_dir"] = "/tmp/STARCODER2-15B-base/trt_llm_model-1/"
+ test_data["STARCODER2-15B-base"]["checkpoint"] = "/opt/checkpoints/starcoder-2_15b_4k_vfinal/4194b.nemo"
+ test_data["STARCODER2-15B-base"]["prompt_template"] = ["def fibonnaci(n"]
+ test_data["STARCODER2-15B-base"]["expected_keyword"] = ["fibonnaci"]
+ test_data["STARCODER2-15B-base"]["max_output_len"] = 128
+ test_data["STARCODER2-15B-base"]["max_batch_size"] = 5
+
test_data["GEMMA-base"] = {}
test_data["GEMMA-base"]["model_type"] = "gemma"
test_data["GEMMA-base"]["min_tps"] = 1
diff --git a/tests/lightning/test_strategy_lib.py b/tests/lightning/test_strategy_lib.py
index b59930ab023d..6a63450f37df 100644
--- a/tests/lightning/test_strategy_lib.py
+++ b/tests/lightning/test_strategy_lib.py
@@ -1,8 +1,9 @@
from unittest.mock import ANY, MagicMock, patch
+import torch
from torch import nn
-from nemo.lightning import _strategy_lib # , DataConfig
+from nemo.lightning import MegatronStrategy, _strategy_lib # , DataConfig
class Identity(nn.Identity):
@@ -15,6 +16,33 @@ def copy(self):
return WithCopy()
+def test_set_model_parallel_attributes() -> None:
+ strategy = MegatronStrategy(
+ pipeline_model_parallel_size=2,
+ expert_model_parallel_size=2,
+ sequence_parallel=False,
+ pipeline_dtype=torch.float32,
+ )
+ from megatron.core.transformer.transformer_config import TransformerConfig
+
+ class DummyModel:
+ def __init__(self):
+ self.config = TransformerConfig(hidden_size=128, num_attention_heads=2, num_layers=2)
+
+ def configure_model(self):
+ pass
+
+ model = DummyModel()
+ assert model.config.pipeline_model_parallel_size != 2
+ assert model.config.expert_model_parallel_size != 2
+ assert model.config.pipeline_dtype != torch.float32
+ _strategy_lib.set_model_parallel_attributes(model, strategy.parallelism)
+ assert model.config.pipeline_model_parallel_size == 2
+ assert model.config.expert_model_parallel_size == 2
+ assert model.config.sequence_parallel == False
+ assert model.config.pipeline_dtype == torch.float32
+
+
@patch('nemo.collections.nlp.modules.common.megatron.megatron_init.initialize_model_parallel_for_nemo')
def test_init_parallel_ranks(mock_initialize_model_parallel) -> None:
from nemo.utils import AppState
@@ -23,6 +51,8 @@ def test_init_parallel_ranks(mock_initialize_model_parallel) -> None:
app_state.tensor_model_parallel_size = 2
app_state.pipeline_model_parallel_size = 3
+ app_state.context_parallel_size = 2
+ app_state.expert_model_parallel_size = 2
app_state.global_rank = 1
app_state.local_rank = 0
@@ -30,11 +60,18 @@ def test_init_parallel_ranks(mock_initialize_model_parallel) -> None:
mock_parallel_config.tensor_model_parallel_size = 2
mock_parallel_config.pipeline_model_parallel_size = 3
mock_parallel_config.virtual_pipeline_model_parallel_size = 4
- mock_parallel_config.ub_tp_comm_overlap = False
+ mock_parallel_config.context_parallel_size = 2
+ mock_parallel_config.expert_model_parallel_size = 2
+ mock_parallel_config.tp_comm_overlap = False
mock_parallel_config.pipeline_model_parallel_split_rank = None
_strategy_lib.init_parallel_ranks(
- world_size=2, global_rank=1, local_rank=0, parallel_config=mock_parallel_config, seed=1234, fp8=False,
+ world_size=2,
+ global_rank=1,
+ local_rank=0,
+ parallel_config=mock_parallel_config,
+ seed=1234,
+ fp8=False,
)
mock_initialize_model_parallel.assert_called_once_with(
world_size=2,
@@ -43,6 +80,8 @@ def test_init_parallel_ranks(mock_initialize_model_parallel) -> None:
tensor_model_parallel_size=2,
pipeline_model_parallel_size=3,
virtual_pipeline_model_parallel_size=4,
+ context_parallel_size=2,
+ expert_model_parallel_size=2,
seed=1234,
pipeline_model_parallel_split_rank=None,
use_fp8=False,
@@ -60,6 +99,8 @@ def test_init_model_parallel(mock_mpu, *args):
app_state.tensor_model_parallel_size = 2
app_state.pipeline_model_parallel_size = 1
app_state.pipeline_model_parallel_split_rank = None
+ app_state.context_parallel_size = 2
+ app_state.expert_model_parallel_size = 2
app_state.init_mpi_proc_group = False
app_state.tensor_model_parallel_rank = 2
app_state.pipeline_model_parallel_rank = 0
@@ -72,6 +113,8 @@ def test_init_model_parallel(mock_mpu, *args):
pipeline_model_parallel_size=1,
virtual_pipeline_model_parallel_size=None,
pipeline_model_parallel_split_rank=None,
+ context_parallel_size=2,
+ expert_model_parallel_size=2,
)
diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb
index 94e2caa17a58..a02ee4f99714 100644
--- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb
+++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb
@@ -225,7 +225,7 @@
"cell_type": "code",
"source": [
"if not os.path.exists(\"convert_hf_dataset_to_nemo.py\"):\n",
- " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speech_recognition/convert_hf_dataset_to_nemo.py\n",
+ " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/scripts/speech_recognition/convert_hf_dataset_to_nemo.py\n",
""
],
"metadata": {
@@ -2217,4 +2217,4 @@
]
}
]
-}
\ No newline at end of file
+}
diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb
index 9877b983f2a1..612271a8baab 100644
--- a/tutorials/asr/Multilang_ASR.ipynb
+++ b/tutorials/asr/Multilang_ASR.ipynb
@@ -311,7 +311,7 @@
"outputs": [],
"source": [
"if not os.path.exists(\"convert_hf_dataset_to_nemo.py\"):\n",
- " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speech_recognition/convert_hf_dataset_to_nemo.py"
+ " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/scripts/speech_recognition/convert_hf_dataset_to_nemo.py"
]
},
{
diff --git a/tutorials/asr/Transducers_with_HF_Datasets.ipynb b/tutorials/asr/Transducers_with_HF_Datasets.ipynb
index 432906250a71..a47cd00a0b9a 100644
--- a/tutorials/asr/Transducers_with_HF_Datasets.ipynb
+++ b/tutorials/asr/Transducers_with_HF_Datasets.ipynb
@@ -31,7 +31,7 @@
"!pip install matplotlib>=3.3.2\n",
"\n",
"## Install NeMo\n",
- "BRANCH = 'r1.23.0'\n",
+ "BRANCH = 'main'\n",
"!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n"
]
},
diff --git a/tutorials/asr/asr_adapters/Multi_Task_Adapters.ipynb b/tutorials/asr/asr_adapters/Multi_Task_Adapters.ipynb
index 51877b53fb8a..7bd36e6b6ad8 100644
--- a/tutorials/asr/asr_adapters/Multi_Task_Adapters.ipynb
+++ b/tutorials/asr/asr_adapters/Multi_Task_Adapters.ipynb
@@ -46,7 +46,7 @@
"# Multi Task Adaptation with Adapters\n",
"\n",
"\n",
- "In earliier tutorials, we utilized a specific model for one task - for example, an ASR model (CTC, RNN-T etc) for the singular task of Speech Recognition. This is very useful if we want to specialize one task per model, but it can be expensive to deploy a fleet of models for each task, and learn routers to pass user tasks to correct models.\n",
+ "In earlier tutorials, we utilized a specific model for one task - for example, an ASR model (CTC, RNN-T etc) for the singular task of Speech Recognition. This is very useful if we want to specialize one task per model, but it can be expensive to deploy a fleet of models for each task, and learn routers to pass user tasks to correct models.\n",
"\n",
"We now support Multi Task models in NeMo, such that a single model can perform multiple tasks such as speech recognition, speech translation, voice activity detection, and more in the future. With one model supporting multiple tasks, we can simplify the task of deploying models and also hope to leverage individual tasks to improve each other (for example: you do need strong speech recognition first before you start doing translation).\n",
"\n",
@@ -105,7 +105,7 @@
"id": "6c0c87c9-5290-4634-9338-818f181c936a"
},
"source": [
- "# Enable Adapter Suppport in Model\n",
+ "# Enable Adapter Support in Model\n",
"\n",
"New in NeMo 2.0, we now have a simple utility function to convert the model into one that supports adapters, called `replace_adapter_compatible_modules()`.\n",
"\n",
@@ -837,7 +837,7 @@
"\n",
"Data Modules are one way of organizing datasets in PyTorch Lightning. It provides a unified place where data loading and processing can be potentially handled.\n",
"\n",
- "**Note**: This isnt strictly necessary - you can achieve the same using just Pytorch dataloaders directly and passing it to Trainer.fit() but we showcase a data module codebase that can be extended by the user."
+ "**Note**: This isn't strictly necessary - you can achieve the same using just Pytorch dataloaders directly and passing it to Trainer.fit() but we showcase a data module codebase that can be extended by the user."
]
},
{
@@ -1493,10 +1493,10 @@
"id": "2525bec5-c42b-48c1-b03c-e8126c346238"
},
"source": [
- "# Evaluate after Adaptatation\n",
+ "# Evaluate after Adaptation\n",
"\n",
- "Now that the model is done training, lets evalaute its scores on the test set again.\n",
- "We should see a markedly higher translastion BLEU and lower WER from above."
+ "Now that the model is done training, lets evaluate its scores on the test set again.\n",
+ "We should see a markedly higher translation BLEU and lower WER from above."
]
},
{
@@ -1657,4 +1657,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/tutorials/llm/llama-3/README.rst b/tutorials/llm/llama-3/README.rst
index 663c0c99abfc..be232edd1df0 100755
--- a/tutorials/llm/llama-3/README.rst
+++ b/tutorials/llm/llama-3/README.rst
@@ -1,174 +1,19 @@
-Llama 3 LoRA Fine-Tuning and Deployment with NeMo Framework and NVIDIA NIM
-==========================================================================
-`Llama 3 `_ is an open-source large language model by Meta that delivers state-of-the-art performance on popular industry benchmarks. It has been pretrained on over 15 trillion tokens, and supports an 8K token context length. It is available in two sizes, 8B and 70B, and each size has two variants—base pretrained and instruction tuned.
+Getting Started with Llama 3 and Llama 3.1
+==========================================
-`Low-Rank Adaptation (LoRA) `__ has emerged as a popular Parameter-Efficient Fine-Tuning (PEFT) technique that tunes a very small number of additional parameters as compared to full fine-tuning, thereby reducing the compute required.
+This repository contains jupyter notebook tutorials using NeMo Framework for Llama-3 and Llama-3.1 models by Meta.
-`NVIDIA NeMo
-Framework `__ provides tools to perform LoRA on Llama 3 to fit your use case, which can then be deployed using `NVIDIA NIM `__ for optimized inference on NVIDIA GPUs.
+.. list-table::
+ :widths: 100 25 100
+ :header-rows: 1
-.. figure:: ./img/e2e-lora-train-and-deploy.png
- :width: 1000
- :alt: Diagram showing the steps for LoRA customization using the NVIDIA NeMo Framework and deployment with NVIDIA NIM. The steps include converting the base model to .nemo format, creating LoRA adapters with NeMo, and then depoying the LoRA adapter with NIM for inference.
- :align: center
-
- Figure 1: Steps for LoRA customization using the NVIDIA NeMo Framework and deployment with NVIDIA NIM
-
-
-| NIM enables seamless deployment of multiple LoRA adapters (referred to as “multi-LoRA”) on the same base model. It dynamically loads the adapter weights based on incoming requests at runtime. This flexibility allows handling inputs from various tasks or use cases without deploying a unique model for each individual scenario. For further details, consult the `NIM documentation for LLMs `__.
-
-Requirements
--------------
-
-* System Configuration
- * Access to at least 1 NVIDIA GPU with a cumulative memory of at least 80GB, for example: 1 x H100-80GB or 1 x A100-80GB.
- * A Docker-enabled environment, with `NVIDIA Container Runtime `_ installed, which will make the container GPU-aware.
- * `Additional NIM requirements `_.
-
-* `Authenticate with NVIDIA NGC `_, and download `NGC CLI Tool `_. You will use this tool to download the model and customize it with NeMo Framework.
-
-
-`Create a LoRA Adapter with NeMo Framework <./llama3-lora-nemofw.ipynb>`__
---------------------------------------------------------------------------
-
-This notebook shows how to perform LoRA PEFT on **Llama 3 8B Instruct** using `PubMedQA `__ with NeMo Framework. PubMedQA is a Question-Answering dataset for biomedical texts. You will use the NeMo Framework which is available as a `docker container `__.
-
-1. Download the `Llama 3 8B Instruct .nemo `__ from NVIDIA NGC using the NGC CLI. The following command saves the ``.nemo`` format model in a folder named ``llama-3-8b-instruct-nemo_v1.0`` in the current directory. You can specify another path using the ``-d`` option in the CLI tool.
-
-.. code:: bash
-
- ngc registry model download-version "nvidia/nemo/llama-3-8b-instruct-nemo:1.0"
-
-
-Alternatively, you can download the model from `Hugging Face `__ and convert it to the ``.nemo`` format using the Hugging Face to NeMo `Llama checkpoint conversion script `__. If you'd like to skip this extra step, the ``.nemo`` model is available on NGC as mentioned above.
-
-2. Run the container using the following command. It is assumed that you have the notebook(s) and llama-3-8b-instruct model available in the current directory. If not, mount the appropriate folder to ``/workspace``.
-
-.. code:: bash
-
- export FW_VERSION=24.05 # Make sure to choose the latest available tag
-
-
-.. code:: bash
-
- docker run \
- --gpus all \
- --shm-size=2g \
- --net=host \
- --ulimit memlock=-1 \
- --rm -it \
- -v ${PWD}:/workspace \
- -w /workspace \
- -v ${PWD}/results:/results \
- nvcr.io/nvidia/nemo:$FW_VERSION bash
-
-3. From within the container, start the Jupyter lab:
-
-.. code:: bash
-
- jupyter lab --ip 0.0.0.0 --port=8888 --allow-root
-
-4. Then, navigate to `this notebook <./llama3-lora-nemofw.ipynb>`__.
-
-
-`Deploy Multiple LoRA Inference Adapters with NVIDIA NIM <./llama3-lora-deploy-nim.ipynb>`__
---------------------------------------------------------------------------------------------
-
-This procedure demonstrates how to deploy multiple LoRA adapters with NVIDIA NIM. NIM supports LoRA adapters in ``.nemo`` (from NeMo Framework), and Hugging Face model formats. You will deploy the PubMedQA LoRA adapter from the first notebook, alongside two previously trained LoRA adapters (`GSM8K `__, `SQuAD `__) that are available on NVIDIA NGC as examples.
-
-``NOTE``: Although it’s not mandatory to finish the LoRA training and secure the adapter from the preceding notebook (“Creating a LoRA adapter with NeMo Framework”) to proceed with this one, it is advisable. Regardless, you can continue to learn about LoRA deployment with NIM using other adapters that you’ve downloaded from NVIDIA NGC.
-
-
-1. Download the example LoRA adapters.
-
-The following steps assume that you have authenticated with NGC and downloaded the CLI tool, as listed in the Requirements section.
-
-.. code:: bash
-
- # Set path to your LoRA model store
- export LOCAL_PEFT_DIRECTORY="$(pwd)/loras"
-
-
-.. code:: bash
-
- mkdir -p $LOCAL_PEFT_DIRECTORY
- pushd $LOCAL_PEFT_DIRECTORY
-
- # downloading NeMo-format loras
- ngc registry model download-version "nim/meta/llama3-8b-instruct-lora:nemo-math-v1"
- ngc registry model download-version "nim/meta/llama3-8b-instruct-lora:nemo-squad-v1"
-
- popd
- chmod -R 777 $LOCAL_PEFT_DIRECTORY
-
-2. Prepare the LoRA model store.
-
-After training is complete, that LoRA model checkpoint will be created at ``./results/Meta-Llama-3-8B-Instruct/checkpoints/megatron_gpt_peft_lora_tuning.nemo``, assuming default paths in the first notebook weren’t modified.
-
-To ensure the model store is organized as expected, create a folder named ``llama3-8b-pubmed-qa``, and move your ``.nemo`` checkpoint there.
-
-.. code:: bash
-
- mkdir -p $LOCAL_PEFT_DIRECTORY/llama3-8b-pubmed-qa
-
- # Ensure the source path is correct
- cp ./results/Meta-Llama-3-8B-Instruct/checkpoints/megatron_gpt_peft_lora_tuning.nemo $LOCAL_PEFT_DIRECTORY/llama3-8b-pubmed-qa
-
-
-
-Ensure that the LoRA model store directory follows this structure: the model name(s) should be sub-folder(s) containing the ``.nemo`` file(s).
-
-::
-
- <$LOCAL_PEFT_DIRECTORY>
- ├── llama3-8b-instruct-lora_vnemo-math-v1
- │ └── llama3_8b_math.nemo
- ├── llama3-8b-instruct-lora_vnemo-squad-v1
- │ └── llama3_8b_squad.nemo
- └── llama3-8b-pubmed-qa
- └── megatron_gpt_peft_lora_tuning.nemo
-
-The last one was just trained on the PubmedQA dataset in the previous notebook.
-
-
-3. Set-up NIM.
-
-From your host OS environment, start the NIM docker container while mounting the LoRA model store, as follows:
-
-.. code:: bash
-
- # Set these configurations
- export NGC_API_KEY=
- export NIM_PEFT_REFRESH_INTERVAL=3600 # (in seconds) will check NIM_PEFT_SOURCE for newly added models in this interval
- export NIM_CACHE_PATH= # Model artifacts (in container) are cached in this directory
-
-
-.. code:: bash
-
- mkdir -p $NIM_CACHE_PATH
- chmod -R 777 $NIM_CACHE_PATH
-
- export NIM_PEFT_SOURCE=/home/nvs/loras # Path to LoRA models internal to the container
- export CONTAINER_NAME=meta-llama3-8b-instruct
-
- docker run -it --rm --name=$CONTAINER_NAME \
- --runtime=nvidia \
- --gpus all \
- --shm-size=16GB \
- -e NGC_API_KEY \
- -e NIM_PEFT_SOURCE \
- -e NIM_PEFT_REFRESH_INTERVAL \
- -v $NIM_CACHE_PATH:/opt/nim/.cache \
- -v $LOCAL_PEFT_DIRECTORY:$NIM_PEFT_SOURCE \
- -p 8000:8000 \
- nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
-
-The first time you run the command, it will download the model and cache it in ``$NIM_CACHE_PATH`` so subsequent deployments are even faster. There are several options to configure NIM other than the ones listed above. You can find a full list in the `NIM configuration `__ documentation.
-
-
-4. Start the notebook.
-
-From another terminal, follow the same instructions as the previous notebook to launch Jupyter Lab, and then navigate to `this notebook <./llama3-lora-deploy-nim.ipynb>`__.
-
-You can use the same NeMo Framework docker container which has Jupyter Lab already installed.
\ No newline at end of file
+ * - Tutorial
+ - Dataset
+ - Description
+ * - `Llama 3 LoRA Fine-Tuning and Multi-LoRA Deployment with NeMo Framework and NVIDIA NIM <./biomedical-qa>`_
+ - `PubMedQA `_
+ - Perform LoRA PEFT on Llama 3 8B Instruct using a dataset for bio-medical domain question answering. Deploy multiple LoRA adapters with NVIDIA NIM.
+ * - `Llama 3.1 Law-Domain LoRA Fine-Tuning and Deployment with NeMo Framework and NVIDIA NIM <./sdg-law-title-generation>`_
+ - `Law StackExchange `_
+ - Perform LoRA PEFT on Llama 3.1 8B Instruct using a synthetically augmented version of Law StackExchange with NeMo Framework, followed by deployment with NVIDIA NIM. As a pre-requisite, follow the tutorial for `data curation using NeMo Curator `__.
diff --git a/tutorials/llm/llama-3/biomedical-qa/README.rst b/tutorials/llm/llama-3/biomedical-qa/README.rst
new file mode 100755
index 000000000000..663c0c99abfc
--- /dev/null
+++ b/tutorials/llm/llama-3/biomedical-qa/README.rst
@@ -0,0 +1,174 @@
+Llama 3 LoRA Fine-Tuning and Deployment with NeMo Framework and NVIDIA NIM
+==========================================================================
+
+`Llama 3 `_ is an open-source large language model by Meta that delivers state-of-the-art performance on popular industry benchmarks. It has been pretrained on over 15 trillion tokens, and supports an 8K token context length. It is available in two sizes, 8B and 70B, and each size has two variants—base pretrained and instruction tuned.
+
+`Low-Rank Adaptation (LoRA) `__ has emerged as a popular Parameter-Efficient Fine-Tuning (PEFT) technique that tunes a very small number of additional parameters as compared to full fine-tuning, thereby reducing the compute required.
+
+`NVIDIA NeMo
+Framework `__ provides tools to perform LoRA on Llama 3 to fit your use case, which can then be deployed using `NVIDIA NIM `__ for optimized inference on NVIDIA GPUs.
+
+.. figure:: ./img/e2e-lora-train-and-deploy.png
+ :width: 1000
+ :alt: Diagram showing the steps for LoRA customization using the NVIDIA NeMo Framework and deployment with NVIDIA NIM. The steps include converting the base model to .nemo format, creating LoRA adapters with NeMo, and then depoying the LoRA adapter with NIM for inference.
+ :align: center
+
+ Figure 1: Steps for LoRA customization using the NVIDIA NeMo Framework and deployment with NVIDIA NIM
+
+
+| NIM enables seamless deployment of multiple LoRA adapters (referred to as “multi-LoRA”) on the same base model. It dynamically loads the adapter weights based on incoming requests at runtime. This flexibility allows handling inputs from various tasks or use cases without deploying a unique model for each individual scenario. For further details, consult the `NIM documentation for LLMs `__.
+
+Requirements
+-------------
+
+* System Configuration
+ * Access to at least 1 NVIDIA GPU with a cumulative memory of at least 80GB, for example: 1 x H100-80GB or 1 x A100-80GB.
+ * A Docker-enabled environment, with `NVIDIA Container Runtime `_ installed, which will make the container GPU-aware.
+ * `Additional NIM requirements `_.
+
+* `Authenticate with NVIDIA NGC `_, and download `NGC CLI Tool `_. You will use this tool to download the model and customize it with NeMo Framework.
+
+
+`Create a LoRA Adapter with NeMo Framework <./llama3-lora-nemofw.ipynb>`__
+--------------------------------------------------------------------------
+
+This notebook shows how to perform LoRA PEFT on **Llama 3 8B Instruct** using `PubMedQA `__ with NeMo Framework. PubMedQA is a Question-Answering dataset for biomedical texts. You will use the NeMo Framework which is available as a `docker container `__.
+
+1. Download the `Llama 3 8B Instruct .nemo `__ from NVIDIA NGC using the NGC CLI. The following command saves the ``.nemo`` format model in a folder named ``llama-3-8b-instruct-nemo_v1.0`` in the current directory. You can specify another path using the ``-d`` option in the CLI tool.
+
+.. code:: bash
+
+ ngc registry model download-version "nvidia/nemo/llama-3-8b-instruct-nemo:1.0"
+
+
+Alternatively, you can download the model from `Hugging Face `__ and convert it to the ``.nemo`` format using the Hugging Face to NeMo `Llama checkpoint conversion script `__. If you'd like to skip this extra step, the ``.nemo`` model is available on NGC as mentioned above.
+
+2. Run the container using the following command. It is assumed that you have the notebook(s) and llama-3-8b-instruct model available in the current directory. If not, mount the appropriate folder to ``/workspace``.
+
+.. code:: bash
+
+ export FW_VERSION=24.05 # Make sure to choose the latest available tag
+
+
+.. code:: bash
+
+ docker run \
+ --gpus all \
+ --shm-size=2g \
+ --net=host \
+ --ulimit memlock=-1 \
+ --rm -it \
+ -v ${PWD}:/workspace \
+ -w /workspace \
+ -v ${PWD}/results:/results \
+ nvcr.io/nvidia/nemo:$FW_VERSION bash
+
+3. From within the container, start the Jupyter lab:
+
+.. code:: bash
+
+ jupyter lab --ip 0.0.0.0 --port=8888 --allow-root
+
+4. Then, navigate to `this notebook <./llama3-lora-nemofw.ipynb>`__.
+
+
+`Deploy Multiple LoRA Inference Adapters with NVIDIA NIM <./llama3-lora-deploy-nim.ipynb>`__
+--------------------------------------------------------------------------------------------
+
+This procedure demonstrates how to deploy multiple LoRA adapters with NVIDIA NIM. NIM supports LoRA adapters in ``.nemo`` (from NeMo Framework), and Hugging Face model formats. You will deploy the PubMedQA LoRA adapter from the first notebook, alongside two previously trained LoRA adapters (`GSM8K `__, `SQuAD `__) that are available on NVIDIA NGC as examples.
+
+``NOTE``: Although it’s not mandatory to finish the LoRA training and secure the adapter from the preceding notebook (“Creating a LoRA adapter with NeMo Framework”) to proceed with this one, it is advisable. Regardless, you can continue to learn about LoRA deployment with NIM using other adapters that you’ve downloaded from NVIDIA NGC.
+
+
+1. Download the example LoRA adapters.
+
+The following steps assume that you have authenticated with NGC and downloaded the CLI tool, as listed in the Requirements section.
+
+.. code:: bash
+
+ # Set path to your LoRA model store
+ export LOCAL_PEFT_DIRECTORY="$(pwd)/loras"
+
+
+.. code:: bash
+
+ mkdir -p $LOCAL_PEFT_DIRECTORY
+ pushd $LOCAL_PEFT_DIRECTORY
+
+ # downloading NeMo-format loras
+ ngc registry model download-version "nim/meta/llama3-8b-instruct-lora:nemo-math-v1"
+ ngc registry model download-version "nim/meta/llama3-8b-instruct-lora:nemo-squad-v1"
+
+ popd
+ chmod -R 777 $LOCAL_PEFT_DIRECTORY
+
+2. Prepare the LoRA model store.
+
+After training is complete, that LoRA model checkpoint will be created at ``./results/Meta-Llama-3-8B-Instruct/checkpoints/megatron_gpt_peft_lora_tuning.nemo``, assuming default paths in the first notebook weren’t modified.
+
+To ensure the model store is organized as expected, create a folder named ``llama3-8b-pubmed-qa``, and move your ``.nemo`` checkpoint there.
+
+.. code:: bash
+
+ mkdir -p $LOCAL_PEFT_DIRECTORY/llama3-8b-pubmed-qa
+
+ # Ensure the source path is correct
+ cp ./results/Meta-Llama-3-8B-Instruct/checkpoints/megatron_gpt_peft_lora_tuning.nemo $LOCAL_PEFT_DIRECTORY/llama3-8b-pubmed-qa
+
+
+
+Ensure that the LoRA model store directory follows this structure: the model name(s) should be sub-folder(s) containing the ``.nemo`` file(s).
+
+::
+
+ <$LOCAL_PEFT_DIRECTORY>
+ ├── llama3-8b-instruct-lora_vnemo-math-v1
+ │ └── llama3_8b_math.nemo
+ ├── llama3-8b-instruct-lora_vnemo-squad-v1
+ │ └── llama3_8b_squad.nemo
+ └── llama3-8b-pubmed-qa
+ └── megatron_gpt_peft_lora_tuning.nemo
+
+The last one was just trained on the PubmedQA dataset in the previous notebook.
+
+
+3. Set-up NIM.
+
+From your host OS environment, start the NIM docker container while mounting the LoRA model store, as follows:
+
+.. code:: bash
+
+ # Set these configurations
+ export NGC_API_KEY=
+ export NIM_PEFT_REFRESH_INTERVAL=3600 # (in seconds) will check NIM_PEFT_SOURCE for newly added models in this interval
+ export NIM_CACHE_PATH= # Model artifacts (in container) are cached in this directory
+
+
+.. code:: bash
+
+ mkdir -p $NIM_CACHE_PATH
+ chmod -R 777 $NIM_CACHE_PATH
+
+ export NIM_PEFT_SOURCE=/home/nvs/loras # Path to LoRA models internal to the container
+ export CONTAINER_NAME=meta-llama3-8b-instruct
+
+ docker run -it --rm --name=$CONTAINER_NAME \
+ --runtime=nvidia \
+ --gpus all \
+ --shm-size=16GB \
+ -e NGC_API_KEY \
+ -e NIM_PEFT_SOURCE \
+ -e NIM_PEFT_REFRESH_INTERVAL \
+ -v $NIM_CACHE_PATH:/opt/nim/.cache \
+ -v $LOCAL_PEFT_DIRECTORY:$NIM_PEFT_SOURCE \
+ -p 8000:8000 \
+ nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
+
+The first time you run the command, it will download the model and cache it in ``$NIM_CACHE_PATH`` so subsequent deployments are even faster. There are several options to configure NIM other than the ones listed above. You can find a full list in the `NIM configuration `__ documentation.
+
+
+4. Start the notebook.
+
+From another terminal, follow the same instructions as the previous notebook to launch Jupyter Lab, and then navigate to `this notebook <./llama3-lora-deploy-nim.ipynb>`__.
+
+You can use the same NeMo Framework docker container which has Jupyter Lab already installed.
\ No newline at end of file
diff --git a/tutorials/llm/llama-3/img/e2e-lora-train-and-deploy.png b/tutorials/llm/llama-3/biomedical-qa/img/e2e-lora-train-and-deploy.png
similarity index 100%
rename from tutorials/llm/llama-3/img/e2e-lora-train-and-deploy.png
rename to tutorials/llm/llama-3/biomedical-qa/img/e2e-lora-train-and-deploy.png
diff --git a/tutorials/llm/llama-3/llama3-lora-deploy-nim.ipynb b/tutorials/llm/llama-3/biomedical-qa/llama3-lora-deploy-nim.ipynb
similarity index 100%
rename from tutorials/llm/llama-3/llama3-lora-deploy-nim.ipynb
rename to tutorials/llm/llama-3/biomedical-qa/llama3-lora-deploy-nim.ipynb
diff --git a/tutorials/llm/llama-3/llama3-lora-nemofw.ipynb b/tutorials/llm/llama-3/biomedical-qa/llama3-lora-nemofw.ipynb
similarity index 100%
rename from tutorials/llm/llama-3/llama3-lora-nemofw.ipynb
rename to tutorials/llm/llama-3/biomedical-qa/llama3-lora-nemofw.ipynb
diff --git a/tutorials/llm/llama-3/sdg-law-title-generation/README.rst b/tutorials/llm/llama-3/sdg-law-title-generation/README.rst
new file mode 100755
index 000000000000..58fc4a86eaa7
--- /dev/null
+++ b/tutorials/llm/llama-3/sdg-law-title-generation/README.rst
@@ -0,0 +1,170 @@
+Llama 3.1 Law-Domain LoRA Fine-Tuning and Deployment with NeMo Framework and NVIDIA NIM
+=======================================================================================
+
+`Llama 3.1 `_ are open-source large language models by Meta that deliver state-of-the-art performance on popular industry benchmarks. They have been pretrained on over 15 trillion tokens, and support a 128K token context length. They are available in three sizes, 8B, 70B, and 405B, and each size has two variants—base pretrained and instruction tuned.
+
+`Low-Rank Adaptation (LoRA) `__ has emerged as a popular Parameter-Efficient Fine-Tuning (PEFT) technique that tunes a very small number of additional parameters as compared to full fine-tuning, thereby reducing the compute required.
+
+`NVIDIA NeMo
+Framework `__ provides tools to perform LoRA on Llama 3.1 to fit your use case, which can then be deployed using `NVIDIA NIM `__ for optimized inference on NVIDIA GPUs.
+
+.. figure:: ./img/e2e-lora-train-and-deploy.png
+ :width: 1000
+ :alt: Diagram showing the steps for LoRA customization using the NVIDIA NeMo Framework and deployment with NVIDIA NIM. The steps include converting the base model to .nemo format, creating LoRA adapters with NeMo, and then depoying the LoRA adapter with NIM for inference.
+ :align: center
+
+ Figure 1: Steps for LoRA customization using the NVIDIA NeMo Framework and deployment with NVIDIA NIM
+
+
+| NIM also enables seamless deployment of multiple LoRA adapters (referred to as “multi-LoRA”) on the same base model. It dynamically loads the adapter weights based on incoming requests at runtime. This flexibility allows handling inputs from various tasks or use cases without deploying a unique model for each individual scenario. For further details, consult the `NIM documentation for LLMs `__.
+
+Objectives
+----------
+
+This tutorial shows how to perform LoRA PEFT on **Llama 3.1 8B Instruct** using a synthetically augmented version of `Law StackExchange `__ with NeMo Framework. Law StackExchange is a dataset of legal questions, question titles, and answers. For this demonstration, we will tune the model on the task of title/subject generation, that is, given a Law StackExchange forum question, auto-generate an appropriate title for it. We will then deploy the LoRA tuned model with NVIDIA NIM for inference.
+
+Requirements
+-------------
+
+* **Obtain the dataset:** This tutorial is a continuation of the Data Curation tutorial - `Curating Datasets for Parameter Efficient Fine-tuning with Synthetic Data Generation `__. It demonstrates various filtering and processing operations on the records to improve data quality, as well as (optional) synthetic data generation (SDG) to augment the dataset. Please follow this tutorial to obtain the resulting dataset needed.
+
+
+* System Configuration
+ * Access to at least 1 NVIDIA GPU with a cumulative memory of at least 80GB, for example: 1 x H100-80GB or 1 x A100-80GB.
+ * A Docker-enabled environment, with `NVIDIA Container Runtime `_ installed, which will make the container GPU-aware.
+ * `Additional NIM requirements `_.
+
+* `Authenticate with NVIDIA NGC `_, and download `NGC CLI Tool `_. You will use this tool to download the model and customize it with NeMo Framework.
+
+* Get your Hugging Face `access token `_, which will be used to obtain the tokenizer required during training.
+
+
+`Process the Dataset with NeMo Curator `__
+-------------------------------------------------------------------------------------------------------
+
+1. Save the dataset in the current directory. You will have obtained `law-qa-{train/val/test}.jsonl` splits resulting from following the abovementioned `data curation tutorial `__.
+
+.. code:: bash
+
+ mkdir -p curated-data
+
+ # Make sure to update the path below as appropriate
+ cp .jsonl curated-data/.
+
+
+`Create a LoRA Adapter with NeMo Framework <./llama3-sdg-lora-nemofw.ipynb>`__
+------------------------------------------------------------------------------
+
+For LoRA-tuning the model, you will use the NeMo Framework which is available as a `docker container `__.
+
+
+1. Download the `Llama 3.1 8B Instruct .nemo `__ from NVIDIA NGC using the NGC CLI. The following command saves the ``.nemo`` format model in a folder named ``llama-3_1-8b-instruct-nemo_v1.0`` in the current directory. You can specify another path using the ``-d`` option in the CLI tool.
+
+.. code:: bash
+
+ ngc registry model download-version "nvidia/nemo/llama-3_1-8b-instruct-nemo:1.0"
+
+
+
+2. Run the container using the following command. It is assumed that you have the dataset, notebook(s), and the `llama-3.1-8b-instruct` model available in the current directory. If not, mount the appropriate folder to ``/workspace``.
+
+.. code:: bash
+
+ export FW_VERSION=24.05.llama3.1
+
+
+.. code:: bash
+
+ docker run \
+ --gpus all \
+ --shm-size=2g \
+ --net=host \
+ --ulimit memlock=-1 \
+ --rm -it \
+ -v ${PWD}:/workspace \
+ -w /workspace \
+ -v ${PWD}/results:/results \
+ nvcr.io/nvidia/nemo:$FW_VERSION bash
+
+3. From within the container, start the Jupyter lab:
+
+.. code:: bash
+
+ jupyter lab --ip 0.0.0.0 --port=8888 --allow-root
+
+4. Then, navigate to `this notebook <./llama3-sdg-lora-nemofw.ipynb>`__.
+
+
+`Deploy the LoRA Inference Adapter with NVIDIA NIM <./llama3-sdg-lora-deploy-nim.ipynb>`__
+--------------------------------------------------------------------------------------
+
+This procedure demonstrates how to deploy the trained LoRA adapter with NVIDIA NIM. NIM supports LoRA adapters in ``.nemo`` (from NeMo Framework), and Hugging Face model formats. You will deploy the Law StackExchange title-generation LoRA adapter from the first notebook.
+
+1. Prepare the LoRA model store.
+
+After training is complete, that LoRA model checkpoint will be created at ``./results/Meta-llama3.1-8B-Instruct-titlegen/checkpoints/megatron_gpt_peft_lora_tuning.nemo``, assuming default paths in the first notebook weren’t modified.
+
+To ensure the model store is organized as expected, create a folder named ``llama3.1-8b-law-titlegen`` under a model store directory, and move your ``.nemo`` checkpoint there.
+
+.. code:: bash
+
+ # Set path to your LoRA model store
+ export LOCAL_PEFT_DIRECTORY="$(pwd)/loras"
+
+ mkdir -p $LOCAL_PEFT_DIRECTORY/llama3.1-8b-law-titlegen
+
+ # Ensure the source path is correct
+ cp ./results/Meta-llama3.1-8B-Instruct-titlegen/checkpoints/megatron_gpt_peft_lora_tuning.nemo $LOCAL_PEFT_DIRECTORY/llama3.1-8b-law-titlegen
+
+
+Ensure that the LoRA model store directory follows this structure: the model name would be name of the sub-folder containing the ``.nemo`` file.
+
+::
+
+ <$LOCAL_PEFT_DIRECTORY>
+ └── llama3.1-8b-law-titlegen
+ └── megatron_gpt_peft_lora_tuning.nemo
+
+
+Note that NIM supports deployment of multiple LoRA adapters over the same base model. As such, if you have any other adapters for other tasks trained or available, you can place them in separate sub-folders under `$LOCAL_PEFT_DIRECTORY`.
+
+2. Set-up NIM.
+
+From your host OS environment, start the NIM docker container while mounting the LoRA model store, as follows:
+
+.. code:: bash
+
+ # Set these configurations
+ export NGC_API_KEY=
+ export NIM_PEFT_REFRESH_INTERVAL=3600 # (in seconds) will check NIM_PEFT_SOURCE for newly added models in this interval
+ export NIM_CACHE_PATH= # Model artifacts (in container) are cached in this directory
+
+
+.. code:: bash
+
+ mkdir -p $NIM_CACHE_PATH
+ chmod -R 777 $NIM_CACHE_PATH
+
+ export NIM_PEFT_SOURCE=/home/nvs/loras # Path to LoRA models internal to the container
+ export CONTAINER_NAME=meta-llama3.1-8b-instruct
+
+ docker run -it --rm --name=$CONTAINER_NAME \
+ --gpus all \
+ --network=host \
+ --shm-size=16GB \
+ -e NGC_API_KEY \
+ -e NIM_PEFT_SOURCE \
+ -v $NIM_CACHE_PATH:/opt/nim/.cache \
+ -v $LOCAL_PEFT_DIRECTORY:$NIM_PEFT_SOURCE \
+ nvcr.io/nim/meta/llama-3.1-8b-instruct:1.1.0
+
+The first time you run the command, it will download the model and cache it in ``$NIM_CACHE_PATH`` so subsequent deployments are even faster. There are several options to configure NIM other than the ones listed above. You can find a full list in the `NIM configuration `__ documentation.
+
+
+3. Start the notebook.
+
+From another terminal, follow the same instructions as the previous notebook to launch Jupyter Lab, and then navigate to `this notebook <./llama3-sdg-lora-deploy-nim.ipynb>`__.
+
+You can use the same NeMo Framework docker container which has Jupyter Lab already installed.
+
+
diff --git a/tutorials/llm/llama-3/sdg-law-title-generation/img/e2e-lora-train-and-deploy.png b/tutorials/llm/llama-3/sdg-law-title-generation/img/e2e-lora-train-and-deploy.png
new file mode 100755
index 000000000000..16bb47eed431
Binary files /dev/null and b/tutorials/llm/llama-3/sdg-law-title-generation/img/e2e-lora-train-and-deploy.png differ
diff --git a/tutorials/llm/llama-3/sdg-law-title-generation/llama3-sdg-lora-deploy-nim.ipynb b/tutorials/llm/llama-3/sdg-law-title-generation/llama3-sdg-lora-deploy-nim.ipynb
new file mode 100755
index 000000000000..783c5d951944
--- /dev/null
+++ b/tutorials/llm/llama-3/sdg-law-title-generation/llama3-sdg-lora-deploy-nim.ipynb
@@ -0,0 +1,170 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "884f3125",
+ "metadata": {},
+ "source": [
+ "# LoRA inference with NVIDIA NIM\n",
+ "\n",
+ "This is a demonstration of running inference against a LoRA adapter deployed with NVIDIA NIM. NIM supports LoRA adapters in .nemo (from NeMo Framework), and Hugging Face model formats. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b161f16b",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "This notebook includes instructions to send an inference call to NVIDIA NIM using the Python `requests` library."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "54759732",
+ "metadata": {},
+ "source": [
+ "## Before you begin\n",
+ "Ensure that you satisfy the pre-requisites, and have completed the setup instructions provided in the README associated with this tutorial to deploy the NIM container with LoRA."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1e7917da",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fa2477e9",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "import json"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "39d9918a",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Check available LoRA models\n",
+ "\n",
+ "Once the NIM server is up and running, check the available models as follows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f2d71965",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "url = 'http://0.0.0.0:8000/v1/models'\n",
+ "\n",
+ "response = requests.get(url)\n",
+ "data = response.json()\n",
+ "\n",
+ "print(json.dumps(data, indent=4))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5d5a93ac",
+ "metadata": {},
+ "source": [
+ "This will return all the models available for inference by NIM. In this case, it will return the base model, as well as the LoRA adapters that were provided during NIM deployment - `llama3.1-8b-law-titlegen`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e7c19acd",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "## LoRA inference\n",
+ "\n",
+ "Inference can be performed by sending POST requests to the `/completions` endpoint.\n",
+ "\n",
+ "A few things to note:\n",
+ "* The `model` parameter in the payload specifies the model that the request will be directed to. This can be the base model `meta/llama3.1-8b-instruct`, or any of the LoRA models, such as `llama3.1-8b-law-titlegen`.\n",
+ "* `max_tokens` parameter specifies the maximum number of tokens to generate. At any point, the cumulative number of input prompt tokens and specified number of output tokens to generate should not exceed the model's maximum context limit. For llama3-8b-instruct, the context length supported is 8192 tokens.\n",
+ "\n",
+ "Following code snippets show how it's possible to send requests belonging to different LoRAs (or tasks). NIM dynamically loads the LoRA adapters and serves the requests. It also internally handles the batching of requests belonging to different LoRAs to allow better performance and more efficient of compute."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3edd2a9e",
+ "metadata": {},
+ "source": [
+ "### Title Generation\n",
+ "\n",
+ "Try sending an example from the test set."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cf6ea42a",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "url = 'http://0.0.0.0:8000/v1/completions'\n",
+ "\n",
+ "headers = {\n",
+ " 'accept': 'application/json',\n",
+ " 'Content-Type': 'application/json'\n",
+ "}\n",
+ "\n",
+ "# Example from the test set, following the template we trained the lora with\n",
+ "prompt=\"Generate a concise, engaging title for the following legal question on an internet forum. The title should be legally relevant, capture key aspects of the issue, and entice readers to learn more. \\nQUESTION: In order to be sued in a particular jurisdiction, say New York, a company must have a minimal business presence in the jurisdiction. What constitutes such a presence? Suppose the company engaged a New York-based Plaintiff, and its representatives signed the contract with the Plaintiff in New York City. Does this satisfy the minimum presence rule? Suppose, instead, the plaintiff and contract signing were in New Jersey, but the company hired a law firm with offices in New York City. Does this qualify? \\nTITLE: \"\n",
+ "data = {\n",
+ " \"model\": \"llama3.1-8b-law-titlegen\",\n",
+ " \"prompt\": prompt,\n",
+ " \"max_tokens\": 25,\n",
+ " \"temperature\":0\n",
+ "}\n",
+ "\n",
+ "response = requests.post(url, headers=headers, json=data)\n",
+ "response_data = response.json()\n",
+ "\n",
+ "print(json.dumps(response_data, indent=4))"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tutorials/llm/llama-3/sdg-law-title-generation/llama3-sdg-lora-nemofw.ipynb b/tutorials/llm/llama-3/sdg-law-title-generation/llama3-sdg-lora-nemofw.ipynb
new file mode 100755
index 000000000000..d597a60d6c0b
--- /dev/null
+++ b/tutorials/llm/llama-3/sdg-law-title-generation/llama3-sdg-lora-nemofw.ipynb
@@ -0,0 +1,562 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a952a54c",
+ "metadata": {},
+ "source": [
+ "# Creating a Llama 3.1 LoRA adapter with NeMo Framework using a Synthetic Dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "63b3bd1c",
+ "metadata": {},
+ "source": [
+ "This notebook showcases performing LoRA finetuning on **Llama 3.1-8B-Instruct** with a synthetically augmented version of [Law StackExchange](https://huggingface.co/datasets/ymoslem/Law-StackExchange) dataset using NeMo Framework. Law StackExchange is a dataset of legal question/answers. Each record consists of a question, its title, as well as human-provided answers.\n",
+ "\n",
+ "For this demonstration, we will tune the model on the task of title/subject generation, that is, given a Law StackExchange forum question, auto-generate an appropriate title for it.\n",
+ "\n",
+ "> `NOTE:` Ensure that you run this notebook inside the [NeMo Framework container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo) which has all the required dependencies. **Instructions are available in the associated tutorial README to download the model and the container.**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "92ba5569",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "!pip install ipywidgets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1cf3dc30",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import json\n",
+ "import numpy as np\n",
+ "from rouge_score import rouge_scorer, scoring"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0b129833",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "---\n",
+ "## Before you begin\n",
+ "Ensure you have the following -\n",
+ "1. **Generate the synthetic dataset**: Follow the [PEFT Synthetic Data Generation (SDG)](https://github.com/NVIDIA/NeMo-Curator/tree/main/tutorials/peft-curation-with-sdg) tutorial to obtain the synthetic dataset. Once obtained, you must follow the instructions in the associated README to mount it in the NeMo FW container."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8492edc1",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "DATA_DIR = os.path.join(\"/workspace/curated-data\")\n",
+ "\n",
+ "!ls {DATA_DIR}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "97990ad8",
+ "metadata": {},
+ "source": [
+ "You should see the `law-qa-{train/val/test}.jsonl` splits resulting from following the abovementioned SDG tutorial."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "63b061f4",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "TRAIN_DS = os.path.join(DATA_DIR, \"law-qa-train.jsonl\")\n",
+ "VAL_DS = os.path.join(DATA_DIR, \"law-qa-val.jsonl\")\n",
+ "TEST_DS = os.path.join(DATA_DIR, \"law-qa-test.jsonl\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c1c0f9b8",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "2. **Get the model**: Download the `Meta Llama 3.1 8B Instruct .nemo` model and mount the corresponding folder to the container."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3728f222",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "!ls /workspace/llama-3_1-8b-instruct-nemo_v1.0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "56b7a698",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "3. **Set the Hugging Face Access Token**: You can obtain this from your [Hugging Face account](https://huggingface.co/docs/hub/en/security-tokens). "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b546cb59",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from huggingface_hub import login\n",
+ "\n",
+ "login(token=\"\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "570025c5",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "---\n",
+ "## Step-by-step instructions\n",
+ "\n",
+ "This notebook is structured into four steps:\n",
+ "1. Prepare the dataset\n",
+ "2. Run the PEFT finetuning script\n",
+ "3. Inference with NeMo Framework\n",
+ "4. Check the model accuracy"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3894607a",
+ "metadata": {},
+ "source": [
+ "### Step 1: Prepare the dataset\n",
+ "\n",
+ "This dataset has already undergone several filtering and processing operations, and it can be used to train the model for various different tasks - question title generation (summarization), law domain question answering, and question tag generation (multi-label classification).\n",
+ "\n",
+ "Take a look at a single row in the dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c6b47e31",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# TRAIN, VAL and TEST splits all follow the same structure\n",
+ "!head -n1 {TRAIN_DS}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9bed493d",
+ "metadata": {},
+ "source": [
+ "You will see several fields in the `.jsonl`, including `title`, `question`, `answer`, and other associated metadata.\n",
+ "\n",
+ "For this tutorial, our input will be the `answer` field, and output will be it's `title`. \n",
+ "\n",
+ "The following cell does two things -\n",
+ "* Adds a template - a prompt instruction (which is optional), and format `{PROMPT} \\nQUESTION: {data[\"question\"]} \\nTITLE: `.\n",
+ "* Saves the data splits into the same location, also appending a `_preprocessed` marker to them."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "188b93b7",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Add a prompt instruction.\n",
+ "PROMPT='''Generate a concise, engaging title for the following legal question on an internet forum. The title should be legally relevant, capture key aspects of the issue, and entice readers to learn more.'''\n",
+ "\n",
+ "# Creates a preprocessed version of the data files\n",
+ "for input_file in [TRAIN_DS, VAL_DS, TEST_DS]:\n",
+ " output_file = input_file.rsplit('.', 1)[0] + '_preprocessed.jsonl'\n",
+ " with open(input_file, 'r') as infile, open(output_file, 'w') as outfile:\n",
+ " for line in infile:\n",
+ " # Parse each line as JSON\n",
+ " data = json.loads(line)\n",
+ "\n",
+ " # Create a new dictionary with only the desired fields, renamed and formatted\n",
+ " new_data = {\n",
+ " \"input\": f'''{PROMPT} \\nQUESTION: {data[\"question\"]} \\nTITLE: ''',\n",
+ " \"output\": data['title']\n",
+ " }\n",
+ "\n",
+ " # Write the new data as a JSON line to the output file\n",
+ " json.dump(new_data, outfile)\n",
+ " outfile.write('\\n') # Add a newline after each JSON object\n",
+ "\n",
+ " print(f\"Processed {input_file} and created {output_file}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "39388cc3",
+ "metadata": {},
+ "source": [
+ "After running the above scripts, you will see `law-qa-{train/test/val}_preprocessed.jsonl` files appear in the data directory.\n",
+ "\n",
+ "This is what an example will be formatted like -\n",
+ "\n",
+ "```json\n",
+ "{\"input\": \"Generate a concise, engaging title for the following legal question on an internet forum. The title should be legally relevant, capture key aspects of the issue, and entice readers to learn more. \\nQUESTION: In order to be sued in a particular jurisdiction, say New York, a company must have a minimal business presence in the jurisdiction. What constitutes such a presence? Suppose the company engaged a New York-based Plaintiff, and its representatives signed the contract with the Plaintiff in New York City. Does this satisfy the minimum presence rule? Suppose, instead, the plaintiff and contract signing were in New Jersey, but the company hired a law firm with offices in New York City. Does this qualify? \\nTITLE: \", \n",
+ " \"output\": \"What constitutes \\\"doing business in a jurisdiction?\\\"\"}\n",
+ "```\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f53038ad",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# clear up any cached mem-map file\n",
+ "!rm curated-data/*idx*"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ebd28f0d",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### Step 2: Run PEFT finetuning script for LoRA\n",
+ "\n",
+ "NeMo framework includes a high level python script for fine-tuning [megatron_gpt_finetuning.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py) that can abstract away some of the lower level API calls. Once you have your model downloaded and the dataset ready, LoRA fine-tuning with NeMo is essentially just running this script!\n",
+ "\n",
+ "For this demonstration, this training run is capped by `max_steps`, and validation is carried out every `val_check_interval` steps. If the validation loss does not improve after a few checks, training is halted to avoid overfitting.\n",
+ "\n",
+ "> `NOTE:` In the block of code below, pass the paths to your train, test and validation data files as well as path to the .nemo model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "15228de7",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "%%bash\n",
+ "\n",
+ "# Set paths to the model, train, validation and test sets.\n",
+ "MODEL=\"/workspace/llama-3_1-8b-instruct-nemo_v1.0/llama3_1_8b_instruct.nemo\"\n",
+ "\n",
+ "TRAIN_DS=\"[./curated-data/law-qa-train_preprocessed.jsonl]\"\n",
+ "VALID_DS=\"[./curated-data/law-qa-val_preprocessed.jsonl]\"\n",
+ "TEST_DS=\"[./curated-data/law-qa-test_preprocessed.jsonl]\"\n",
+ "TEST_NAMES=\"[law]\"\n",
+ "\n",
+ "SCHEME=\"lora\"\n",
+ "TP_SIZE=1\n",
+ "PP_SIZE=1\n",
+ "\n",
+ "OUTPUT_DIR=\"./results/Meta-llama3.1-8B-Instruct-titlegen\"\n",
+ "rm -r $OUTPUT_DIR\n",
+ "\n",
+ "torchrun --nproc_per_node=1 \\\n",
+ "/opt/NeMo/examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \\\n",
+ " exp_manager.exp_dir=${OUTPUT_DIR} \\\n",
+ " exp_manager.explicit_log_dir=${OUTPUT_DIR} \\\n",
+ " trainer.devices=1 \\\n",
+ " trainer.num_nodes=1 \\\n",
+ " trainer.precision=bf16-mixed \\\n",
+ " trainer.val_check_interval=0.2 \\\n",
+ " trainer.max_steps=1000 \\\n",
+ " model.megatron_amp_O2=True \\\n",
+ " ++model.mcore_gpt=True \\\n",
+ " model.tensor_model_parallel_size=${TP_SIZE} \\\n",
+ " model.pipeline_model_parallel_size=${PP_SIZE} \\\n",
+ " model.micro_batch_size=1 \\\n",
+ " model.global_batch_size=32 \\\n",
+ " model.restore_from_path=${MODEL} \\\n",
+ " model.data.train_ds.file_names=${TRAIN_DS} \\\n",
+ " model.data.train_ds.concat_sampling_probabilities=[1.0] \\\n",
+ " model.data.validation_ds.file_names=${VALID_DS} \\\n",
+ " model.peft.peft_scheme=${SCHEME}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "268e4618",
+ "metadata": {},
+ "source": [
+ "This will create a LoRA adapter - a file named `megatron_gpt_peft_lora_tuning.nemo` in `./results/Meta-llama3.1-8B-Instruct-titlegen/checkpoints/`. We'll use this later.\n",
+ "\n",
+ "To further configure the run above -\n",
+ "\n",
+ "* **A different PEFT technique**: The `peft.peft_scheme` parameter determines the technique being used. In this case, we did LoRA, but NeMo Framework supports other techniques as well - such as P-tuning, Adapters, and IA3. For more information, refer to the [PEFT support matrix](https://docs.nvidia.com/nemo-framework/user-guide/latest/nemotoolkit/nlp/nemo_megatron/peft/landing_page.html). For example, for P-tuning, simply set \n",
+ "\n",
+ "```bash\n",
+ "model.peft.peft_scheme=\"ptuning\" # instead of \"lora\"\n",
+ "```\n",
+ "You can override many such configurations (such as `learning rate`, `adapter dim`, and more) while running the script. A full set of possible configurations is available in [NeMo Framework Github](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/language_modeling/tuning/conf/megatron_gpt_finetuning_config.yaml)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8fed5465",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### Step 3: Inference with NeMo Framework\n",
+ "\n",
+ "Running text generation within the framework is also possible with running a Python script. Note that is more for testing and validation, not a full-fledged deployment solution like NVIDIA NIM."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "18a5adfc",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Check that the LORA model file exists\n",
+ "!ls -l ./results/Meta-llama3.1-8B-Instruct-titlegen/checkpoints"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "50b1dc9b",
+ "metadata": {},
+ "source": [
+ "In the code snippet below, the following configurations are worth noting - \n",
+ "\n",
+ "1. `model.restore_from_path` to the path for the Meta-Llama-3.1-8B-Instruct.nemo file.\n",
+ "2. `model.peft.restore_from_path` to the path for the PEFT checkpoint that was created in the fine-tuning run in the last step.\n",
+ "3. `model.test_ds.file_names` to the path of the preprocessed test file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0bd9d602",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Create a smaller test subset for a quick eval demonstration.\n",
+ "\n",
+ "!head -n 128 ./curated-data/law-qa-test_preprocessed.jsonl > ./curated-data/law-qa-test_preprocessed-n128.jsonl"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d029b0b5",
+ "metadata": {},
+ "source": [
+ "If you have made any changes in model or experiment paths, please ensure they are configured correctly below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "630c0305",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "%%bash\n",
+ "MODEL=\"/workspace/llama-3_1-8b-instruct-nemo_v1.0/llama3_1_8b_instruct.nemo\"\n",
+ "\n",
+ "TEST_DS=\"[./curated-data/law-qa-test_preprocessed-n128.jsonl]\" # Smaller test split\n",
+ "# TEST_DS=\"[./curated-data/law-qa-test_preprocessed.jsonl]\" # Full test set\n",
+ "TEST_NAMES=\"[law]\"\n",
+ "\n",
+ "TP_SIZE=1\n",
+ "PP_SIZE=1\n",
+ "\n",
+ "# This is where your LoRA checkpoint was saved\n",
+ "PATH_TO_TRAINED_MODEL=\"/workspace/results/Meta-llama3.1-8B-Instruct-titlegen/checkpoints/megatron_gpt_peft_lora_tuning.nemo\"\n",
+ "\n",
+ "# The generation run will save the generated outputs over the test dataset in a file prefixed like so\n",
+ "OUTPUT_PREFIX=\"law_titlegen_lora\"\n",
+ "\n",
+ "python /opt/NeMo/examples/nlp/language_modeling/tuning/megatron_gpt_generate.py \\\n",
+ " model.restore_from_path=${MODEL} \\\n",
+ " model.peft.restore_from_path=${PATH_TO_TRAINED_MODEL} \\\n",
+ " trainer.devices=1 \\\n",
+ " trainer.num_nodes=1 \\\n",
+ " model.data.test_ds.file_names=${TEST_DS} \\\n",
+ " model.data.test_ds.names=${TEST_NAMES} \\\n",
+ " model.data.test_ds.global_batch_size=32 \\\n",
+ " model.data.test_ds.micro_batch_size=1 \\\n",
+ " model.data.test_ds.tokens_to_generate=25 \\\n",
+ " model.tensor_model_parallel_size=${TP_SIZE} \\\n",
+ " model.pipeline_model_parallel_size=${PP_SIZE} \\\n",
+ " inference.greedy=True \\\n",
+ " model.data.test_ds.output_file_path_prefix=${OUTPUT_PREFIX} \\\n",
+ " model.data.test_ds.write_predictions_to_file=True \\\n",
+ " model.data.test_ds.truncation_field=\"null\" \\\n",
+ " model.data.test_ds.add_bos=False \\\n",
+ " model.data.test_ds.add_eos=True \\\n",
+ " model.data.test_ds.add_sep=False \\\n",
+ " model.data.test_ds.label_key=\"output\" \\\n",
+ " model.data.test_ds.prompt_template=\"\\{input\\}\\ \\{output\\}\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "513cd732",
+ "metadata": {},
+ "source": [
+ "### Step 4: Check the model accuracy\n",
+ "\n",
+ "Now that the results are in, let's read the results and calculate the accuracy on the question title generation task.\n",
+ "Let's take a look at one of the predictions in the generated output file. The `pred` key indicates what was generated."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "04cb4ae7",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Take a look at predictions\n",
+ "!head -n1 law_titlegen_lora_test_law_inputs_preds_labels.jsonl"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4e88f3c9",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "For evaluating this task, we will use [ROUGE](https://en.wikipedia.org/wiki/ROUGE_(metric)). It measures overlap of ngrams, and a higher score is better. While it's not perfect and it misses capturing the semantics of the prediction, it is a popular metric in academia and industry for evaluating such systems. \n",
+ "\n",
+ "The following method uses the `rouge_score` library to implement scoring. It will report `ROUGE_{1/2/L/Lsum}` metrics."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c4aa9631",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def compute_rouge(input_file: str) -> dict:\n",
+ " ROUGE_KEYS = [\"rouge1\", \"rouge2\", \"rougeL\", \"rougeLsum\"]\n",
+ " scorer = rouge_scorer.RougeScorer(ROUGE_KEYS, use_stemmer=True)\n",
+ " aggregator = scoring.BootstrapAggregator()\n",
+ " lines = [json.loads(line) for line in open(input_file)]\n",
+ " num_response_words = []\n",
+ " num_ref_words = []\n",
+ " for idx, line in enumerate(lines):\n",
+ " prompt = line['input']\n",
+ " response = line['pred']\n",
+ " answer = line['label']\n",
+ " scores = scorer.score(response, answer)\n",
+ " aggregator.add_scores(scores)\n",
+ " num_response_words.append(len(response.split()))\n",
+ " num_ref_words.append(len(answer.split()))\n",
+ "\n",
+ " result = aggregator.aggregate()\n",
+ " rouge_scores = {k: round(v.mid.fmeasure * 100, 4) for k, v in result.items()}\n",
+ " print(rouge_scores)\n",
+ " print(f\"Average and stddev of response length: {np.mean(num_response_words):.2f}, {np.std(num_response_words):.2f}\")\n",
+ " print(f\"Average and stddev of ref length: {np.mean(num_ref_words):.2f}, {np.std(num_ref_words):.2f}\")\n",
+ "\n",
+ " return rouge_scores"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "41c661d5",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "compute_rouge(\"./law_titlegen_lora_test_law_inputs_preds_labels.jsonl\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f48667b1",
+ "metadata": {},
+ "source": [
+ "For the Llama-3.1-8B-Instruct model, you should see accuracy comparable to the below:\n",
+ "```\n",
+ "{'rouge1': 39.2082, 'rouge2': 18.8573, 'rougeL': 35.4098, 'rougeLsum': 35.3906}\n",
+ "```"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/tutorials/llm/mamba/mamba.rst b/tutorials/llm/mamba/mamba.rst
index 2ce5ee5f616b..2704c15aa05b 100644
--- a/tutorials/llm/mamba/mamba.rst
+++ b/tutorials/llm/mamba/mamba.rst
@@ -37,18 +37,36 @@ Step-by-step Guide for Fine-Tuning
Checkpoints from HuggingFace
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Obtain the desired checkpoint from HuggigFace.
+Obtain the desired checkpoint from HuggigFace. The checkpoints below have different arrangement and there are a few preprocessing step for each.
+
+1. `Repository `__ for the Mamba2 and Mamba2-Hybrid models by `NVIDIA `__.
+ The checkpoint from this repository is located in files tab under ``release/mp_rank_00/model_optim_rng.pt``. The tokenizer is under files tab and is named ``mt_nlg_plus_multilingual_ja_zh_the_stack_frac_015_256k.model``. You need both of these for conversion to ``.nemo`` checkpoint.
+
+2. `Repository `__ for the Mamba2 models from the `Transformers are SSMs paper `__.
+ For checkpoints from this repository, run the following python script to convert the pytorch checkpoint (`pytorch_model.bin` in the HuggingFace model card) to a format similar to the 8b models:
+
+ .. code:: python
+
+ import torch
+ import os
+
+ ckpt_path = "/path/to/pytorch_model.bin"
+ pyt_checkpoint = torch.load(ckpt_path)
+ new_ckpt_path = os.path.join(os.path.dirname(ckpt_path), f"wrapped_{os.path.basename(ckpt_path)}")
+
+ # Save the new checkpoint which will be used as the input to the conversion script
+ torch.save({"model": pyt_checkpoint}, new_ckpt_path)
+
+ You will use this ``wrapped_pytorch_model.bin`` for the conversion to ``.nemo`` in the next step.
-* `Repository `__ for the Mamba2 models from the `Transformers are SSMs paper `__.
-* `Repository `__ for the Mamba2 and Mamba2-Hybrid models by `NVIDIA `__.
Convert the Pytorch Checkpoint to a NeMo Checkpoint
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-1. Get into NVIDIA Container
+1. Get into the NVIDIA dev container from `NGC `_, or the 24.07 container (once released).
-2. Run the conversion script from . For this conversion script, you should provide the PyTorch state dictionary of the model for ``input_name_or_path``, i.e. this argument only accepts a single ``state_dict``.
+2. Run the conversion script from . For this conversion script, you should provide the checkpoint (and tokenizer in the case of 8b models) from the previous step for ``input_name_or_path``.
.. code:: bash
@@ -56,7 +74,9 @@ Convert the Pytorch Checkpoint to a NeMo Checkpoint
--input_name_or_path \
--output_path \
--mamba_ssm_ngroups 8 \
- --precision bf16
+ --precision bf16 \
+ --tokenizer_model_dir= # Remove this line (or set it to None) for 130m, 370m, 780m, 1.3b, and 2.7b models.
+
* Note: the ``mamba_ssm_ngroups`` parameter should be 1 for the Mamba2 models from the `Transformers are SSMs paper `__ (130m, 370m, 780m, 1.3b, and 2.7b) and 8 for the Mamba2 and Mamba2-Hybrid models by `NVIDIA `__ (both 8b).
@@ -69,7 +89,7 @@ The HuggingFace checkpoint for the 8b model is for TP of size 1, and so is the `
.. code:: bash
- python /opt/NeMo/examples/nlp/language_modeling/mamba_change_num_partition.py \
+ CUDA_VISIBLE_DEVICES="0" python /opt/NeMo/examples/nlp/language_modeling/mamba_change_num_partition.py \
--model_file= \
--target_file= \
--tensor_model_parallel_size=1 \
@@ -79,7 +99,7 @@ The HuggingFace checkpoint for the 8b model is for TP of size 1, and so is the `
After running this script, a ``.nemo`` model along with the TP-size number of folders (4 in this example) will be generated in the target path. The folders for each rank will be displayed as ``mp_rank_00`` to ``mp_rank_03`` in this example.
-* Note: You can only use Tensor Parallelism for the 8b models by `NVIDIA `__ (Mamba2 8b and Mamba2-Hybrid 8b). This is due to the fact that the ``nroups`` parameter in the model architecture should be divisible by TP size. ``nroups`` parameter is 8 for NVIDIA models and 1 for other models in the list.
+* Note: You can only use Tensor Parallelism for the 8b models by `NVIDIA `__ (Mamba2 8b and Mamba2-Hybrid 8b). This is due to the fact that the ``mamba_ssm_ngroups`` parameter in the model architecture should be divisible by TP size. ``mamba_ssm_ngroups`` parameter is 8 for NVIDIA models and 1 for other models in the list.
Run Fine-Tuning
^^^^^^^^^^^^^^^
@@ -93,21 +113,21 @@ Run Fine-Tuning
MBS=4
GBS=128
- TP=2 # According to the saved checkpoint
+ TP=4 # According to the saved checkpoint
SP=True # True only if TP>1 otherwise False
SEQ_LEN=2048
- NUM_DEVICES=2
+ NUM_DEVICES=8
PATH_TO_NEMO_MODEL=
TRAIN_DATASET_PATH=
VAL_DATASET_PATH=
- CONFIG_PATH="/opt/NeMo/examples/nlp/language_modeling/conf/"
+ CONFIG_PATH="/opt/NeMo/examples/nlp/language_modeling/tuning/conf/"
CONFIG_NAME="megatron_mamba_finetuning_config"
SAVE_DIR=
export NVTE_FUSED_ATTN=1
export NVTE_FLASH_ATTN=0
- torchrun --nproc_per_node=${NUM_DEVICES}
+ torchrun --nproc_per_node=${NUM_DEVICES} \
/opt/NeMo/examples/nlp/language_modeling/tuning/megatron_mamba_finetuning.py \
--config-path=${CONFIG_PATH} \
--config-name=${CONFIG_NAME} \
@@ -135,7 +155,6 @@ Run Fine-Tuning
model.optim.name="distributed_fused_adam" \
model.data.train_ds.max_seq_length=${SEQ_LEN} \
model.data.validation_ds.max_seq_length=${SEQ_LEN} \
- model.mcore_gpt=True \
model.micro_batch_size=${MBS} \
model.global_batch_size=${GBS} \
model.restore_from_path=${PATH_TO_NEMO_MODEL} \
@@ -144,8 +163,6 @@ Run Fine-Tuning
model.optim.lr=5e-6 \
model.optim.sched.min_lr=1e-7
-* Note: The tokenizer for 8b models (Mamba2 8b and MAmba2-Hybrid 8b) can be found in the `HuggingFace repository `__. Download it a set its path to ``TOKENIZER_MODEL`` (the tokenizer model file is under the name of ```mt_nlg_plus_multilingual_ja_zh_the_stack_frac_015_256k.model```). For other models, set ``TOKENIZER_MODEL=null`` since it will be downloaded from HuggingFace at the time of run.
-
Evaluating the Fine-Tuned Model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -156,13 +173,12 @@ Evaluating the Fine-Tuned Model
MBS=32
GBS=64
- TP=2 # According to the fine-tuned checkpoint
+ TP=4 # According to the fine-tuned checkpoint
SP=True # True only if TP>1 otherwise False
SEQ_LEN=2048
- NUM_DEVICES=2
+ NUM_DEVICES=8
PATH_TO_NEMO_MODEL=
- TRAIN_DATASET_PATH=
- VAL_DATASET_PATH=
+ TEST_DATASET="[]"
CONFIG_PATH="/opt/NeMo/examples/nlp/language_modeling/tuning/conf/"
CONFIG_NAME="megatron_mamba_finetuning_config"
SAVE_DIR=
@@ -170,12 +186,11 @@ Evaluating the Fine-Tuned Model
export NVTE_FUSED_ATTN=1
export NVTE_FLASH_ATTN=0
- TEST_DATASET="[]"
CONFIG_PATH="/opt/NeMo/examples/nlp/language_modeling/tuning/conf/"
CONFIG_NAME="megatron_mamba_generate_config"
- MASTER_PORT=15008 torchrun --nproc_per_node=${NUM_DEVICES} /opt/NeMo/examples/nlp/language_modeling/tuning/megatron_mamba_generate.py \
+ torchrun --nproc_per_node=${NUM_DEVICES} /opt/NeMo/examples/nlp/language_modeling/tuning/megatron_mamba_generate.py \
--config-path=${CONFIG_PATH} \
--config-name=${CONFIG_NAME} \
trainer.devices=${NUM_DEVICES} \
@@ -196,11 +211,11 @@ Evaluating the Fine-Tuned Model
+model.peft.restore_from_ckpt.checkpoint_dir=False \
+model.peft.restore_from_ckpt.checkpoint_name=False \
model.tensor_model_parallel_size=${TP} \
- model.sequence_parallel=$SP \
model.micro_batch_size=${MBS} \
model.global_batch_size=${GBS} \
model.restore_from_path=${PATH_TO_NEMO_MODEL} \
model.data.test_ds.file_names=${TEST_DATASET} \
+ model.data.test_ds.names=["squad"] \
model.data.test_ds.global_batch_size=${GBS} \
model.data.test_ds.micro_batch_size=${MBS} \
model.data.test_ds.tokens_to_generate=30 \
@@ -219,7 +234,7 @@ Evaluating the Fine-Tuned Model
Inference
^^^^^^^^^
-For running inference on a Mamba model, one should use ``megatron_mamba_eval.py`` script. For example:
+For running inference on a Mamba model, one should use ``megatron_mamba_eval.py`` script. This evaluation script currently requires tensor/model parallel (TP1) of size one. If your checkpoint has TP>1, use the TP conversion step from above and set ``target_tensor_model_parallel_size=1``. The following is an example for using evaluation script:
.. code:: bash
diff --git a/tutorials/multimodal/LITA Tutorial.ipynb b/tutorials/multimodal/LITA Tutorial.ipynb
index d86b0d296198..1dfc70add81a 100644
--- a/tutorials/multimodal/LITA Tutorial.ipynb
+++ b/tutorials/multimodal/LITA Tutorial.ipynb
@@ -12,17 +12,22 @@
"metadata": {},
"source": [
"### Note:\n",
- "Currently, this notebook must be run in a NeMo container (> 24.04). An example command to launch the container:\n",
+ "Currently, this notebook can be run in a NeMo container (>= 24.07). An example command to launch the container:\n",
"\n",
"```\n",
- "docker run --gpus all -it --rm -v :/opt/NeMo --shm-size=8g -p 8888:8888 --ulimit memlock=-1 --ulimit stack=67108864 \n",
+ "docker run --gpus all -it --rm -v $PWD:/ws --shm-size=8g -p 8888:8888 --ulimit memlock=-1 --ulimit stack=67108864 \n",
"```\n",
"For inference and finetuning, you need to increase the share memory size to avoid some OOM issue. For example,\n",
"```\n",
- "docker run --gpus all -it --rm -v :/opt/NeMo -v $PWD:/ws --shm-size=128g -p 8888:8888 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:dev\n",
+ "docker run --gpus all -it --rm -v $PWD:/ws --shm-size=128g -p 8888:8888 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:dev\n",
"```\n",
"\n",
- "By `-v $PWD:/ws`, we can mount the current local directory to `/ws/` in docker container. We may use this local directory to put the `NeMo` source code, checkpoints and dataset we will generate."
+ "By `-v $PWD:/ws`, we can mount the current local directory to `/ws/` in docker container. We may use this local directory to put the `NeMo` source code, checkpoints and dataset we will generate.\n",
+ "\n",
+ "If you wanna use NeMo container (>24.04 and < 24.07) (not recommended), you need to manually mount the latest nemo:\n",
+ "```\n",
+ "docker run --gpus all -it --rm -v :/opt/NeMo -v $PWD:/ws --shm-size=128g -p 8888:8888 --ulimit memlock=-1 --ulimit stack=67108864 \n",
+ "```"
]
},
{
@@ -66,7 +71,7 @@
"source": [
"### Tokenizer conversion\n",
"Here we show how to add 100 time tokens and some nemo extra tokens to a huggingface tokenizer.\n",
- "For the definition of nemo extra tokens, please refer to `NeMo/nemo/collections/multimodal/data/neva/conversation.py`.\n"
+ "For the definition of nemo extra tokens, please refer to `/opt/NeMo/nemo/collections/multimodal/data/neva/conversation.py`.\n"
]
},
{
@@ -136,7 +141,7 @@
"metadata": {},
"source": [
"### Checkpoint Conversion\n",
- "Since VILA and LITA shared a similar model structure as LLaVA, we'll leverage `NeMo/examples/multimodal/multimodal_llm/neva/convert_llava_to_neva.py` for converting the checkpoint. Since VILA and LITA depends on LLaVA, we need to clone LLaVA first.\n"
+ "Since VILA and LITA shared a similar model structure as LLaVA, we'll leverage `/opt/NeMo/examples/multimodal/multimodal_llm/neva/convert_llava_to_neva.py` for converting the checkpoint. Since VILA and LITA depends on LLaVA, we need to clone LLaVA first.\n"
]
},
{
@@ -323,17 +328,17 @@
"pip install moviepy\n",
"\n",
"#download videos, this may take a while\n",
- "python NeMo/scripts/multimodal_dataset_conversion/prepare_youmakeup.py -i YouMakeup/data/train/train_steps.json -o /ws/dataset -d True\n",
+ "python /opt/NeMo/scripts/multimodal_dataset_conversion/prepare_youmakeup.py -i YouMakeup/data/train/train_steps.json -o /ws/dataset -d True\n",
"\n",
"#chunk videos into clips, with each clip containing 120 seconds\n",
- "python NeMo/scripts/multimodal_dataset_conversion/prepare_youmakeup.py -i YouMakeup/data/train/train_steps.json -o /ws/dataset -l 12\n",
+ "python /opt/NeMo/scripts/multimodal_dataset_conversion/prepare_youmakeup.py -i YouMakeup/data/train/train_steps.json -o /ws/dataset -l 12\n",
"\n",
"#create evaluation dataset\n",
- "python NeMo/scripts/multimodal_dataset_conversion/prepare_youmakeup.py -i YouMakeup/data/valid/valid_steps.json -o /ws/dataset/valid/ -d True\n",
- "python NeMo/scripts/multimodal_dataset_conversion/prepare_youmakeup.py -i YouMakeup/data/train/valid_steps.json -o /ws/dataset/valid/ -l 120\n",
+ "python /opt/NeMo/scripts/multimodal_dataset_conversion/prepare_youmakeup.py -i YouMakeup/data/valid/valid_steps.json -o /ws/dataset/valid/ -d True\n",
+ "python /opt/NeMo/scripts/multimodal_dataset_conversion/prepare_youmakeup.py -i YouMakeup/data/train/valid_steps.json -o /ws/dataset/valid/ -l 120\n",
"\n",
"#create QA style validation/evaluation or test dataset\n",
- "python3 NeMo/scripts/multimodal_dataset_conversion/convert_dvc_dataset_for_evaluation.py --input /ws/dataset/valid/train.json --output_file=/ws/dataset/valid/rtl_eval.json"
+ "python3 /opt/NeMo/scripts/multimodal_dataset_conversion/convert_dvc_dataset_for_evaluation.py --input /ws/dataset/valid/train.json --output_file=/ws/dataset/valid/rtl_eval.json"
]
},
{
@@ -364,14 +369,14 @@
"source": [
"%%bash\n",
"# generate custom caption dataset and multiply the dataset by three times\n",
- "python NeMo/scripts/multimodal_dataset_conversion/convert_dvc_dataset_for_training.py \\\n",
+ "python /opt/NeMo/scripts/multimodal_dataset_conversion/convert_dvc_dataset_for_training.py \\\n",
" --input_dvc_dataset /ws/dataset/train.json \\\n",
" --video_path_prefix /ws/dataset/videos/ \\\n",
" --subtask custom_caption --data_multiplier 3 \\\n",
" --output_file /ws/dataset/vc_train.json\n",
"\n",
"# generate event loalization dataset and increase the dataset by three times\n",
- "python NeMo/scripts/multimodal_dataset_conversion/convert_dvc_dataset_for_training.py \\\n",
+ "python /opt/NeMo/scripts/multimodal_dataset_conversion/convert_dvc_dataset_for_training.py \\\n",
" --input_dvc_dataset /ws/dataset/train.json \\\n",
" --video_path_prefix /ws/dataset/videos/ \\\n",
" --subtask event_localization --data_multiplier 3 \\\n",
@@ -598,7 +603,7 @@
"outputs": [],
"source": [
"%%bash\n",
- "python3 NeMo/examples/multimodal/multimodal_llm/neva/eval/eval_video_rtl.py \\\n",
+ "python3 /opt/NeMo/examples/multimodal/multimodal_llm/neva/eval/eval_video_rtl.py \\\n",
" --input_file=/ws/dataset/valid/split_output/nemo_infer_output_total.json \\\n",
" --output_dir=/ws/dataset/valid/split_output/ --save_mid_result"
]
@@ -607,7 +612,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "You many also refer to `NeMo/examples/multimodal/multimodal_llm/neva/eval/eval_vqa.py` to check how to use external LLM API to do the video question answering task evaluation."
+ "You many also refer to `/opt/NeMo/examples/multimodal/multimodal_llm/neva/eval/eval_vqa.py` to check how to use external LLM API to do the video question answering task evaluation."
]
}
],
diff --git a/tutorials/multimodal/NeVA Tutorial.ipynb b/tutorials/multimodal/NeVA Tutorial.ipynb
index b57bdb47df57..1ad1101a0299 100644
--- a/tutorials/multimodal/NeVA Tutorial.ipynb
+++ b/tutorials/multimodal/NeVA Tutorial.ipynb
@@ -2,8 +2,13 @@
"cells": [
{
"cell_type": "markdown",
- "id": "a2225742c5996304",
- "metadata": {},
+ "id": "b29a4b72-31bb-4268-9598-2cd2b6f7475e",
+ "metadata": {
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"source": [
"# NeVA Training / Inference Tutorial\n",
"\n",
@@ -20,28 +25,19 @@
"\n",
"This notebook illustrates how to train and perform inference using NeVA with the NeMo Toolkit. NeVA originates from [LLaVA](https://github.com/haotian-liu/LLaVA) (Large Language and Vision Assistant) and is a powerful multimodal image-text instruction tuned model optimized within the NeMo Framework. \n",
"\n",
- "\n",
"This tutorial will guide you through the following topics:\n",
- "1. Training a NeVA model\n",
- "2. Performing inference with the trained model\n",
+ "1. Prepare pre-requisites for NeVA training\n",
+ "2. Training a NeVA model\n",
+ "3. Performing inference with the trained model\n",
"\n",
"## Datasets\n",
"\n",
- "After downloading all below datasets for pretraining and instruction tuning, your dataset directory should look something similar to:\n",
+ "Please refer to [NeMo User Guide](https://docs.nvidia.com/nemo-framework/user-guide/latest/multimodalmodels/multimodallanguagemodel/neva/dataprep.html#prepare-pretraining-and-fine-tuning-datasets) for preparing NeVA dataset for pretrain and fine-tuning.\n",
"\n",
- "```\n",
- "LLaVA-Pretrain-LCS-558K\n",
- "├── blip_laion_cc_sbu_558k.json\n",
- "├── images\n",
- "LLaVA-Instruct-mixture\n",
- "├── llava_v1_5_mix665k.json\n",
- "└── images\n",
- " └── ...\n",
- "```\n",
"\n",
"### Pre-Training Dataset\n",
"\n",
- "The pre-training dataset is open-sourced from the LLaVA implementation and can be downloaded [here](https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain). The dataset consists of a 558K subset of the LAION-CC-SBU dataset with BLIP captions. \n",
+ "The pre-training dataset is open-sourced from the LLaVA implementation and can be downloaded [here](https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain). The dataset consists of a 558K subset of the LAION-CC-SBU dataset with BLIP captions.\n",
"\n",
"The associated images for pretraining can be downloaded via HuggingFace [here](https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain/blob/main/images.zip).\n",
"\n",
@@ -66,14 +62,75 @@
" └── VG_100K_2\n",
"```\n",
"\n",
- "## Training\n",
+ "After downloading all below datasets for pretraining and instruction tuning, please put data folder at `/workspace/datasets`. Your dataset directory should look something similar to:\n",
+ "\n",
+ "```\n",
+ "LLaVA-Pretrain-LCS-558K\n",
+ "├── blip_laion_cc_sbu_558k.json\n",
+ "├── images\n",
+ "LLaVA-Instruct-mixture\n",
+ "├── llava_v1_5_mix665k.json\n",
+ "└── images\n",
+ " └── ...\n",
+ "```\n",
+ "\n",
+ "## Setting up Checkpoint and Tokenizer\n",
+ "\n",
+ "In this notebook, we first need to convert the Vicuna 1.5 checkpoint into the .nemo format. Meanwhile, special tokens must be incorporated into the tokenizer for NeVA training. After downloading language models from Hugging Face, ensure you also fetch the corresponding tokenizer model. Using the 7B-chat model as a reference."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6d80adff-bd3a-40e0-9441-684328ec7596",
+ "metadata": {
+ "vscode": {
+ "languageId": "plaintext"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "! mkdir -p /workspace/checkpoints\n",
+ "\n",
+ "# Download vicuna checkpoint from HF\n",
+ "! git clone https://huggingface.co/lmsys/vicuna-7b-v1.5 /workspace/checkpoints/vicuna-7b-v1.5\n",
+ "\n",
+ "# Convert checkpoint\n",
+ "! python /opt/NeMo/scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \\\n",
+ " --input_name_or_path /workspace/checkpoints/vicuna-7b-v1.5 \\\n",
+ " --output_path /workspace/checkpoints/vicuna-7b-v1.5.nemo\n",
+ "\n",
+ "# Prepare tokenizer\n",
+ "! cd /opt && git clone https://github.com/google/sentencepiece.git && \\\n",
+ " cd sentencepiece && \\\n",
+ " mkdir build && \\\n",
+ " cd build && \\\n",
+ " cmake .. && \\\n",
+ " make && \\\n",
+ " make install && \\\n",
+ " ldconfig && \\\n",
+ "cd /opt/sentencepiece/src/ && protoc --python_out=/opt/NeMo/scripts/tokenizers/ sentencepiece_model.proto\n",
"\n",
+ "! python /opt/NeMo/scripts/tokenizers/add_special_tokens_to_sentencepiece.py \\\n",
+ "--input_file /workspace/checkpoints/vicuna-7b-v1.5/tokenizer.model \\\n",
+ "--output_file /workspace/checkpoints/vicuna-7b-v1.5/tokenizer_neva.model \\\n",
+ "--is_userdefined \\\n",
+ "--tokens \"\" \"\" \"\" \"\" \\\n",
+ " \"\" \"\" \"\" \"\"\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b619e0a",
+ "metadata": {},
+ "source": [
+ "## Training\n",
"\n",
"### Feature Alignment Pre-Training\n",
"\n",
"We provide a set of scripts for pre-training and fine-tuning which can be kicked off with CLI flags defining specified arguments. \n",
"\n",
- "An example of a pre-training script execution:"
+ "An example of a pre-training script execution (note the scripts will only perform 100 steps with a small micro batch size, this is not a full training):"
]
},
{
@@ -92,61 +149,58 @@
" trainer.precision=bf16 \\\n",
" trainer.num_nodes=1 \\\n",
" trainer.devices=4 \\\n",
- " trainer.val_check_interval=1000 \\\n",
+ " trainer.val_check_interval=50 \\\n",
" trainer.limit_val_batches=5 \\\n",
" trainer.log_every_n_steps=1 \\\n",
- " trainer.max_steps=1000 \\\n",
+ " trainer.max_steps=100 \\\n",
" model.megatron_amp_O2=True \\\n",
" model.micro_batch_size=1 \\\n",
- " model.global_batch_size=2 \\\n",
- " model.tensor_model_parallel_size=4 \\\n",
+ " model.global_batch_size=4 \\\n",
+ " model.tensor_model_parallel_size=1 \\\n",
" model.pipeline_model_parallel_size=1 \\\n",
" model.mcore_gpt=True \\\n",
" model.transformer_engine=True \\\n",
- " model.data.data_path=/path/to/datasets/LLaVA-Pretrain-LCS-558K/blip_laion_cc_sbu_558k.json \\\n",
- " model.data.image_folder=/path/to/dataset/LLaVA-Pretrain-LCS-558K/images \\\n",
+ " model.data.data_path=/workspace/datasets/LLaVA-Pretrain-LCS-558K/blip_laion_cc_sbu_558k.json \\\n",
+ " model.data.image_folder=/workspace/datasets/LLaVA-Pretrain-LCS-558K/images \\\n",
" model.tokenizer.library=sentencepiece \\\n",
- " model.tokenizer.model=/path/to/tokenizer/model \\\n",
+ " model.tokenizer.model=/workspace/checkpoints/vicuna-7b-v1.5/tokenizer_neva.model \\\n",
" model.encoder_seq_length=4096 \\\n",
" model.num_layers=32 \\\n",
" model.hidden_size=4096 \\\n",
- " model.ffn_hidden_size=16384 \\\n",
+ " model.ffn_hidden_size=11008 \\\n",
" model.num_attention_heads=32 \\\n",
- " model.normalization=layernorm1p \\\n",
+ " model.normalization=rmsnorm \\\n",
" model.do_layer_norm_weight_decay=False \\\n",
" model.apply_query_key_layer_scaling=True \\\n",
- " model.activation=squared-relu \\\n",
+ " model.bias=False \\\n",
+ " model.activation=fast-swiglu \\\n",
" model.headscale=False \\\n",
" model.position_embedding_type=rope \\\n",
- " model.rotary_percentage=0.5 \\\n",
+ " model.rotary_percentage=1.0 \\\n",
" model.num_query_groups=null \\\n",
" model.data.num_workers=0 \\\n",
- " model.mm_cfg.llm.from_pretrained=/path/to/checkpoint \\\n",
- " model.mm_cfg.llm.model_type=nvgpt \\\n",
- " model.data.conv_template=nvgpt \\\n",
+ " model.mm_cfg.llm.from_pretrained=/workspace/checkpoints/vicuna-7b-v1.5.nemo \\\n",
+ " model.mm_cfg.llm.model_type=v1 \\\n",
+ " model.data.conv_template=v1 \\\n",
" model.mm_cfg.vision_encoder.from_pretrained='openai/clip-vit-large-patch14' \\\n",
" model.mm_cfg.vision_encoder.from_hf=True \\\n",
- " model.data.image_token_len=256 \\\n",
" model.optim.name=\"fused_adam\" \\\n",
" exp_manager.create_checkpoint_callback=True \\\n",
- " exp_manager.create_wandb_logger=False \\\n",
- " exp_manager.wandb_logger_kwargs.project=neva_demo"
+ " exp_manager.create_wandb_logger=False"
]
},
{
"cell_type": "markdown",
- "id": "6b619e0a",
+ "id": "f24ee70d-3025-47f6-8571-295b024c3e05",
"metadata": {},
"source": [
- "\n",
- "\n",
"**Note**: To initialize training a model from scratch rather than from a pretrained checkpoint, you may specify `null` instead of a path in the CLI arguments.\n",
"\n",
"### Image-Language Pair Instruction Fine-Tuning\n",
"\n",
- "Fine-tuning can also be run from within the container via a similar command leveraging the `neva_finetune.py` script.\n",
+ "Fine-tuning can also be run from within the container via a similar command leveraging the `neva_finetune.py` script. We leverage the checkpoint saved from pretrain step to further finetune it, given by `model.restore_from_path=/workspace/nemo_experiments/nemo_neva/checkpoints/nemo_neva.nemo`.\n",
"\n",
- "An example of an image-text pair instruction tuning script execution:"
+ "An example of an image-text pair instruction tuning script execution (note the scripts will only perform 1000 steps with a small micro batch size, this is not a full training):"
]
},
{
@@ -164,42 +218,44 @@
"++cluster_type=BCP \\\n",
" trainer.precision=bf16 \\\n",
" trainer.num_nodes=1 \\\n",
- " trainer.devices=1 \\\n",
- " trainer.val_check_interval=100 \\\n",
+ " trainer.devices=4 \\\n",
+ " trainer.val_check_interval=50 \\\n",
" trainer.limit_val_batches=50 \\\n",
- " trainer.max_steps=4900 \\\n",
+ " trainer.max_steps=100 \\\n",
+ " model.restore_from_path=/workspace/nemo_experiments/nemo_neva/checkpoints/nemo_neva.nemo \\\n",
" model.megatron_amp_O2=True \\\n",
- " model.micro_batch_size=4 \\\n",
- " model.global_batch_size=32 \\\n",
- " model.tensor_model_parallel_size=1 \\\n",
+ " model.micro_batch_size=1 \\\n",
+ " model.global_batch_size=2 \\\n",
+ " model.tensor_model_parallel_size=4 \\\n",
" model.pipeline_model_parallel_size=1 \\\n",
" model.mcore_gpt=True \\\n",
" model.transformer_engine=True \\\n",
- " model.data.data_path=/path/to/dataset/LLaVA-Pretrain-LCS-558K/blip_laion_cc_sbu_558k.json \\\n",
- " model.data.image_folder=/path/to/dataset/LLaVA-Pretrain-LCS-558K/images \\\n",
- " model.tokenizer.library=megatron \\\n",
- " model.tokenizer.model=/path/to/tokenizer \\\n",
+ " model.data.data_path=/workspace/datasets/LLaVA-Instruct-mixture/llava_v1_5_mix665k.json \\\n",
+ " model.data.image_folder=/workspace/datasets/LLaVA-Instruct-mixture/images \\\n",
+ " model.tokenizer.library=sentencepiece \\\n",
+ " model.tokenizer.model=/workspace/checkpoints/vicuna-7b-v1.5/tokenizer_neva.model \\\n",
" model.encoder_seq_length=4096 \\\n",
- " model.num_layers=24 \\\n",
- " model.hidden_size=2048 \\\n",
- " model.ffn_hidden_size=5440 \\\n",
- " model.num_attention_heads=16 \\\n",
- " model.normalization=layernorm1p \\\n",
+ " model.num_layers=32 \\\n",
+ " model.hidden_size=4096 \\\n",
+ " model.ffn_hidden_size=11008 \\\n",
+ " model.num_attention_heads=32 \\\n",
+ " model.normalization=rmsnorm \\\n",
" model.do_layer_norm_weight_decay=False \\\n",
" model.apply_query_key_layer_scaling=True \\\n",
+ " model.bias=False \\\n",
" model.activation=fast-swiglu \\\n",
" model.headscale=False \\\n",
" model.position_embedding_type=rope \\\n",
- " model.rotary_percentage=0.5 \\\n",
+ " model.rotary_percentage=1.0 \\\n",
" model.num_query_groups=null \\\n",
- " model.data.num_workers=8 \\\n",
- " model.mm_cfg.llm.from_pretrained=/path/to/checkpoint \\\n",
- " model.mm_cfg.llm.model_type=nvgpt \\\n",
- " exp_manager.create_checkpoint_callback=True \\\n",
- " model.data.conv_template=nvgpt \\\n",
+ " model.data.num_workers=0 \\\n",
+ " model.mm_cfg.llm.from_pretrained=/workspace/checkpoints/vicuna-7b-v1.5.nemo \\\n",
+ " model.mm_cfg.llm.model_type=v1 \\\n",
+ " model.data.conv_template=v1 \\\n",
" model.mm_cfg.vision_encoder.from_pretrained='openai/clip-vit-large-patch14' \\\n",
" model.mm_cfg.vision_encoder.from_hf=True \\\n",
- " model.data.image_token_len=256 \\\n",
+ " exp_manager.create_checkpoint_callback=True \\\n",
+ " exp_manager.name=\"nemo_neva_finetune\" \\\n",
" model.optim.name=\"fused_adam\""
]
},
@@ -212,38 +268,7 @@
"\n",
"### From Pre-trained Checkpoints\n",
"\n",
- "If you would like to use NeVA for inference from pre-trained checkpoint via HuggingFace, you can convert from HuggingFace to `.nemo` first.\n",
- "\n",
- "First, download the model checkpoint from HuggingFace [here](https://huggingface.co/liuhaotian/llava-v1.5-7b). The tokenizer (stored as `tokenizer.model` within the pretrained checkpoint) must be modified with the following commands:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "0d30003f",
- "metadata": {
- "vscode": {
- "languageId": "plaintext"
- }
- },
- "outputs": [],
- "source": [
- "! cd /opt/sentencepiece/src/\n",
- "! protoc --python_out=/opt/NeMo/scripts/tokenizers/ sentencepiece_model.proto\n",
- "! python /opt/NeMo/scripts/tokenizers/add_special_tokens_to_sentencepiece.py \\\n",
- "--input_file /path/to/tokenizer.model \\\n",
- "--output_file /path/to/tokenizer_neva.model \\\n",
- "--is_userdefined \\\n",
- "--tokens \"\" \"\" \"\" \"\" \\\n",
- " \"\" \"\" \"\" \"\""
- ]
- },
- {
- "cell_type": "markdown",
- "id": "470c093b",
- "metadata": {},
- "source": [
- "Finally, convert to `.nemo` via the provided script:"
+ "If you would like to use NeVA for inference from pre-trained checkpoint via HuggingFace, you can use the checkpoint from fine-tune step or convert from HuggingFace to `.nemo` first. Since we didn't finish full training in this tutorial with NeMo. We will instruct how you can convert a checkpoint from Hugging Face."
]
},
{
@@ -257,12 +282,10 @@
},
"outputs": [],
"source": [
- "! git clone --depth 1 --branch v1.2.2 https://github.com/haotian-liu/LLaVA/\n",
- "! export PYTHONPATH=/opt/LLaVA/:$PYTHONPATH\n",
- "! python /opt/NeMo/examples/multimodal/multimodal_llm/neva/convert_llava_to_neva.py \\\n",
- "--in-file /path/to/llava-v1.5-7b \\\n",
- "--out-file /path/to/llava-v1.5-7b.nemo \\\n",
- "--tokenizer-model /path/to/tokenizer_neva.model"
+ "! python3 /opt/NeMo/scripts/checkpoint_converters/convert_llava_hf_to_nemo.py \\\n",
+ " --input_name_or_path llava-hf/llava-1.5-7b-hf \\\n",
+ " --output_path /workspace/checkpoints/llava-7b.nemo \\\n",
+ " --tokenizer_path /workspace/checkpoints/vicuna-7b-v1.5/tokenizer_neva.model"
]
},
{
@@ -290,57 +313,19 @@
},
"outputs": [],
"source": [
+ "! echo '{\"image\": \"RTX4080.png\", \"prompt\": \"\\nCan you describe this image?\"}' > sample.json\n",
+ "! mkdir images && wget https://assets.nvidia.partners/images/png/TUF_Gaming_GeForce_RTX_4080_SUPER_OC_edition_packaging_with_card__12419.png --output-document=images/RTX4080.png\n",
"! torchrun --nproc_per_node=1 /opt/NeMo/examples/multimodal/multimodal_llm/neva/neva_evaluation.py \\\n",
"tensor_model_parallel_size=1 \\\n",
"pipeline_model_parallel_size=1 \\\n",
- "neva_model_file=/path/to/checkpoint \\\n",
+ "neva_model_file=/workspace/checkpoints/llava-7b.nemo \\\n",
"trainer.devices=1 \\\n",
"trainer.precision=bf16 \\\n",
- "prompt_file=/path/to/prompt/file \\\n",
- "inference.media_base_path=/path/to/image \\\n",
- "output_file=path/for/output/file/ \\\n",
+ "prompt_file=sample.json \\\n",
+ "inference.media_base_path=images \\\n",
+ "output_file=output.json \\\n",
"inference.temperature=0.2 \\\n",
- "inference.top_k=0 \\\n",
- "inference.top_p=0.9 \\\n",
- "inference.greedy=False \\\n",
- "inference.add_BOS=False \\\n",
- "inference.all_probs=False \\\n",
- "inference.repetition_penalty=1.2 \\\n",
- "inference.insert_media_token=null \\\n",
- "inference.tokens_to_generate=256 \\\n",
- "quantization.algorithm=awq \\\n",
- "quantization.enable=False"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7d989385",
- "metadata": {},
- "source": [
- "#### Running Inference via Launcher\n",
- "\n",
- "Inference can also be run via the NeMo Launcher, where parameters are specified in the inference config file rather than CLI arguments. To customize the default config provided in `conf/config.yaml` for NeVA inference, see below.\n",
- "\n",
- "##### Inference Config Setup\n",
- "1. Modify `fw_inference` within `defaults` to use `neva/inference` \n",
- "2. In `stages`, ensure that `fw_inference` is included\n",
- "3. Within the `inference.yaml` default NeVA inference config file, ensure that the path to the `prompt` file, `neva_model_file`, and `media_base_path` within `inference` are specified.\n",
- "\n",
- "Once either the necessary checkpoints have been loaded or the training workflow is complete, inference can be executed within the launcher pipeline with the following command:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "68d434ff",
- "metadata": {
- "vscode": {
- "languageId": "plaintext"
- }
- },
- "outputs": [],
- "source": [
- "! python3 main.py"
+ "inference.tokens_to_generate=256"
]
}
],
@@ -360,7 +345,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.6"
+ "version": "3.11.6"
}
},
"nbformat": 4,