From 73a1143ba26d5bcc76da70c4c452e33dc54e38c4 Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 27 Sep 2024 13:53:08 +0100 Subject: [PATCH 01/14] added some functions which will load a pipeline with dropout turned on by default, also a function to change the dropout setting at runtime --- .gitignore | 3 + .../dropout_pipelines/dropout-pipeline.py | 28 +++ test.json | 171 ++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 src/arc_spice/dropout_pipelines/dropout-pipeline.py create mode 100644 test.json diff --git a/.gitignore b/.gitignore index 25cf9a4..b5dc7cf 100644 --- a/.gitignore +++ b/.gitignore @@ -156,3 +156,6 @@ Thumbs.db # Common editor files *~ *.swp + +# other +temp \ No newline at end of file diff --git a/src/arc_spice/dropout_pipelines/dropout-pipeline.py b/src/arc_spice/dropout_pipelines/dropout-pipeline.py new file mode 100644 index 0000000..44715df --- /dev/null +++ b/src/arc_spice/dropout_pipelines/dropout-pipeline.py @@ -0,0 +1,28 @@ +import torch +from transformers import pipeline + + +def set_dropout(model, dropout_flag: bool): + for _, param in model.named_modules(): + if isinstance(param, torch.nn.Dropout): + # dropout on (True) -> want training mode train(True) + # dropout off (False) -> eval mode train(False) + param.train(dropout_flag) + return model + + +def MCDropoutPipeline(task: str, model: str): + pl = pipeline( + task=task, + model=model, + ) + initial_model = pl.model + pl.model = set_dropout(model=initial_model, dropout_flag=True) + return pl + + +def test_dropout(pipe): + model = pipe.model + for name, param in model.named_modules(): + if isinstance(param, torch.nn.Dropout): + print(name, param.training) diff --git a/test.json b/test.json new file mode 100644 index 0000000..c14ed00 --- /dev/null +++ b/test.json @@ -0,0 +1,171 @@ +{ + "vocab_size": 256102, + "t2u_vocab_size": 10082, + "char_vocab_size": 10943, + "hidden_size": 1024, + "initializer_range": 0.02, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 4096, + "use_cache": true, + "max_new_tokens": 256, + "encoder_layerdrop": 0.05, + "decoder_layerdrop": 0.05, + "activation_function": "relu", + "dropout": 0.15000000000000002, + "attention_dropout": 0.15000000000000002, + "activation_dropout": 0.05, + "scale_embedding": true, + "num_attention_heads": 16, + "num_hidden_layers": 24, + "encoder_layers": 24, + "encoder_ffn_dim": 8192, + "encoder_attention_heads": 16, + "decoder_layers": 24, + "decoder_ffn_dim": 8192, + "decoder_attention_heads": 16, + "speech_encoder_layers": 24, + "speech_encoder_hidden_act": "swish", + "speech_encoder_dropout": 0.05, + "speech_encoder_attention_heads": 16, + "speech_encoder_layerdrop": 0.1, + "speech_encoder_intermediate_size": 4096, + "feature_projection_input_dim": 160, + "adaptor_kernel_size": 8, + "adaptor_stride": 8, + "adaptor_dropout": 0.15000000000000002, + "num_adapter_layers": 1, + "position_embeddings_type": "relative_key", + "conv_depthwise_kernel_size": 31, + "add_adapter": true, + "left_max_position_embeddings": 64, + "right_max_position_embeddings": 8, + "speech_encoder_chunk_size": 20000, + "speech_encoder_left_chunk_num": 128, + "t2u_bos_token_id": 0, + "t2u_pad_token_id": 1, + "t2u_eos_token_id": 2, + "t2u_encoder_layers": 6, + "t2u_encoder_ffn_dim": 8192, + "t2u_encoder_attention_heads": 16, + "t2u_decoder_layers": 6, + "t2u_decoder_ffn_dim": 8192, + "t2u_decoder_attention_heads": 16, + "t2u_max_position_embeddings": 4096, + "t2u_variance_predictor_embed_dim": 1024, + "t2u_variance_predictor_hidden_dim": 256, + 
"t2u_variance_predictor_kernel_size": 3, + "t2u_variance_pred_dropout": 0.55, + "sampling_rate": 16000, + "upsample_initial_channel": 512, + "upsample_rates": [ + 5, + 4, + 4, + 2, + 2 + ], + "upsample_kernel_sizes": [ + 11, + 8, + 8, + 4, + 4 + ], + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "leaky_relu_slope": 0.1, + "unit_hifi_gan_vocab_size": 10000, + "unit_embed_dim": 1280, + "lang_embed_dim": 256, + "spkr_embed_dim": 256, + "vocoder_num_langs": 36, + "vocoder_num_spkrs": 200, + "variance_predictor_kernel_size": 3, + "var_pred_dropout": 0.55, + "vocoder_offset": 4, + "return_dict": true, + "output_hidden_states": false, + "output_attentions": false, + "torchscript": false, + "torch_dtype": "float32", + "use_bfloat16": false, + "tf_legacy_loss": false, + "pruned_heads": {}, + "tie_word_embeddings": true, + "chunk_size_feed_forward": 0, + "is_encoder_decoder": true, + "is_decoder": false, + "cross_attention_hidden_size": null, + "add_cross_attention": false, + "tie_encoder_decoder": false, + "max_length": 20, + "min_length": 0, + "do_sample": false, + "early_stopping": false, + "num_beams": 1, + "num_beam_groups": 1, + "diversity_penalty": 0.0, + "temperature": 1.0, + "top_k": 50, + "top_p": 1.0, + "typical_p": 1.0, + "repetition_penalty": 1.0, + "length_penalty": 1.0, + "no_repeat_ngram_size": 0, + "encoder_no_repeat_ngram_size": 0, + "bad_words_ids": null, + "num_return_sequences": 1, + "output_scores": false, + "return_dict_in_generate": false, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "remove_invalid_values": false, + "exponential_decay_length_penalty": null, + "suppress_tokens": null, + "begin_suppress_tokens": null, + "architectures": [ + "SeamlessM4Tv2Model" + ], + "finetuning_task": null, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "tokenizer_class": null, + "prefix": null, + "bos_token_id": 2, + "pad_token_id": 0, + "eos_token_id": 3, + "sep_token_id": null, + "decoder_start_token_id": 3, + "task_specific_params": null, + "problem_type": null, + "_name_or_path": "facebook/seamless-m4t-v2-large", + "transformers_version": "4.44.2", + "model_type": "seamless_m4t_v2", + "attn_implementation": null +} \ No newline at end of file From e1f8cd3f40e8afa5ff2b0fb2251902f82969fcd7 Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 27 Sep 2024 13:53:32 +0100 Subject: [PATCH 02/14] removed old test json script --- test.json | 171 ------------------------------------------------------ 1 file changed, 171 deletions(-) delete mode 100644 test.json diff --git a/test.json b/test.json deleted file mode 100644 index c14ed00..0000000 --- a/test.json +++ /dev/null @@ -1,171 +0,0 @@ -{ - "vocab_size": 256102, - "t2u_vocab_size": 10082, - "char_vocab_size": 10943, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_eps": 1e-05, - "max_position_embeddings": 4096, - "use_cache": true, - "max_new_tokens": 256, - "encoder_layerdrop": 0.05, - "decoder_layerdrop": 0.05, - "activation_function": "relu", - "dropout": 0.15000000000000002, - "attention_dropout": 0.15000000000000002, - "activation_dropout": 0.05, - "scale_embedding": true, - "num_attention_heads": 16, - "num_hidden_layers": 24, - "encoder_layers": 24, - "encoder_ffn_dim": 8192, - "encoder_attention_heads": 16, - "decoder_layers": 24, - "decoder_ffn_dim": 8192, - "decoder_attention_heads": 16, - "speech_encoder_layers": 24, - 
"speech_encoder_hidden_act": "swish", - "speech_encoder_dropout": 0.05, - "speech_encoder_attention_heads": 16, - "speech_encoder_layerdrop": 0.1, - "speech_encoder_intermediate_size": 4096, - "feature_projection_input_dim": 160, - "adaptor_kernel_size": 8, - "adaptor_stride": 8, - "adaptor_dropout": 0.15000000000000002, - "num_adapter_layers": 1, - "position_embeddings_type": "relative_key", - "conv_depthwise_kernel_size": 31, - "add_adapter": true, - "left_max_position_embeddings": 64, - "right_max_position_embeddings": 8, - "speech_encoder_chunk_size": 20000, - "speech_encoder_left_chunk_num": 128, - "t2u_bos_token_id": 0, - "t2u_pad_token_id": 1, - "t2u_eos_token_id": 2, - "t2u_encoder_layers": 6, - "t2u_encoder_ffn_dim": 8192, - "t2u_encoder_attention_heads": 16, - "t2u_decoder_layers": 6, - "t2u_decoder_ffn_dim": 8192, - "t2u_decoder_attention_heads": 16, - "t2u_max_position_embeddings": 4096, - "t2u_variance_predictor_embed_dim": 1024, - "t2u_variance_predictor_hidden_dim": 256, - "t2u_variance_predictor_kernel_size": 3, - "t2u_variance_pred_dropout": 0.55, - "sampling_rate": 16000, - "upsample_initial_channel": 512, - "upsample_rates": [ - 5, - 4, - 4, - 2, - 2 - ], - "upsample_kernel_sizes": [ - 11, - 8, - 8, - 4, - 4 - ], - "resblock_kernel_sizes": [ - 3, - 7, - 11 - ], - "resblock_dilation_sizes": [ - [ - 1, - 3, - 5 - ], - [ - 1, - 3, - 5 - ], - [ - 1, - 3, - 5 - ] - ], - "leaky_relu_slope": 0.1, - "unit_hifi_gan_vocab_size": 10000, - "unit_embed_dim": 1280, - "lang_embed_dim": 256, - "spkr_embed_dim": 256, - "vocoder_num_langs": 36, - "vocoder_num_spkrs": 200, - "variance_predictor_kernel_size": 3, - "var_pred_dropout": 0.55, - "vocoder_offset": 4, - "return_dict": true, - "output_hidden_states": false, - "output_attentions": false, - "torchscript": false, - "torch_dtype": "float32", - "use_bfloat16": false, - "tf_legacy_loss": false, - "pruned_heads": {}, - "tie_word_embeddings": true, - "chunk_size_feed_forward": 0, - "is_encoder_decoder": true, - "is_decoder": false, - "cross_attention_hidden_size": null, - "add_cross_attention": false, - "tie_encoder_decoder": false, - "max_length": 20, - "min_length": 0, - "do_sample": false, - "early_stopping": false, - "num_beams": 1, - "num_beam_groups": 1, - "diversity_penalty": 0.0, - "temperature": 1.0, - "top_k": 50, - "top_p": 1.0, - "typical_p": 1.0, - "repetition_penalty": 1.0, - "length_penalty": 1.0, - "no_repeat_ngram_size": 0, - "encoder_no_repeat_ngram_size": 0, - "bad_words_ids": null, - "num_return_sequences": 1, - "output_scores": false, - "return_dict_in_generate": false, - "forced_bos_token_id": null, - "forced_eos_token_id": null, - "remove_invalid_values": false, - "exponential_decay_length_penalty": null, - "suppress_tokens": null, - "begin_suppress_tokens": null, - "architectures": [ - "SeamlessM4Tv2Model" - ], - "finetuning_task": null, - "id2label": { - "0": "LABEL_0", - "1": "LABEL_1" - }, - "label2id": { - "LABEL_0": 0, - "LABEL_1": 1 - }, - "tokenizer_class": null, - "prefix": null, - "bos_token_id": 2, - "pad_token_id": 0, - "eos_token_id": 3, - "sep_token_id": null, - "decoder_start_token_id": 3, - "task_specific_params": null, - "problem_type": null, - "_name_or_path": "facebook/seamless-m4t-v2-large", - "transformers_version": "4.44.2", - "model_type": "seamless_m4t_v2", - "attn_implementation": null -} \ No newline at end of file From 392c7d75ed51ce961cb6a9b3942d89196e81d62b Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 27 Sep 2024 13:56:36 +0100 Subject: [PATCH 03/14] renamed file structure --- 
.../{dropout_pipelines => dropout_utils}/dropout-pipeline.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/arc_spice/{dropout_pipelines => dropout_utils}/dropout-pipeline.py (100%) diff --git a/src/arc_spice/dropout_pipelines/dropout-pipeline.py b/src/arc_spice/dropout_utils/dropout-pipeline.py similarity index 100% rename from src/arc_spice/dropout_pipelines/dropout-pipeline.py rename to src/arc_spice/dropout_utils/dropout-pipeline.py From 31d9130d68817fbf8619fe1514819c0348753b84 Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 27 Sep 2024 14:16:56 +0100 Subject: [PATCH 04/14] adding basic pipeline structure --- src/arc_spice/dropout_utils/__init__.py | 0 ...ropout-pipeline.py => dropout_pipeline.py} | 0 .../dropout_utils/variational-inference.py | 32 +++++++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 src/arc_spice/dropout_utils/__init__.py rename src/arc_spice/dropout_utils/{dropout-pipeline.py => dropout_pipeline.py} (100%) create mode 100644 src/arc_spice/dropout_utils/variational-inference.py diff --git a/src/arc_spice/dropout_utils/__init__.py b/src/arc_spice/dropout_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/arc_spice/dropout_utils/dropout-pipeline.py b/src/arc_spice/dropout_utils/dropout_pipeline.py similarity index 100% rename from src/arc_spice/dropout_utils/dropout-pipeline.py rename to src/arc_spice/dropout_utils/dropout_pipeline.py diff --git a/src/arc_spice/dropout_utils/variational-inference.py b/src/arc_spice/dropout_utils/variational-inference.py new file mode 100644 index 0000000..8513423 --- /dev/null +++ b/src/arc_spice/dropout_utils/variational-inference.py @@ -0,0 +1,32 @@ +from src.arc_spice.dropout_utils.dropout_pipeline import MCDropoutPipeline + + +class VariationalPipeline: + + def __init__(self, pars: dict[str : dict[str:str]]): + self.transcriber = MCDropoutPipeline( + task=pars["transcriber"]["specific_task"], + model=pars["transcriber"]["model"], + ) + self.translator = MCDropoutPipeline( + task=pars["translator"]["specific_task"], model=pars["translator"]["model"] + ) + self.summariser = MCDropoutPipeline( + task=pars["summariser"]["specific_task"], model=pars["summariser"]["model"] + ) + + +TTS_pars = { + "transcriber": { + "specific_task": "automatic-speech-recognition", + "model": "openai/whisper-small", + }, + "translator": { + "specific_task": "translation_fr_to_en", + "model": "facebook/mbart-large-50-many-to-many-mmt", + }, + "summariser": { + "specific_task": "summarization", + "model": "facebook/bart-large-cnn", + }, +} From 36cb01d00bb0e145ab6a7caa4b65699d7d60253a Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 27 Sep 2024 14:41:36 +0100 Subject: [PATCH 05/14] re-working pipeline --- src/arc_spice/dropout_utils/variational-inference.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/arc_spice/dropout_utils/variational-inference.py b/src/arc_spice/dropout_utils/variational-inference.py index 8513423..743a37c 100644 --- a/src/arc_spice/dropout_utils/variational-inference.py +++ b/src/arc_spice/dropout_utils/variational-inference.py @@ -1,17 +1,19 @@ -from src.arc_spice.dropout_utils.dropout_pipeline import MCDropoutPipeline +from transformers import pipeline + +from src.arc_spice.dropout_utils.dropout_pipeline import MCDropoutPipeline, set_dropout class VariationalPipeline: def __init__(self, pars: dict[str : dict[str:str]]): - self.transcriber = MCDropoutPipeline( + self.transcriber = pipeline( task=pars["transcriber"]["specific_task"], 
model=pars["transcriber"]["model"], ) - self.translator = MCDropoutPipeline( + self.translator = pipeline( task=pars["translator"]["specific_task"], model=pars["translator"]["model"] ) - self.summariser = MCDropoutPipeline( + self.summariser = pipeline( task=pars["summariser"]["specific_task"], model=pars["summariser"]["model"] ) From 94f93c44f8babec0893af8b63048425ce3476d1b Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 27 Sep 2024 15:26:39 +0100 Subject: [PATCH 06/14] written a preliminary variational inference pipeline --- .../dropout_utils/dropout_pipeline.py | 12 ++++- .../dropout_utils/variational-inference.py | 44 ++++++++++++++++++- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/arc_spice/dropout_utils/dropout_pipeline.py b/src/arc_spice/dropout_utils/dropout_pipeline.py index 44715df..ef98ce9 100644 --- a/src/arc_spice/dropout_utils/dropout_pipeline.py +++ b/src/arc_spice/dropout_utils/dropout_pipeline.py @@ -2,7 +2,17 @@ from transformers import pipeline -def set_dropout(model, dropout_flag: bool): +def set_dropout(model, dropout_flag: bool) -> torch.nn.Module: + """ + Turn on or turn off dropout layers of a model. + + Args: + model: pytorch model + dropout_flag: dropout -> True/False + + Returns: + model: pytorch model with dropout set to desired value throughout + """ for _, param in model.named_modules(): if isinstance(param, torch.nn.Dropout): # dropout on (True) -> want training mode train(True) diff --git a/src/arc_spice/dropout_utils/variational-inference.py b/src/arc_spice/dropout_utils/variational-inference.py index 743a37c..12e3488 100644 --- a/src/arc_spice/dropout_utils/variational-inference.py +++ b/src/arc_spice/dropout_utils/variational-inference.py @@ -1,9 +1,12 @@ from transformers import pipeline -from src.arc_spice.dropout_utils.dropout_pipeline import MCDropoutPipeline, set_dropout +from src.arc_spice.dropout_utils.dropout_pipeline import set_dropout -class VariationalPipeline: +class TTSVariationalPipeline: + """ + variational version of the TTSpipeline + """ def __init__(self, pars: dict[str : dict[str:str]]): self.transcriber = pipeline( @@ -17,6 +20,43 @@ def __init__(self, pars: dict[str : dict[str:str]]): task=pars["summariser"]["specific_task"], model=pars["summariser"]["model"] ) + self.pipeline_map = { + "transcription": self.transcriber, + "translation": self.translator, + "summarisation": self.summariser, + } + + def clean_inference(self, x): + output = {} + """Run the pipeline on an input x""" + transcription = self.transcriber(x) + output["transcription"] = transcription["text"] + translation = self.translator(transcription["text"]) + output["translation"] = translation[0]["translation_text"] + summarisation = self.summariser(translation[0]["translation_text"]) + output["summarisation"] = summarisation[0]["summary_text"] + return output + + def variational_inference(self, x, n_runs=5): + output = {"clean": {}, "variational": {}} + output["clean"] = self.clean_inference(x) + input_map = { + "transcription": x, + "translation": output["clean"]["transcription"], + "summarisation": output["clean"]["translation"], + } + for model_key, pipeline in self.pipeline_map.values(): + # perhaps we could use a context handler here? 
+ pipeline.model = set_dropout(pipeline.model, True) + output["variational"][model_key] = [None] * n_runs + for run_idx in range(n_runs): + output["variational"][model_key][run_idx] = pipeline( + input_map[model_key] + ) + pipeline.model = set_dropout(pipeline.model, False) + + return output + TTS_pars = { "transcriber": { From a6a259f3f8c9131b330521244fddf7f2b6a30dd2 Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 27 Sep 2024 15:35:51 +0100 Subject: [PATCH 07/14] renamed the variational inference file --- scripts/variational_TTS_example.py | 46 +++++++++++++++++++ ...-inference.py => variational_inference.py} | 16 ------- 2 files changed, 46 insertions(+), 16 deletions(-) create mode 100644 scripts/variational_TTS_example.py rename src/arc_spice/dropout_utils/{variational-inference.py => variational_inference.py} (84%) diff --git a/scripts/variational_TTS_example.py b/scripts/variational_TTS_example.py new file mode 100644 index 0000000..2672583 --- /dev/null +++ b/scripts/variational_TTS_example.py @@ -0,0 +1,46 @@ +""" + An example use of the transcription, translation and summarisation pipeline. +""" + +import numpy as np +from datasets import Audio, load_dataset + +from arc_spice.pipelines.TTS_pipeline import TTSpipeline + + +def main(TTS_params): + """main function""" + TTS = TTSpipeline(TTS_params) + TTS.print_pipeline() + ds = load_dataset( + "facebook/multilingual_librispeech", "french", split="test", streaming=True + ) + ds = ds.cast_column("audio", Audio(sampling_rate=16_000)) + input_speech = next(iter(ds))["audio"] + # arrays = [] + # n = 5 + # for idx, data in enumerate(iter(ds)): + # arrays.append(data["audio"]["array"]) + # if idx == n: + # break + # arrays = np.concatenate(arrays) + TTS.run_pipeline(input_speech["array"]) + TTS.print_results() + + +if __name__ == "__main__": + TTS_pars = { + "transcriber": { + "specific_task": "automatic-speech-recognition", + "model": "openai/whisper-small", + }, + "translator": { + "specific_task": "translation_fr_to_en", + "model": "facebook/mbart-large-50-many-to-many-mmt", + }, + "summariser": { + "specific_task": "summarization", + "model": "facebook/bart-large-cnn", + }, + } + main(TTS_params=TTS_pars) diff --git a/src/arc_spice/dropout_utils/variational-inference.py b/src/arc_spice/dropout_utils/variational_inference.py similarity index 84% rename from src/arc_spice/dropout_utils/variational-inference.py rename to src/arc_spice/dropout_utils/variational_inference.py index 12e3488..b5af037 100644 --- a/src/arc_spice/dropout_utils/variational-inference.py +++ b/src/arc_spice/dropout_utils/variational_inference.py @@ -56,19 +56,3 @@ def variational_inference(self, x, n_runs=5): pipeline.model = set_dropout(pipeline.model, False) return output - - -TTS_pars = { - "transcriber": { - "specific_task": "automatic-speech-recognition", - "model": "openai/whisper-small", - }, - "translator": { - "specific_task": "translation_fr_to_en", - "model": "facebook/mbart-large-50-many-to-many-mmt", - }, - "summariser": { - "specific_task": "summarization", - "model": "facebook/bart-large-cnn", - }, -} From 926f36e0fb0582e8b10e20251f3966fa8ef7cbe9 Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Tue, 1 Oct 2024 15:02:02 +0100 Subject: [PATCH 08/14] added logits, token-wise entropy, and token-wise probability to pipeline --- .vscode/launch.json | 16 ++ scripts/variational_TTS_example.py | 36 ++-- .../dropout_utils/dropout_pipeline.py | 18 +- .../dropout_utils/variational_inference.py | 203 +++++++++++++++--- 4 files changed, 219 insertions(+), 54 
deletions(-) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..969d736 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": false + } + ] +} \ No newline at end of file diff --git a/scripts/variational_TTS_example.py b/scripts/variational_TTS_example.py index 2672583..1b9e4a9 100644 --- a/scripts/variational_TTS_example.py +++ b/scripts/variational_TTS_example.py @@ -2,45 +2,49 @@ An example use of the transcription, translation and summarisation pipeline. """ -import numpy as np +import torch from datasets import Audio, load_dataset -from arc_spice.pipelines.TTS_pipeline import TTSpipeline +from arc_spice.dropout_utils.variational_inference import TTSVariationalPipeline def main(TTS_params): """main function""" - TTS = TTSpipeline(TTS_params) - TTS.print_pipeline() + var_pipe = TTSVariationalPipeline(TTS_params) ds = load_dataset( "facebook/multilingual_librispeech", "french", split="test", streaming=True ) ds = ds.cast_column("audio", Audio(sampling_rate=16_000)) input_speech = next(iter(ds))["audio"] - # arrays = [] - # n = 5 - # for idx, data in enumerate(iter(ds)): - # arrays.append(data["audio"]["array"]) - # if idx == n: - # break - # arrays = np.concatenate(arrays) - TTS.run_pipeline(input_speech["array"]) - TTS.print_results() + + clean_output = var_pipe.clean_inference(input_speech["array"]) + # logit shapes + print(clean_output["transcription"]["logits"].shape) + print(clean_output["translation"]["logits"].shape) + print(clean_output["summarisation"]["logits"].shape) + # entropy + print(torch.mean(clean_output["transcription"]["entropy"])) + print(torch.mean(clean_output["translation"]["entropy"])) + print(torch.mean(clean_output["summarisation"]["entropy"])) + # probability + print(torch.mean(clean_output["transcription"]["probs"])) + print(torch.mean(clean_output["translation"]["probs"])) + print(torch.mean(clean_output["summarisation"]["probs"])) if __name__ == "__main__": TTS_pars = { "transcriber": { "specific_task": "automatic-speech-recognition", - "model": "openai/whisper-small", + "model": "jonatasgrosman/wav2vec2-large-xlsr-53-french", }, "translator": { "specific_task": "translation_fr_to_en", - "model": "facebook/mbart-large-50-many-to-many-mmt", + "model": "ybanas/autotrain-fr-en-translate-51410121895", }, "summariser": { "specific_task": "summarization", - "model": "facebook/bart-large-cnn", + "model": "marianna13/flan-t5-base-summarization", }, } main(TTS_params=TTS_pars) diff --git a/src/arc_spice/dropout_utils/dropout_pipeline.py b/src/arc_spice/dropout_utils/dropout_pipeline.py index ef98ce9..f6947d4 100644 --- a/src/arc_spice/dropout_utils/dropout_pipeline.py +++ b/src/arc_spice/dropout_utils/dropout_pipeline.py @@ -1,24 +1,20 @@ import torch -from transformers import pipeline +from transformers import Pipeline, pipeline -def set_dropout(model, dropout_flag: bool) -> torch.nn.Module: +def set_dropout(model: torch.nn.Module, dropout_flag: bool) -> None: """ Turn on or turn off dropout layers of a model. 
Args: model: pytorch model dropout_flag: dropout -> True/False - - Returns: - model: pytorch model with dropout set to desired value throughout """ for _, param in model.named_modules(): if isinstance(param, torch.nn.Dropout): # dropout on (True) -> want training mode train(True) # dropout off (False) -> eval mode train(False) param.train(dropout_flag) - return model def MCDropoutPipeline(task: str, model: str): @@ -31,8 +27,12 @@ def MCDropoutPipeline(task: str, model: str): return pl -def test_dropout(pipe): +def test_dropout(pipe: Pipeline, dropout_flag: bool): model = pipe.model - for name, param in model.named_modules(): + dropout_count = 0 + for _, param in model.named_modules(): if isinstance(param, torch.nn.Dropout): - print(name, param.training) + dropout_count += 1 + assert param.training == dropout_flag + + print(f"{dropout_count} dropout layers found in correct configuration.") diff --git a/src/arc_spice/dropout_utils/variational_inference.py b/src/arc_spice/dropout_utils/variational_inference.py index b5af037..bb4f37b 100644 --- a/src/arc_spice/dropout_utils/variational_inference.py +++ b/src/arc_spice/dropout_utils/variational_inference.py @@ -1,23 +1,47 @@ -from transformers import pipeline +import copy +from typing import Dict, Optional -from src.arc_spice.dropout_utils.dropout_pipeline import set_dropout +import numpy as np +import torch +from torch.distributions import Categorical +from torch.nn.functional import softmax +from transformers import ( + AutomaticSpeechRecognitionPipeline, + SummarizationPipeline, + TranslationPipeline, + pipeline, +) + +from arc_spice.dropout_utils.dropout_pipeline import set_dropout, test_dropout + + +def get_confidence_metrics(logits): + entropy = Categorical(logits=logits).entropy() + probs = softmax(logits, dim=-1) + return {"entropy": entropy, "probs": probs} class TTSVariationalPipeline: """ - variational version of the TTSpipeline + variational version of the TTS pipeline """ def __init__(self, pars: dict[str : dict[str:str]]): self.transcriber = pipeline( task=pars["transcriber"]["specific_task"], model=pars["transcriber"]["model"], + pipeline_class=CustomSpeechRecognitionPipeline, ) self.translator = pipeline( - task=pars["translator"]["specific_task"], model=pars["translator"]["model"] + task=pars["translator"]["specific_task"], + model=pars["translator"]["model"], + max_length=1024, + pipeline_class=CustomTranslationPipeline, ) self.summariser = pipeline( - task=pars["summariser"]["specific_task"], model=pars["summariser"]["model"] + task=pars["summariser"]["specific_task"], + model=pars["summariser"]["model"], + pipeline_class=CustomSummarizationPipeline, ) self.pipeline_map = { @@ -25,34 +49,155 @@ def __init__(self, pars: dict[str : dict[str:str]]): "translation": self.translator, "summarisation": self.summariser, } + self.generate_kwargs = {"output_scores": True} + + def get_all_confidence_metrics(self, output): + for step in self.pipeline_map.keys(): + output[step].update(get_confidence_metrics(output[step]["logits"])) + return output def clean_inference(self, x): - output = {} + output = {step: {} for step in self.pipeline_map.keys()} """Run the pipeline on an input x""" - transcription = self.transcriber(x) - output["transcription"] = transcription["text"] - translation = self.translator(transcription["text"]) - output["translation"] = translation[0]["translation_text"] - summarisation = self.summariser(translation[0]["translation_text"]) - output["summarisation"] = summarisation[0]["summary_text"] + # transcription + 
transcription = self.transcriber(x, generate_kwargs=self.generate_kwargs) + output["transcription"]["outputs"] = transcription["text"] + output["transcription"]["logits"] = ( + transcription["raw_outputs"][0]["logits"].squeeze().T + ) + # translation + translation = self.translator( + transcription["text"], + output_logits=True, + return_dict_in_generate=True, + ) + output["translation"]["outputs"] = translation["translation_text"] + output["translation"]["logits"] = torch.cat( + translation["raw_outputs"]["logits"] + ) + # summarisation + summarisation = self.summariser( + translation["translation_text"], + output_logits=True, + return_dict_in_generate=True, + ) + output["summarisation"]["outputs"] = summarisation["summary_text"] + output["summarisation"]["logits"] = torch.cat( + summarisation["raw_outputs"]["logits"] + ) + + # add confidence metrics using the logits + output = self.get_all_confidence_metrics(output=output) + return output - def variational_inference(self, x, n_runs=5): - output = {"clean": {}, "variational": {}} - output["clean"] = self.clean_inference(x) - input_map = { - "transcription": x, - "translation": output["clean"]["transcription"], - "summarisation": output["clean"]["translation"], + # def variational_inference(self, x, n_runs=5): + # output = {"clean": {}, "variational": {}} + # output["clean"] = self.clean_inference(x) + # input_map = { + # "transcription": x, + # "translation": output["clean"]["transcription"], + # "summarisation": output["clean"]["translation"], + # } + # for model_key, pl in self.pipeline_map.items(): + # # perhaps we could use a context handler here? + # set_dropout(model=pl.model, dropout_flag=True) + # output["variational"][model_key] = [None] * n_runs + # for run_idx in range(n_runs): + # output["variational"][model_key][run_idx] = pl( + # input_map[model_key], output_scores=True + # ) + # set_dropout(model=pl.model, dropout_flag=False) + # return output + + +class CustomSpeechRecognitionPipeline(AutomaticSpeechRecognitionPipeline): + def postprocess( + self, + model_outputs: dict, + **postprocess_params, + ): + # model_outputs gets overwritten in the super().postprocess call + # make a copy here so we retain the information we want + raw_out = copy.deepcopy(model_outputs) + processed = super().postprocess(model_outputs, **postprocess_params) + + new_output = {"text": processed["text"], "raw_outputs": raw_out} + return new_output + + +class CustomTranslationPipeline(TranslationPipeline): + def postprocess( + self, + model_outputs: dict, + **postprocess_params, + ): + # model_outputs gets overwritten in the super().postprocess call + # make a copy here so we retain the information we want + raw_out = copy.deepcopy(model_outputs) + processed = super().postprocess(model_outputs, **postprocess_params) + + new_output = { + "translation_text": processed[0]["translation_text"], + "raw_outputs": raw_out, } - for model_key, pipeline in self.pipeline_map.values(): - # perhaps we could use a context handler here? 
- pipeline.model = set_dropout(pipeline.model, True) - output["variational"][model_key] = [None] * n_runs - for run_idx in range(n_runs): - output["variational"][model_key][run_idx] = pipeline( - input_map[model_key] - ) - pipeline.model = set_dropout(pipeline.model, False) + return new_output - return output + def _forward(self, model_inputs, **generate_kwargs): + if self.framework == "pt": + in_b, input_length = model_inputs["input_ids"].shape + elif self.framework == "tf": + raise NotImplementedError + + self.check_inputs( + input_length, + generate_kwargs.get("min_length", self.model.config.min_length), + generate_kwargs.get("max_length", self.model.config.max_length), + ) + out = self.model.generate(**model_inputs, **generate_kwargs) + output_ids = out["sequences"] + out_b = output_ids.shape[0] + if self.framework == "pt": + output_ids = output_ids.reshape(in_b, out_b // in_b, *output_ids.shape[1:]) + elif self.framework == "tf": + raise NotImplementedError + return {"output_ids": output_ids, "logits": out["logits"]} + + +class CustomSummarizationPipeline(SummarizationPipeline): + + def postprocess( + self, + model_outputs: dict, + **postprocess_params, + ): + # model_outputs gets overwritten in the super().postprocess call + # make a copy here so we retain the information we want + raw_out = copy.deepcopy(model_outputs) + processed = super().postprocess(model_outputs, **postprocess_params) + + new_output = { + "summary_text": processed[0]["summary_text"], + "raw_outputs": raw_out, + } + return new_output + + def _forward(self, model_inputs, **generate_kwargs): + if self.framework == "pt": + in_b, input_length = model_inputs["input_ids"].shape + elif self.framework == "tf": + raise NotImplementedError + + self.check_inputs( + input_length, + generate_kwargs.get("min_length", self.model.config.min_length), + generate_kwargs.get("max_length", self.model.config.max_length), + ) + out = self.model.generate(**model_inputs, **generate_kwargs) + output_ids = out["sequences"] + out_b = output_ids.shape[0] + if self.framework == "pt": + output_ids = output_ids.reshape(in_b, out_b // in_b, *output_ids.shape[1:]) + elif self.framework == "tf": + raise NotImplementedError + return {"output_ids": output_ids, "logits": out["logits"]} From 58bff7558014e60a03348bdfd96d0ee1264881bf Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Wed, 2 Oct 2024 13:10:36 +0100 Subject: [PATCH 09/14] added uncertainty to the TTS pipeline, example script shows cumulative uncertainty. 
TODO: calibrate these confidences --- scripts/variational_TTS_example.py | 36 ++++++--- .../dropout_utils/variational_inference.py | 74 ++++++++++++------- 2 files changed, 72 insertions(+), 38 deletions(-) diff --git a/scripts/variational_TTS_example.py b/scripts/variational_TTS_example.py index 1b9e4a9..88c4dd8 100644 --- a/scripts/variational_TTS_example.py +++ b/scripts/variational_TTS_example.py @@ -17,20 +17,34 @@ def main(TTS_params): ds = ds.cast_column("audio", Audio(sampling_rate=16_000)) input_speech = next(iter(ds))["audio"] - clean_output = var_pipe.clean_inference(input_speech["array"]) + clean_output = var_pipe(input_speech["array"]) # logit shapes - print(clean_output["transcription"]["logits"].shape) - print(clean_output["translation"]["logits"].shape) - print(clean_output["summarisation"]["logits"].shape) + print("Logit shapes:") + for step in var_pipe.pipeline_map.keys(): + print(f"{step.capitalize()}: {clean_output[step]["logits"].shape}") + # entropy - print(torch.mean(clean_output["transcription"]["entropy"])) - print(torch.mean(clean_output["translation"]["entropy"])) - print(torch.mean(clean_output["summarisation"]["entropy"])) - # probability - print(torch.mean(clean_output["transcription"]["probs"])) - print(torch.mean(clean_output["translation"]["probs"])) - print(torch.mean(clean_output["summarisation"]["probs"])) + print("Mean entropy:") + for step in var_pipe.pipeline_map.keys(): + print(f"{step.capitalize()}: {torch.mean(clean_output[step]["entropy"])}") + + # normalised entropy + print("Normalised mean entropy:") + cumulative = 1 + for step in var_pipe.pipeline_map.keys(): + step_entropy = torch.mean(clean_output[step]["normalised_entropy"]) + cumulative*= (1-step_entropy) + print(f"{step.capitalize()}: {step_entropy}") + print(f"Cumulative confidence (1 - entropy): {cumulative}") + # probabilities + print("Mean top probabilities:") + cumulative = 1 + for step in var_pipe.pipeline_map.keys(): + step_prob = torch.mean(clean_output[step]["probs"]) + cumulative *= step_prob + print(f"{step.capitalize()}: {step_prob}") + print(f"Cumulative confidence: {cumulative}") if __name__ == "__main__": TTS_pars = { diff --git a/src/arc_spice/dropout_utils/variational_inference.py b/src/arc_spice/dropout_utils/variational_inference.py index bb4f37b..a4112fe 100644 --- a/src/arc_spice/dropout_utils/variational_inference.py +++ b/src/arc_spice/dropout_utils/variational_inference.py @@ -1,5 +1,5 @@ import copy -from typing import Dict, Optional +from typing import Union import numpy as np import torch @@ -15,10 +15,26 @@ from arc_spice.dropout_utils.dropout_pipeline import set_dropout, test_dropout -def get_confidence_metrics(logits): +def get_confidence_metrics(logits: torch.Tensor) -> dict[str : torch.Tensor]: + """ + calculates confidence metrics for a tensor of logits: + - entropy : token-wise entropy + - normalised entropy : token-wise entropy normalised by vocab size + - probs : log-probabilities of the each generated token + + Returns: + dictionary containing the calculated confidence metrics + """ + vocab = torch.tensor(logits.shape[-1]) entropy = Categorical(logits=logits).entropy() - probs = softmax(logits, dim=-1) - return {"entropy": entropy, "probs": probs} + normalised_entropy = entropy / torch.log(vocab) + softmax_logits = softmax(logits, dim=-1) + max_probs = torch.max(softmax_logits, dim=-1).values + return { + "entropy": entropy, + "normalised_entropy": normalised_entropy, + "probs": max_probs, + } class TTSVariationalPipeline: @@ -51,14 +67,37 @@ def 
__init__(self, pars: dict[str : dict[str:str]]): } self.generate_kwargs = {"output_scores": True} - def get_all_confidence_metrics(self, output): + def collect_confidence_metrics( + self, output: dict[str : dict[str : torch.Tensor]] + ) -> dict[str : dict[str : torch.Tensor]]: + """ + For each step/model in the pipeline calculates the associated uncertainty + metrics using the logits + + Args: + output: dictionary containing the outputs of each step + + Returns: + updated dictionary containing the confidence metrics calculated for each + step in the pipeline + """ for step in self.pipeline_map.keys(): output[step].update(get_confidence_metrics(output[step]["logits"])) return output - def clean_inference(self, x): + def __call__(self, x: Union[np.ndarray, bytes, str]): + """ + + Run the pipeline on an input x + + Args: + x: numpy array audio input + + Returns: + summarised transcript with associated unvertainties at each step + """ + output = {step: {} for step in self.pipeline_map.keys()} - """Run the pipeline on an input x""" # transcription transcription = self.transcriber(x, generate_kwargs=self.generate_kwargs) output["transcription"]["outputs"] = transcription["text"] @@ -87,29 +126,10 @@ def clean_inference(self, x): ) # add confidence metrics using the logits - output = self.get_all_confidence_metrics(output=output) + output = self.collect_confidence_metrics(output=output) return output - # def variational_inference(self, x, n_runs=5): - # output = {"clean": {}, "variational": {}} - # output["clean"] = self.clean_inference(x) - # input_map = { - # "transcription": x, - # "translation": output["clean"]["transcription"], - # "summarisation": output["clean"]["translation"], - # } - # for model_key, pl in self.pipeline_map.items(): - # # perhaps we could use a context handler here? - # set_dropout(model=pl.model, dropout_flag=True) - # output["variational"][model_key] = [None] * n_runs - # for run_idx in range(n_runs): - # output["variational"][model_key][run_idx] = pl( - # input_map[model_key], output_scores=True - # ) - # set_dropout(model=pl.model, dropout_flag=False) - # return output - class CustomSpeechRecognitionPipeline(AutomaticSpeechRecognitionPipeline): def postprocess( From 648b7b502f269857c0939917fd952cb6c673eccd Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 4 Oct 2024 08:50:50 +0100 Subject: [PATCH 10/14] variational pipeline now calculates the confidence metrics for each forward pass --- .vscode/settings.json | 5 + scripts/variational_TTS_example.py | 12 +- .../dropout_utils/variational_inference.py | 116 +++++++++++------- 3 files changed, 87 insertions(+), 46 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..6096dbd --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "editor.formatOnSave": true, + "editor.formatOnPaste": true, + "editor.defaultFormatter": "ms-python.black-formatter" +} \ No newline at end of file diff --git a/scripts/variational_TTS_example.py b/scripts/variational_TTS_example.py index 88c4dd8..2646668 100644 --- a/scripts/variational_TTS_example.py +++ b/scripts/variational_TTS_example.py @@ -1,6 +1,7 @@ """ An example use of the transcription, translation and summarisation pipeline. 
""" +import json import torch from datasets import Audio, load_dataset @@ -17,7 +18,7 @@ def main(TTS_params): ds = ds.cast_column("audio", Audio(sampling_rate=16_000)) input_speech = next(iter(ds))["audio"] - clean_output = var_pipe(input_speech["array"]) + clean_output = var_pipe.clean_inference(input_speech["array"]) # logit shapes print("Logit shapes:") for step in var_pipe.pipeline_map.keys(): @@ -46,6 +47,15 @@ def main(TTS_params): print(f"{step.capitalize()}: {step_prob}") print(f"Cumulative confidence: {cumulative}") + variational_output = var_pipe.variational_inference(x=input_speech['array'],n_runs=2) + + + for step in var_pipe.pipeline_map.keys(): + print(f'{step}:') + step_output = variational_output['variational'][step] + for run in step_output: + print(run['logits']) + if __name__ == "__main__": TTS_pars = { "transcriber": { diff --git a/src/arc_spice/dropout_utils/variational_inference.py b/src/arc_spice/dropout_utils/variational_inference.py index a4112fe..9e0160b 100644 --- a/src/arc_spice/dropout_utils/variational_inference.py +++ b/src/arc_spice/dropout_utils/variational_inference.py @@ -12,7 +12,7 @@ pipeline, ) -from arc_spice.dropout_utils.dropout_pipeline import set_dropout, test_dropout +from arc_spice.dropout_utils.dropout_pipeline import set_dropout def get_confidence_metrics(logits: torch.Tensor) -> dict[str : torch.Tensor]: @@ -66,26 +66,48 @@ def __init__(self, pars: dict[str : dict[str:str]]): "summarisation": self.summariser, } self.generate_kwargs = {"output_scores": True} + self.func_map = { + "transcription": self.transcribe, + "translation": self.translate, + "summarisation": self.summarise, + } - def collect_confidence_metrics( - self, output: dict[str : dict[str : torch.Tensor]] - ) -> dict[str : dict[str : torch.Tensor]]: - """ - For each step/model in the pipeline calculates the associated uncertainty - metrics using the logits - - Args: - output: dictionary containing the outputs of each step - - Returns: - updated dictionary containing the confidence metrics calculated for each - step in the pipeline - """ - for step in self.pipeline_map.keys(): - output[step].update(get_confidence_metrics(output[step]["logits"])) - return output - - def __call__(self, x: Union[np.ndarray, bytes, str]): + def transcribe(self, x: Union[np.ndarray, bytes, str]): + transcription = self.transcriber(x, generate_kwargs=self.generate_kwargs) + output_text = transcription["text"] + output_logits = transcription["raw_outputs"][0]["logits"].squeeze().T + output_dict = {"outputs": output_text, "logits": output_logits} + confidence_metrics = get_confidence_metrics(output_logits) + output_dict.update(confidence_metrics) + return output_dict + + def translate(self, source_text: str): + translation = self.translator( + source_text, + output_logits=True, + return_dict_in_generate=True, + ) + output_text = translation["translation_text"] + output_logits = torch.cat(translation["raw_outputs"]["logits"]) + output_dict = {"outputs": output_text, "logits": output_logits} + confidence_metrics = get_confidence_metrics(output_logits) + output_dict.update(confidence_metrics) + return output_dict + + def summarise(self, source_text: str): + summarisation = self.summariser( + source_text, + output_logits=True, + return_dict_in_generate=True, + ) + output_text = summarisation["summary_text"] + output_logits = torch.cat(summarisation["raw_outputs"]["logits"]) + output_dict = {"outputs": output_text, "logits": output_logits} + confidence_metrics = get_confidence_metrics(output_logits) + 
output_dict.update(confidence_metrics) + return output_dict + + def clean_inference(self, x: Union[np.ndarray, bytes, str]): """ Run the pipeline on an input x @@ -99,37 +121,41 @@ def __call__(self, x: Union[np.ndarray, bytes, str]): output = {step: {} for step in self.pipeline_map.keys()} # transcription - transcription = self.transcriber(x, generate_kwargs=self.generate_kwargs) - output["transcription"]["outputs"] = transcription["text"] - output["transcription"]["logits"] = ( - transcription["raw_outputs"][0]["logits"].squeeze().T - ) + transcription = self.transcribe(x) + output["transcription"].update(transcription) + # translation - translation = self.translator( - transcription["text"], - output_logits=True, - return_dict_in_generate=True, - ) - output["translation"]["outputs"] = translation["translation_text"] - output["translation"]["logits"] = torch.cat( - translation["raw_outputs"]["logits"] - ) + translation = self.translate(transcription["outputs"]) + output["translation"].update(translation) + # summarisation - summarisation = self.summariser( - translation["translation_text"], - output_logits=True, - return_dict_in_generate=True, - ) - output["summarisation"]["outputs"] = summarisation["summary_text"] - output["summarisation"]["logits"] = torch.cat( - summarisation["raw_outputs"]["logits"] - ) + summarisation = self.summarise(translation["outputs"]) + output["summarisation"].update(summarisation) - # add confidence metrics using the logits - output = self.collect_confidence_metrics(output=output) + return output + def variational_inference(self, x, n_runs=5): + output = {"clean": {}, "variational": {}} + output["clean"] = self.clean_inference(x) + input_map = { + "transcription": x, + "translation": output["clean"]["transcription"]["outputs"], + "summarisation": output["clean"]["translation"]["outputs"], + } + for model_key, pl in self.pipeline_map.items(): + # perhaps we could use a context handler here? + set_dropout(model=pl.model, dropout_flag=True) + output["variational"][model_key] = [None] * n_runs + for run_idx in range(n_runs): + output["variational"][model_key][run_idx] = self.func_map[model_key]( + input_map[model_key] + ) + set_dropout(model=pl.model, dropout_flag=False) return output + def __call__(self, x): + return self.clean_inference(x) + class CustomSpeechRecognitionPipeline(AutomaticSpeechRecognitionPipeline): def postprocess( From 1396eb15b042fca39342f4fdc3db6eb9a1a19ed3 Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 4 Oct 2024 08:53:09 +0100 Subject: [PATCH 11/14] removing .vscode from the repo --- .gitignore | 3 ++- .vscode/launch.json | 16 ---------------- .vscode/settings.json | 5 ----- 3 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 .vscode/launch.json delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index b5dc7cf..666d382 100644 --- a/.gitignore +++ b/.gitignore @@ -158,4 +158,5 @@ Thumbs.db *.swp # other -temp \ No newline at end of file +temp +.vscode \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index 969d736..0000000 --- a/.vscode/launch.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. 
- // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "Python Debugger: Current File", - "type": "debugpy", - "request": "launch", - "program": "${file}", - "console": "integratedTerminal", - "justMyCode": false - } - ] -} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 6096dbd..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "editor.formatOnSave": true, - "editor.formatOnPaste": true, - "editor.defaultFormatter": "ms-python.black-formatter" -} \ No newline at end of file From efac13e12e8797090fc33a77515ee0a309563d3e Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 4 Oct 2024 09:21:31 +0100 Subject: [PATCH 12/14] added some comments --- src/arc_spice/dropout_utils/variational_inference.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/arc_spice/dropout_utils/variational_inference.py b/src/arc_spice/dropout_utils/variational_inference.py index 9e0160b..dbc1f17 100644 --- a/src/arc_spice/dropout_utils/variational_inference.py +++ b/src/arc_spice/dropout_utils/variational_inference.py @@ -135,21 +135,27 @@ def clean_inference(self, x: Union[np.ndarray, bytes, str]): return output def variational_inference(self, x, n_runs=5): + # we need clean inputs to pass to each step, we run that first output = {"clean": {}, "variational": {}} output["clean"] = self.clean_inference(x) + # each step accepts a different input from the clean pipeline input_map = { "transcription": x, "translation": output["clean"]["transcription"]["outputs"], "summarisation": output["clean"]["translation"]["outputs"], } + # for each model in pipeline for model_key, pl in self.pipeline_map.items(): - # perhaps we could use a context handler here? 
+ # turn on dropout for this model set_dropout(model=pl.model, dropout_flag=True) + # create the output list output["variational"][model_key] = [None] * n_runs + # do n runs of the inference for run_idx in range(n_runs): output["variational"][model_key][run_idx] = self.func_map[model_key]( input_map[model_key] ) + # turn off dropout for this model set_dropout(model=pl.model, dropout_flag=False) return output From 8a41bb6fedd9bdf17a4c2e3bc1dadd25d7cc8168 Mon Sep 17 00:00:00 2001 From: J-Dymond Date: Fri, 4 Oct 2024 11:36:25 +0100 Subject: [PATCH 13/14] pipeline now calculates a sentence embedding for each input --- scripts/variational_TTS_example.py | 12 +-- .../dropout_utils/variational_inference.py | 86 ++++++++++++++----- 2 files changed, 70 insertions(+), 28 deletions(-) diff --git a/scripts/variational_TTS_example.py b/scripts/variational_TTS_example.py index 2646668..0b24da1 100644 --- a/scripts/variational_TTS_example.py +++ b/scripts/variational_TTS_example.py @@ -20,17 +20,17 @@ def main(TTS_params): clean_output = var_pipe.clean_inference(input_speech["array"]) # logit shapes - print("Logit shapes:") + print("\nLogit shapes:") for step in var_pipe.pipeline_map.keys(): print(f"{step.capitalize()}: {clean_output[step]["logits"].shape}") # entropy - print("Mean entropy:") + print("\nMean entropy:") for step in var_pipe.pipeline_map.keys(): print(f"{step.capitalize()}: {torch.mean(clean_output[step]["entropy"])}") # normalised entropy - print("Normalised mean entropy:") + print("\nNormalised mean entropy:") cumulative = 1 for step in var_pipe.pipeline_map.keys(): step_entropy = torch.mean(clean_output[step]["normalised_entropy"]) @@ -39,7 +39,7 @@ def main(TTS_params): print(f"Cumulative confidence (1 - entropy): {cumulative}") # probabilities - print("Mean top probabilities:") + print("\nMean top probabilities:") cumulative = 1 for step in var_pipe.pipeline_map.keys(): step_prob = torch.mean(clean_output[step]["probs"]) @@ -51,10 +51,10 @@ def main(TTS_params): for step in var_pipe.pipeline_map.keys(): - print(f'{step}:') + print(f'\n{step}:') step_output = variational_output['variational'][step] for run in step_output: - print(run['logits']) + print(run['semantic_embedding']) if __name__ == "__main__": TTS_pars = { diff --git a/src/arc_spice/dropout_utils/variational_inference.py b/src/arc_spice/dropout_utils/variational_inference.py index dbc1f17..69a85a8 100644 --- a/src/arc_spice/dropout_utils/variational_inference.py +++ b/src/arc_spice/dropout_utils/variational_inference.py @@ -3,10 +3,13 @@ import numpy as np import torch +import torch.nn.functional as F from torch.distributions import Categorical from torch.nn.functional import softmax from transformers import ( AutomaticSpeechRecognitionPipeline, + AutoModel, + AutoTokenizer, SummarizationPipeline, TranslationPipeline, pipeline, @@ -14,27 +17,21 @@ from arc_spice.dropout_utils.dropout_pipeline import set_dropout +# From huggingface page with model: +# - https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2 -def get_confidence_metrics(logits: torch.Tensor) -> dict[str : torch.Tensor]: - """ - calculates confidence metrics for a tensor of logits: - - entropy : token-wise entropy - - normalised entropy : token-wise entropy normalised by vocab size - - probs : log-probabilities of the each generated token - Returns: - dictionary containing the calculated confidence metrics - """ - vocab = torch.tensor(logits.shape[-1]) - entropy = Categorical(logits=logits).entropy() - normalised_entropy = entropy / torch.log(vocab) - 
softmax_logits = softmax(logits, dim=-1)
-    max_probs = torch.max(softmax_logits, dim=-1).values
-    return {
-        "entropy": entropy,
-        "normalised_entropy": normalised_entropy,
-        "probs": max_probs,
-    }
+# Mean Pooling - Take attention mask into account for correct averaging
+def mean_pooling(model_output, attention_mask):
+    token_embeddings = model_output[
+        0
+    ]  # First element of model_output contains all token embeddings
+    input_mask_expanded = (
+        attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+    )
+    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
+        input_mask_expanded.sum(1), min=1e-9
+    )
 
 
 class TTSVariationalPipeline:
@@ -60,6 +57,13 @@ def __init__(self, pars: dict[str : dict[str:str]]):
             pipeline_class=CustomSummarizationPipeline,
         )
 
+        self.semantic_tokenizer = AutoTokenizer.from_pretrained(
+            "sentence-transformers/all-MiniLM-L6-v2"
+        )
+        self.semantic_model = AutoModel.from_pretrained(
+            "sentence-transformers/all-MiniLM-L6-v2"
+        )
+
         self.pipeline_map = {
             "transcription": self.transcriber,
             "translation": self.translator,
@@ -72,12 +76,50 @@ def __init__(self, pars: dict[str : dict[str:str]]):
             "summarisation": self.summarise,
         }
 
+    def get_confidence_metrics(
+        self, output_dict: dict[str : str | torch.Tensor]
+    ) -> dict[str : torch.Tensor]:
+        """
+        calculates confidence metrics for a tensor of logits:
+        - entropy : token-wise entropy
+        - normalised entropy : token-wise entropy normalised by vocab size
+        - probs : log-probabilities of the each generated token
+
+        Returns:
+            dictionary containing the calculated confidence metrics
+        """
+        logits = output_dict["logits"]
+        text = output_dict["outputs"]
+        vocab = torch.tensor(logits.shape[-1])
+        entropy = Categorical(logits=logits).entropy()
+        normalised_entropy = entropy / torch.log(vocab)
+        softmax_logits = softmax(logits, dim=-1)
+        max_probs = torch.max(softmax_logits, dim=-1).values
+        tokenized_text = self.semantic_tokenizer(
+            text, padding=True, truncation=True, return_tensors="pt"
+        )
+        with torch.no_grad():
+            model_embeddings = self.semantic_model(**tokenized_text)
+        # Perform pooling
+        sentence_embeddings = mean_pooling(
+            model_embeddings, tokenized_text["attention_mask"]
+        )
+
+        # Normalize embeddings
+        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
+        return {
+            "entropy": entropy,
+            "normalised_entropy": normalised_entropy,
+            "probs": max_probs,
+            "semantic_embedding": sentence_embeddings,
+        }
+
     def transcribe(self, x: Union[np.ndarray, bytes, str]):
         transcription = self.transcriber(x, generate_kwargs=self.generate_kwargs)
         output_text = transcription["text"]
         output_logits = transcription["raw_outputs"][0]["logits"].squeeze().T
         output_dict = {"outputs": output_text, "logits": output_logits}
-        confidence_metrics = get_confidence_metrics(output_logits)
+        confidence_metrics = self.get_confidence_metrics(output_dict)
         output_dict.update(confidence_metrics)
         return output_dict
 
@@ -90,7 +132,7 @@ def translate(self, source_text: str):
         output_text = translation["translation_text"]
         output_logits = torch.cat(translation["raw_outputs"]["logits"])
         output_dict = {"outputs": output_text, "logits": output_logits}
-        confidence_metrics = get_confidence_metrics(output_logits)
+        confidence_metrics = self.get_confidence_metrics(output_dict)
         output_dict.update(confidence_metrics)
         return output_dict
 
@@ -103,7 +145,7 @@ def summarise(self, source_text: str):
         output_text = summarisation["summary_text"]
         output_logits = torch.cat(summarisation["raw_outputs"]["logits"])
         output_dict = {"outputs": output_text, "logits": output_logits}
-        confidence_metrics = get_confidence_metrics(output_logits)
+        confidence_metrics = self.get_confidence_metrics(output_dict)
         output_dict.update(confidence_metrics)
         return output_dict
 

From ac2f04e3711329ed988a7080fc7aad9a6f40abd6 Mon Sep 17 00:00:00 2001
From: J-Dymond
Date: Thu, 10 Oct 2024 16:43:23 +0100
Subject: [PATCH 14/14] semantic density now calculated in the class during variational inference

---
 scripts/variational_TTS_example.py            |  25 +++--
 .../dropout_utils/variational_inference.py    | 105 ++++++++++++++----
 2 files changed, 100 insertions(+), 30 deletions(-)

diff --git a/scripts/variational_TTS_example.py b/scripts/variational_TTS_example.py
index 0b24da1..9c61f96 100644
--- a/scripts/variational_TTS_example.py
+++ b/scripts/variational_TTS_example.py
@@ -1,8 +1,6 @@
 """
 An example use of the transcription, translation and summarisation pipeline.
 """
-import json
-
 import torch
 from datasets import Audio, load_dataset
 
@@ -11,14 +9,17 @@
 
 def main(TTS_params):
     """main function"""
-    var_pipe = TTSVariationalPipeline(TTS_params)
+    var_pipe = TTSVariationalPipeline(TTS_params,n_variational_runs=2)
+
     ds = load_dataset(
         "facebook/multilingual_librispeech", "french", split="test", streaming=True
     )
     ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
     input_speech = next(iter(ds))["audio"]
 
-    clean_output = var_pipe.clean_inference(input_speech["array"])
+    var_pipe.clean_inference(input_speech["array"])
+    clean_output = var_pipe.clean_output
+
     # logit shapes
     print("\nLogit shapes:")
    for step in var_pipe.pipeline_map.keys():
@@ -47,14 +48,18 @@ def main(TTS_params):
         print(f"{step.capitalize()}: {step_prob}")
     print(f"Cumulative confidence: {cumulative}")
 
-    variational_output = var_pipe.variational_inference(x=input_speech['array'],n_runs=2)
+    print("\nConditional probabilities:")
+    for step in var_pipe.pipeline_map.keys():
+        token_probs = clean_output[step]["probs"]
+        cond_prob = torch.pow(torch.prod(token_probs,-1),1/len(token_probs))
+        print(f"{step.capitalize()}: {cond_prob}")
 
+    var_pipe.variational_inference(x=input_speech['array'])
+    variational_output = var_pipe.var_output
+    print("\nVariational Inference Semantic Density:")
+    for step in variational_output['variational'].keys():
+        print(f"{step}: {variational_output['variational'][step]['semantic_density']}")
 
-    for step in var_pipe.pipeline_map.keys():
-        print(f'\n{step}:')
-        step_output = variational_output['variational'][step]
-        for run in step_output:
-            print(run['semantic_embedding'])
 
 if __name__ == "__main__":
     TTS_pars = {
diff --git a/src/arc_spice/dropout_utils/variational_inference.py b/src/arc_spice/dropout_utils/variational_inference.py
index 69a85a8..ad73aa1 100644
--- a/src/arc_spice/dropout_utils/variational_inference.py
+++ b/src/arc_spice/dropout_utils/variational_inference.py
@@ -5,10 +5,11 @@
 import torch
 import torch.nn.functional as F
 from torch.distributions import Categorical
-from torch.nn.functional import softmax
+from torch.nn.functional import cosine_similarity, softmax
 from transformers import (
     AutomaticSpeechRecognitionPipeline,
     AutoModel,
+    AutoModelForSequenceClassification,
     AutoTokenizer,
     SummarizationPipeline,
     TranslationPipeline,
@@ -39,7 +40,7 @@ class TTSVariationalPipeline:
     variational version of the TTS pipeline
     """
 
-    def __init__(self, pars: dict[str : dict[str:str]]):
+    def __init__(self, pars: dict[str : dict[str:str]], n_variational_runs=5):
         self.transcriber = pipeline(
             task=pars["transcriber"]["specific_task"],
             model=pars["transcriber"]["model"],
@@ -64,17 +65,35 @@ def __init__(self, pars: dict[str : dict[str:str]]):
             "sentence-transformers/all-MiniLM-L6-v2"
         )
 
+        self.nli_tokenizer = AutoTokenizer.from_pretrained(
+            "microsoft/deberta-large-mnli"
+        )
+
+        self.nli_model = AutoModelForSequenceClassification.from_pretrained(
+            "microsoft/deberta-large-mnli"
+        )
+
         self.pipeline_map = {
             "transcription": self.transcriber,
             "translation": self.translator,
             "summarisation": self.summariser,
         }
         self.generate_kwargs = {"output_scores": True}
+
         self.func_map = {
             "transcription": self.transcribe,
             "translation": self.translate,
             "summarisation": self.summarise,
         }
+        self.naive_outputs = {
+            "outputs",
+            "logits",
+            "entropy",
+            "normalised_entropy",
+            "probs",
+            "semantic_embedding",
+        }
+        self.n_variational_runs = n_variational_runs
 
     def get_confidence_metrics(
         self, output_dict: dict[str : str | torch.Tensor]
@@ -149,6 +168,50 @@ def summarise(self, source_text: str):
         output_dict.update(confidence_metrics)
         return output_dict
 
+    def collect_metrics(self):
+        new_var_dict = {}
+        for step in self.var_output["variational"].keys():
+            new_var_dict[step] = {}
+            for metric in self.naive_outputs:
+                new_values = [None] * self.n_variational_runs
+                for run in range(self.n_variational_runs):
+                    new_values[run] = self.var_output["variational"][step][run][metric]
+                new_var_dict[step][metric] = new_values
+
+        self.var_output["variational"] = new_var_dict
+
+    def calculate_semantic_density(self):
+        for step in self.var_output["variational"].keys():
+            clean_out = self.var_output["clean"][step]["outputs"]
+            var_step = self.var_output["variational"][step]
+            kernel_funcs = torch.zeros(self.n_variational_runs)
+            cond_probs = torch.zeros(self.n_variational_runs)
+            sims = [None] * self.n_variational_runs
+            for run_index, run_out in enumerate(var_step["outputs"]):
+                run_prob = var_step["probs"][run_index]
+                nli_inp = clean_out + " [SEP] " + run_out
+                encoded_nli = self.nli_tokenizer.encode(
+                    nli_inp, padding=True, return_tensors="pt"
+                )
+                sims[run_index] = cosine_similarity(
+                    self.var_output["clean"][step]["semantic_embedding"],
+                    var_step["semantic_embedding"][run_index],
+                )
+                nli_out = softmax(self.nli_model(encoded_nli)["logits"], dim=-1)[0]
+                kernel_func = 1 - (nli_out[0] + (0.5 * nli_out[1]))
+                cond_probs[run_index] = torch.pow(
+                    torch.prod(run_prob, -1), 1 / len(run_prob)
+                )
+                kernel_funcs[run_index] = kernel_func
+            semantic_density = (
+                1
+                / (torch.sum(cond_probs))
+                * torch.sum(torch.mul(cond_probs, kernel_funcs))
+            )
+            self.var_output["variational"][step].update(
+                {"semantic_density": semantic_density.item(), "cosine_similarity": sims}
+            )
+
     def clean_inference(self, x: Union[np.ndarray, bytes, str]):
         """
 
@@ -161,48 +224,50 @@
 
         summarised transcript with associated unvertainties at each step
         """
-        output = {step: {} for step in self.pipeline_map.keys()}
+        self.clean_output = {step: {} for step in self.pipeline_map.keys()}
         # transcription
         transcription = self.transcribe(x)
-        output["transcription"].update(transcription)
+        self.clean_output["transcription"].update(transcription)
 
         # translation
         translation = self.translate(transcription["outputs"])
-        output["translation"].update(translation)
+        self.clean_output["translation"].update(translation)
 
         # summarisation
         summarisation = self.summarise(translation["outputs"])
-        output["summarisation"].update(summarisation)
+        self.clean_output["summarisation"].update(summarisation)
 
-        return output
-
-    def variational_inference(self, x, n_runs=5):
+    def variational_inference(self, x):
         # we need clean inputs to pass to each step, we run that first
-        output = {"clean": {}, "variational": {}}
-        output["clean"] = self.clean_inference(x)
+        self.var_output = {"clean": {}, "variational": {}}
+        self.clean_inference(x)
+        self.var_output["clean"] = self.clean_output
         # each step accepts a different input from the clean pipeline
        input_map = {
             "transcription": x,
-            "translation": output["clean"]["transcription"]["outputs"],
-            "summarisation": output["clean"]["translation"]["outputs"],
+            "translation": self.var_output["clean"]["transcription"]["outputs"],
+            "summarisation": self.var_output["clean"]["translation"]["outputs"],
         }
         # for each model in pipeline
         for model_key, pl in self.pipeline_map.items():
             # turn on dropout for this model
             set_dropout(model=pl.model, dropout_flag=True)
             # create the output list
-            output["variational"][model_key] = [None] * n_runs
+            self.var_output["variational"][model_key] = [None] * self.n_variational_runs
             # do n runs of the inference
-            for run_idx in range(n_runs):
-                output["variational"][model_key][run_idx] = self.func_map[model_key](
-                    input_map[model_key]
-                )
+            for run_idx in range(self.n_variational_runs):
+                self.var_output["variational"][model_key][run_idx] = self.func_map[
+                    model_key
+                ](input_map[model_key])
             # turn off dropout for this model
             set_dropout(model=pl.model, dropout_flag=False)
-        return output
+
+        self.collect_metrics()
+        self.calculate_semantic_density()
 
     def __call__(self, x):
-        return self.clean_inference(x)
+        self.clean_inference(x)
+        return self.clean_output
 
 
 class CustomSpeechRecognitionPipeline(AutomaticSpeechRecognitionPipeline):
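
The calculate_semantic_density() method added in PATCH 14/14 reduces to a confidence-weighted average of an NLI-derived kernel: each MC-dropout run is scored with 1 - (p_contradiction + 0.5 * p_neutral) against the clean output, and the scores are weighted by the length-normalised (geometric-mean) probability of that run's generated tokens. The standalone sketch below restates only that arithmetic, outside the pipeline class; the function names, the example tensors, and the assumption that the NLI probabilities are ordered [contradiction, neutral, entailment] (as for microsoft/deberta-large-mnli) are illustrative and are not part of the patch.

# Illustrative only -- not part of the patch series.
import torch


def length_normalised_prob(token_probs: torch.Tensor) -> torch.Tensor:
    # geometric mean of the per-token probabilities of one generated sequence
    return torch.pow(torch.prod(token_probs, -1), 1 / len(token_probs))


def semantic_density(nli_probs: torch.Tensor, token_probs: list) -> float:
    # nli_probs: (n_runs, 3) softmax outputs comparing the clean output with
    # each variational run, ordered [contradiction, neutral, entailment]
    # token_probs: one 1-D tensor of generated-token probabilities per run
    kernel = 1 - (nli_probs[:, 0] + 0.5 * nli_probs[:, 1])
    cond = torch.stack([length_normalised_prob(p) for p in token_probs])
    # confidence-weighted average of the kernel values
    return (torch.sum(cond * kernel) / torch.sum(cond)).item()


if __name__ == "__main__":
    # two hypothetical runs: one entailed by the clean output, one mostly neutral
    nli = torch.tensor([[0.05, 0.10, 0.85], [0.10, 0.60, 0.30]])
    probs = [torch.tensor([0.9, 0.8, 0.95]), torch.tensor([0.6, 0.5, 0.7])]
    print(f"semantic density: {semantic_density(nli, probs):.3f}")

Weighting by the length-normalised conditional probability means that low-confidence runs contribute less to the density, so a single degenerate dropout sample does not dominate the score.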