From 2f21d8db6a8f981290182b7993f83de4b0e901fa Mon Sep 17 00:00:00 2001
From: Huy Vu2
Date: Wed, 31 Jan 2024 14:46:21 -0800
Subject: [PATCH] revert to original eval code files 4

---
 .../conf/megatron_retro_inference.yaml        | 55 ++++++++-----------
 1 file changed, 22 insertions(+), 33 deletions(-)

diff --git a/examples/nlp/language_modeling/conf/megatron_retro_inference.yaml b/examples/nlp/language_modeling/conf/megatron_retro_inference.yaml
index 7fe07ea2b6b9..1b99a65f46ad 100644
--- a/examples/nlp/language_modeling/conf/megatron_retro_inference.yaml
+++ b/examples/nlp/language_modeling/conf/megatron_retro_inference.yaml
@@ -9,13 +9,7 @@ inference:
   repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty.
   min_tokens_to_generate: 0 # The minimum length of the sequence to be generated.
   compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False
-  end_strings: ["<|endoftext|>"] # generation will stop when one of these tokens is generated
-  # RETRO-specific arguments
-  retro_inference:
-    retro_gpt_retrieved_length: 128
-    retro_num_neighbors: 2
-    ft_neighbours: 0
-    reuse_top: True
+
 
 trainer:
   devices: 1
@@ -23,33 +17,28 @@ trainer:
   accelerator: gpu
   logger: False # logger provided by exp_manager
   precision: 16 # 16, 32, or bf16
-  use_distributed_sampler: False
-
 
+inference_batch_size: 2
 tensor_model_parallel_size: -1
 pipeline_model_parallel_size: -1
 pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others)
-megatron_amp_O2: False # Enable O2-level automatic mixed precision to save memory
-
-
-retro_model_file: null # Retro nemo file path
-checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the Retro training
-checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading
-hparams_file: null # model configuration file, only used for PTL checkpoint loading
-
-
-prompts: # prompts for Retro inference
-  - "prompt1"
-  - "prompt2"
-
-# RETRO inference
-neighbors: null
-
-server: False # whether launch the API server
-port: 5555 # the port number for the inference server
-web_server: False # whether launch the web inference server
-share: False # whether create a public URL
-username: test # user name for web client
-password: test2 # password for web client
-web_port: 9889 # the port number of the web server
-
+retro_model_file: null # RETRO nemo file path
+
+use_predict_method: False # whether to use the predict method
+
+prompts: # prompts for RETRO model inference
+  - "hello,"
+  - "good morning,"
+  - "good afternoon,"
+  - "good evening,"
+
+########### Faiss service parameters ########
+retrieval_service:
+  strategy: RetroModelTextGenerationStrategy # choose customized inference strategy
+  neighbors: 4
+  frequent_query: False # for the current token generation, frequently update the retrieval context. If false, update it every 64 tokens
+  pad_tokens: True # pad the tokens at the beginning to make it minimum of 64 tokens for retrieving at least once
+  store_retrieved: False # whether store the retrieved documents, so it can be checked
+  combo_service:
+    service_ip: '0.0.0.0'
+    service_port: 17181
\ No newline at end of file
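
Usage sketch (not part of the patch): NeMo's example scripts consume this YAML through Hydra/OmegaConf, so the reverted config can be loaded and overridden programmatically to verify the restored keys. This assumes a NeMo checkout with omegaconf installed and is run from the repo root; the .nemo path and the override values below are illustrative placeholders.

    from omegaconf import OmegaConf

    # Load the reverted config exactly as it sits on disk after applying the patch.
    cfg = OmegaConf.load(
        "examples/nlp/language_modeling/conf/megatron_retro_inference.yaml"
    )

    # Command-line-style dotlist overrides, mirroring Hydra's `key=value` syntax.
    overrides = OmegaConf.from_dotlist([
        "retro_model_file=/path/to/retro_model.nemo",  # placeholder path
        "retrieval_service.neighbors=2",               # fetch fewer neighbors per query
        "inference_batch_size=4",
    ])
    cfg = OmegaConf.merge(cfg, overrides)

    print(cfg.retrieval_service.strategy)                    # RetroModelTextGenerationStrategy
    print(cfg.retrieval_service.combo_service.service_port)  # 17181

Merging a dotlist on top of the loaded file approximates what Hydra does with command-line overrides, which makes this a quick way to sanity-check that the revert restored keys such as inference_batch_size, use_predict_method, and the retrieval_service block.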