Skip to content

Commit

Permalink
revert to original eval code files 4
Browse files — browse the repository at this point in the history
  • Loading branch information
Huy Vu2 committed Jan 31, 2024
1 parent 1c67ad1 commit 2f21d8d
Showing 1 changed file with 22 additions and 33 deletions.
55 changes: 22 additions & 33 deletions examples/nlp/language_modeling/conf/megatron_retro_inference.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,47 +9,36 @@ inference:
repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty.
min_tokens_to_generate: 0 # The minimum length of the sequence to be generated.
compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False
end_strings: ["<|endoftext|>"] # generation will stop when one of these tokens is generated
# RETRO-specific arguments
retro_inference:
retro_gpt_retrieved_length: 128
retro_num_neighbors: 2
ft_neighbours: 0
reuse_top: True


trainer:
devices: 1
num_nodes: 1
accelerator: gpu
logger: False # logger provided by exp_manager
precision: 16 # 16, 32, or bf16
use_distributed_sampler: False


inference_batch_size: 2
tensor_model_parallel_size: -1
pipeline_model_parallel_size: -1
pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others)
megatron_amp_O2: False # Enable O2-level automatic mixed precision to save memory


retro_model_file: null # Retro nemo file path
checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the Retro training
checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading
hparams_file: null # model configuration file, only used for PTL checkpoint loading


prompts: # prompts for Retro inference
- "prompt1"
- "prompt2"

# RETRO inference
neighbors: null

server: False # whether launch the API server
port: 5555 # the port number for the inference server
web_server: False # whether launch the web inference server
share: False # whether create a public URL
username: test # user name for web client
password: test2 # password for web client
web_port: 9889 # the port number of the web server

retro_model_file: null # RETRO nemo file path

use_predict_method: False # whether to use the predict method

prompts: # prompts for RETRO model inference
- "hello,"
- "good morning,"
- "good afternoon,"
- "good evening,"

########### Faiss service parameters ########
retrieval_service:
strategy: RetroModelTextGenerationStrategy # choose customized inference strategy
neighbors: 4
frequent_query: False # for the current token generation, frequently update the retrieval context. If false, update it every 64 tokens
pad_tokens: True # pad the tokens at the beginning to make it minimum of 64 tokens for retrieving at least once
store_retrieved: False # whether store the retrieved documents, so it can be checked
combo_service:
service_ip: '0.0.0.0'
service_port: 17181

0 comments on commit 2f21d8d

Please sign in to comment.