Skip to content

Commit

Permalink
revert to original eval code files 4
Browse files — browse the repository at this point in the history
  • Loading branch information
Huy Vu2 committed Jan 31, 2024
1 parent 1c67ad1 commit 2f21d8d
Showing 1 changed file with 22 additions and 33 deletions.
55 changes: 22 additions & 33 deletions examples/nlp/language_modeling/conf/megatron_retro_inference.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,47 +9,36 @@ inference:
repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty.
min_tokens_to_generate: 0 # The minimum length of the sequence to be generated.
compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False
end_strings: ["<|endoftext|>"] # generation will stop when one of these tokens is generated
# RETRO-specific arguments
retro_inference:
retro_gpt_retrieved_length: 128
retro_num_neighbors: 2
ft_neighbours: 0
reuse_top: True


trainer:
devices: 1
num_nodes: 1
accelerator: gpu
logger: False # logger provided by exp_manager
precision: 16 # 16, 32, or bf16
use_distributed_sampler: False


inference_batch_size: 2
tensor_model_parallel_size: -1
pipeline_model_parallel_size: -1
pipeline_model_parallel_split_rank: -1 # used for encoder and decoder model (0 for others)
megatron_amp_O2: False # Enable O2-level automatic mixed precision to save memory


retro_model_file: null # Retro nemo file path
checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the Retro training
checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading
hparams_file: null # model configuration file, only used for PTL checkpoint loading


prompts: # prompts for Retro inference
- "prompt1"
- "prompt2"

# RETRO inference
neighbors: null

server: False # whether launch the API server
port: 5555 # the port number for the inference server
web_server: False # whether launch the web inference server
share: False # whether create a public URL
username: test # user name for web client
password: test2 # password for web client
web_port: 9889 # the port number of the web server

retro_model_file: null # RETRO nemo file path

use_predict_method: False # whether to use the predict method

prompts: # prompts for RETRO model inference
- "hello,"
- "good morning,"
- "good afternoon,"
- "good evening,"

########### Faiss service parameters ########
retrieval_service:
strategy: RetroModelTextGenerationStrategy # choose customized inference strategy
neighbors: 4
frequent_query: False # for the current token generation, frequently update the retrieval context. If false, update it every 64 tokens
pad_tokens: True # pad the tokens at the beginning to make it minimum of 64 tokens for retrieving at least once
store_retrieved: False # whether store the retrieved documents, so it can be checked
combo_service:
service_ip: '0.0.0.0'
service_port: 17181

0 comments on commit 2f21d8d

Please sign in to comment.