This repo is for research only.
conda create --prefix ./envs/ python=3.10
conda init
conda activate ./envs
cd scripts
pip install -r requirements.txt
cd ..
cd alpaca_eval
pip install -e ".[all]"
pip install openai==0.27.0
export OPENAI_API_KEY=<your_api_key>
export OPENAI_ORGANIZATION_IDS=<your_organization_id> # Optional; if not set, your default organization ID is used.
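Before running the pipeline below, you can optionally verify that the pinned openai==0.27.0 client picks up the exported credentials. This is a minimal sanity check of our own, not part of the repo's scripts:

```python
# Optional sanity check (not part of this repo): confirm the openai 0.27.0
# client authenticates with the exported environment variables.
import os
import openai

openai.api_key = os.environ["OPENAI_API_KEY"]
# Assumes a single organization ID; leave unset if you did not export one.
openai.organization = os.environ.get("OPENAI_ORGANIZATION_IDS")

# Listing models makes one authenticated API call.
models = openai.Model.list()
print(f"Authenticated; {len(models['data'])} models visible.")
```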
data_path='data/fofo_test_prompts.json'
output_path='results'
CUDA_VISIBLE_DEVICES='0' python scripts/inference_anymodel_anydata.py \
    --input_file_path $data_path \
    --output_file_path $output_path/wizardlm-13b-v1.2/model_outputs.json \
    --model_name_or_path WizardLM/WizardLM-13B-V1.2 \
    --prompt_style wizardlm \
    --max_seq_length 5120
alpaca_eval --annotators_config gpt4_format_correctness \
    --model_outputs $output_path/wizardlm-13b-v1.2/model_outputs.json \
    --output_path $output_path/wizardlm-13b-v1.2/
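The inference step writes one JSON record per prompt to model_outputs.json, which the alpaca_eval command above then scores. If you want to peek at the file before scoring, a short check like the one below works; the field names ("instruction", "output", "generator") follow the usual alpaca_eval layout and are an assumption here, not something this repo guarantees:

```python
# Rough illustration only: inspect model outputs before running alpaca_eval.
import json

with open("results/wizardlm-13b-v1.2/model_outputs.json") as f:
    records = json.load(f)

print(f"{len(records)} records")
# Expect per-record fields along the lines of "instruction", "output",
# and "generator" (assumed alpaca_eval-style layout).
print(json.dumps(records[0], indent=2)[:500])
```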
If you want to draw radar figures of your model's performance and conduct domain/format analysis, please refer to scripts/draw_analysis.py.
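If you would rather build a quick figure by hand, the sketch below shows one way to draw a radar chart of per-domain scores with matplotlib. The domain names and scores are placeholders, and this is not the repo's draw_analysis.py:

```python
# Minimal radar-chart sketch; placeholder domains and scores,
# not the repo's scripts/draw_analysis.py.
import numpy as np
import matplotlib.pyplot as plt

domains = ["Medical", "Finance", "Legal", "Scientific", "Marketing"]  # placeholders
scores = [0.72, 0.65, 0.80, 0.58, 0.69]                               # placeholders

# One angle per domain; repeat the first point to close the polygon.
angles = np.linspace(0, 2 * np.pi, len(domains), endpoint=False).tolist()
angles += angles[:1]
values = scores + scores[:1]

fig, ax = plt.subplots(subplot_kw={"polar": True})
ax.plot(angles, values, linewidth=2)
ax.fill(angles, values, alpha=0.25)
ax.set_xticks(angles[:-1])
ax.set_xticklabels(domains)
ax.set_ylim(0, 1)
ax.set_title("Format correctness by domain")
plt.tight_layout()
plt.savefig("radar.png", dpi=200)
```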
If you use our code, please consider citing:
@article{xia2024fofo,
title={FOFO: A Benchmark to Evaluate LLMs' Format-Following Capability},
author={Xia, Congying and Xing, Chen and Du, Jiangshu and Yang, Xinyi and Feng, Yihao and Xu, Ran and Yin, Wenpeng and Xiong, Caiming},
journal={arXiv preprint arXiv:2402.18667},
year={2024}
}