diff --git a/scripts/convert.py b/scripts/convert.py index bd9ae0797..47a9792de 100644 --- a/scripts/convert.py +++ b/scripts/convert.py @@ -14,6 +14,7 @@ ) import onnx +import onnxslim from optimum.exporters.onnx import main_export, export_models from optimum.onnx.graph_transformations import check_and_save_model from optimum.exporters.tasks import TasksManager @@ -224,6 +225,12 @@ class ConversionArguments: "that desire a finer-grained control on the export." } ) + skip_onnxslim: bool = field( + default=False, + metadata={ + "help": "Whether or not to skip onnxslim." + } + ) def get_operators(model: onnx.ModelProto) -> Set[str]: @@ -623,6 +630,13 @@ def main(): os.makedirs(os.path.join(output_model_folder, 'onnx'), exist_ok=True) + if not conv_args.skip_onnxslim: + onnx_models = [os.path.join(output_model_folder, x) + for x in os.listdir(output_model_folder) if x.endswith('.onnx')] + + for model in onnx_models: + onnxslim.slim(model, model) + # Step 2. (optional, recommended) quantize the converted model for fast inference and to reduce model size. if conv_args.quantize: # Update quantize config with model specific defaults diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 3adf88030..b0278caad 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -4,3 +4,4 @@ optimum==1.17.1 onnx==1.15.0 onnxconverter-common==1.14.0 tqdm +onnxslim==0.1.29.1 \ No newline at end of file