-
Notifications
You must be signed in to change notification settings - Fork 62
/
onnx_convert.py
155 lines (138 loc) · 5.31 KB
/
onnx_convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import torch
import timm
import io
import onnx
import os
import argparse
import onnx_graphsurgeon as gs
from polygraphy.backend.onnx import fold_constants
from fastervit.models.faster_vit import *
from fastervit.models.faster_vit_any_res import *
# Command-line interface. The parsed namespace is stored in the module-level
# ``args`` because both ``main`` and ``export_onnx`` read from it directly.
parser = argparse.ArgumentParser(
    description="Export FasterVit model XYZ to ONNX file XYZ.onnx",
)
# Name of the model passed to timm.create_model; also used as the output file stem.
parser.add_argument("--model-name", default="faster_vit_0_any_res", type=str)
# Directory that receives the exported .onnx file(s).
parser.add_argument("--result-dir", default="./", type=str)
# Value forwarded as the ``pretrained`` argument of timm.create_model.
parser.add_argument("--pretrained_path", default="", type=str)
# ONNX opset version handed to torch.onnx.export.
parser.add_argument("--onnx-opset", default=17, type=int)
# Height and width of the dummy input used for tracing.
parser.add_argument("--resolution-h", default=224, type=int)
parser.add_argument("--resolution-w", default=224, type=int)
parser.add_argument(
    "--simplify",
    action="store_true",
    help="Further simplify the ONNX model with polygraphy",
)
args = parser.parse_args()
def main():
    """Build the requested model and export it to ``<model-name>.onnx``.

    Reads its configuration from the module-level ``args`` namespace, creates
    the model through ``timm.create_model``, moves it to CUDA in eval mode and
    passes it to ``export_onnx`` together with a random sample input of shape
    ``(1, 3, resolution_h, resolution_w)``.

    Raises:
        ValueError: If the model name contains ``"_224"`` but the requested
            height and width differ.
    """
    model_name = args.model_name
    resolution_h = args.resolution_h
    resolution_w = args.resolution_w

    # Models whose name contains "_224" receive a single scalar resolution;
    # every other model receives an explicit [height, width] pair.
    scalar_resolution = "_224" in model_name
    if scalar_resolution and resolution_h != resolution_w:
        # Raised explicitly (instead of ``assert``) so the check is not
        # stripped when Python runs with -O.
        raise ValueError(
            f"Model '{model_name}' requires a square input, got "
            f"{resolution_h}x{resolution_w}"
        )
    resolution = resolution_h if scalar_resolution else [resolution_h, resolution_w]

    model = timm.create_model(
        model_name,
        resolution=resolution,
        # NOTE(review): a path string (default "") is forwarded as
        # ``pretrained``; confirm the model factory interprets it as intended.
        pretrained=args.pretrained_path,
        exportable=True,
    )
    model = model.cuda()
    model.eval()

    # Dummy input used only to trace the model during export.
    imgs = torch.randn(
        (1, 3, resolution_h, resolution_w),
        device="cuda",
        requires_grad=True,
    )
    export_onnx(
        model,
        imgs,
        onnx_file_name=model_name + ".onnx",
        export_params=True,
        opset_version=args.onnx_opset,
        result_dir=args.result_dir,
    )
def export_onnx(
    model: torch.nn.Module,
    sample_inputs,
    export_params: bool = False,
    opset_version: int = 17,
    result_dir: str = "",
    batch_first: bool = True,
    is_training: bool = False,
    onnx_file_name: str = "",
) -> None:
    """Export ``model`` to ONNX, run shape inference and save it to disk.

    The model is exported into an in-memory buffer, reloaded with ``onnx``,
    passed through ``onnx.shape_inference.infer_shapes`` and written to
    ``os.path.join(result_dir, onnx_file_name)``. When the module-level
    ``args.simplify`` flag is set, the model is additionally run through
    ``Optimizer`` (cleanup, constant folding, shape inference, cleanup) and
    saved next to the first file with ``.polygraphy`` inserted before the
    file extension.

    Args:
        model: Module to export.
        sample_inputs: Example input(s) used for tracing. A tuple is wrapped
            in an outer one-element tuple before being handed to the exporter.
        export_params: Forwarded to ``torch.onnx.export``.
        opset_version: ONNX opset version to target.
        result_dir: Output directory; created if it does not exist. An empty
            string means the current working directory.
        batch_first: When true, the graph input/output are named ``"input"``
            and ``"output"`` and their axis 0 is marked dynamic; otherwise no
            names or dynamic axes are passed.
        is_training: Export in training mode instead of eval mode.
        onnx_file_name: File name of the exported model.
    """
    # ONNX has issue to unpack the tuple of parameters to the model.
    # https://github.com/pytorch/pytorch/issues/11456
    export_args = (sample_inputs,) if isinstance(sample_inputs, tuple) else sample_inputs
    training_mode = (
        torch.onnx.TrainingMode.TRAINING
        if is_training
        else torch.onnx.TrainingMode.EVAL
    )

    # The context manager releases the buffer even if the export raises.
    with io.BytesIO() as buffer:
        torch.onnx.export(
            model,
            export_args,
            buffer,
            export_params=export_params,
            training=training_mode,
            do_constant_folding=True,
            opset_version=opset_version,
            input_names=["input"] if batch_first else None,
            output_names=["output"] if batch_first else None,
            dynamic_axes={"input": [0], "output": [0]} if batch_first else None,
        )
        # The second positional parameter of load_model_from_string is the
        # serialization *format*, not the proto class, so it is left at its
        # default here.
        onnx_model = onnx.load_model_from_string(buffer.getvalue())

    onnx_model = onnx.shape_inference.infer_shapes(onnx_model)

    # Create the output directory up front so saving does not fail on a
    # missing path; an empty result_dir refers to the current directory.
    if result_dir:
        os.makedirs(result_dir, exist_ok=True)
    output_path = os.path.join(result_dir, onnx_file_name)
    onnx.save(onnx_model, output_path)

    # ``args`` is the module-level namespace produced by the argument parser.
    if args.simplify:
        # NOTE(review): Optimizer is created with its default verbosity, under
        # which the info() calls below print nothing - confirm this is intended.
        opt = Optimizer(onnx_model)
        opt.info("Original")
        opt.cleanup()
        opt.info("Clean up")
        opt.fold_constants()
        opt.info("Fold constant")
        opt.infer_shapes()
        opt.info("Shape inference")
        onnx_polygraphy_opt = opt.cleanup(return_onnx=True)

        # Only the file extension is touched, so directory names containing
        # ".onnx" stay intact and the simplified model never overwrites the
        # file saved above.
        stem, extension = os.path.splitext(output_path)
        polygraphy_output_path = f"{stem}.polygraphy{extension}"
        onnx.save(onnx_polygraphy_opt, polygraphy_output_path)
class Optimizer():
    """Wrapper around an onnx-graphsurgeon graph with a few optimization passes.

    The ONNX model given to the constructor is imported with
    ``gs.import_onnx`` and kept in ``self.graph``; each pass updates that
    attribute and can optionally return the model exported back to ONNX.
    """

    def __init__(
        self,
        onnx_graph,
        verbose=False
    ):
        """Import ``onnx_graph`` and remember whether ``info`` should print."""
        self.verbose = verbose
        self.graph = gs.import_onnx(onnx_graph)

    def info(self, prefix):
        """Print node/tensor/input/output counts, prefixed by ``prefix``.

        Does nothing unless the instance was created with ``verbose=True``.
        """
        if not self.verbose:
            return
        graph = self.graph
        num_nodes = len(graph.nodes)
        num_tensors = len(graph.tensors().keys())
        num_inputs = len(graph.inputs)
        num_outputs = len(graph.outputs)
        print(f"{prefix} .. {num_nodes} nodes, {num_tensors} tensors, {num_inputs} inputs, {num_outputs} outputs")

    def cleanup(self, return_onnx=False):
        """Run ``cleanup()`` followed by ``toposort()`` on the graph.

        Returns the graph exported to ONNX when ``return_onnx`` is true,
        otherwise ``None``.
        """
        cleaned = self.graph.cleanup()
        cleaned.toposort()
        if not return_onnx:
            return None
        return gs.export_onnx(self.graph)

    def select_outputs(self, keep, names=None):
        """Keep only the graph outputs at the indices in ``keep``.

        When ``names`` is given, the i-th remaining output is renamed to
        ``names[i]``.
        """
        current_outputs = self.graph.outputs
        self.graph.outputs = [current_outputs[index] for index in keep]
        if not names:
            return
        for position, new_name in enumerate(names):
            self.graph.outputs[position].name = new_name

    def fold_constants(self, return_onnx=False):
        """Fold constants with polygraphy and re-import the resulting model.

        Returns the folded ONNX model when ``return_onnx`` is true, otherwise
        ``None``.
        """
        exported = gs.export_onnx(self.graph)
        folded = fold_constants(exported, allow_onnxruntime_shape_inference=True)
        self.graph = gs.import_onnx(folded)
        return folded if return_onnx else None

    def infer_shapes(self, return_onnx=False):
        """Run ONNX shape inference on the graph and re-import the result.

        Returns the inferred ONNX model when ``return_onnx`` is true,
        otherwise ``None``.

        Raises:
            TypeError: If the exported model's ``ByteSize()`` exceeds
                2147483648 bytes.
        """
        size_limit = 2 ** 31  # 2147483648 bytes
        exported = gs.export_onnx(self.graph)
        if exported.ByteSize() > size_limit:
            raise TypeError("ERROR: model size exceeds supported 2GB limit")
        inferred = onnx.shape_inference.infer_shapes(exported)
        self.graph = gs.import_onnx(inferred)
        return inferred if return_onnx else None
# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()