onnxruntime-inference-examples/quantization/image_classification/cpu/run.py at main · microsoft/onnxruntime-inference-examples · GitHub

Name: onnxruntime-inference-examples/quantization/image_classification/cpu/run.py at main · microsoft/onnxruntime-inference-examples · GitHub
Rating: 4.7 (9421 reviews)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
importargparse
importnumpyasnp
importonnxruntime
importtime
fromonnxruntime.quantizationimportQuantFormat, QuantType, quantize_static

importresnet50_data_reader


def benchmark(model_path, runs=10):
    """Time repeated inference of an ONNX model and print the latencies.

    An inference session is created for ``model_path`` and its first input
    is fed an all-zero float32 array of shape (1, 3, 224, 224). One untimed
    warm-up run is followed by ``runs`` timed runs; the latency of each run
    and the average over all runs are printed in milliseconds.

    Args:
        model_path: Path of the ONNX model to load.
        runs: Number of timed inference runs. Defaults to 10 and must be
            at least 1.

    Returns:
        The average latency of the timed runs, in milliseconds.

    Raises:
        ValueError: If ``runs`` is less than 1.
    """
    # Reject bad run counts up front: zero would divide by zero when averaging.
    if runs < 1:
        raise ValueError(f"runs must be a positive integer, got {runs!r}")

    session = onnxruntime.InferenceSession(model_path)
    input_name = session.get_inputs()[0].name
    feed = {input_name: np.zeros((1, 3, 224, 224), np.float32)}

    # Warming up: the first run is executed but deliberately not timed.
    session.run([], feed)

    total_ms = 0.0
    for _ in range(runs):
        start = time.perf_counter()
        session.run([], feed)
        # perf_counter() reports seconds; convert the delta to milliseconds.
        elapsed_ms = (time.perf_counter() - start) * 1000
        total_ms += elapsed_ms
        print(f"{elapsed_ms:.2f}ms")

    average_ms = total_ms / runs
    print(f"Avg: {average_ms:.2f}ms")
    return average_ms


def _parse_bool(value):
    """Convert a command-line string such as "true" or "0" into a bool."""
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "on", "1"):
        return True
    # The empty string stays falsy, as it was under the former ``type=bool``.
    if text in ("false", "f", "no", "n", "off", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def get_args():
    """Parse the command-line arguments of the quantization script.

    Recognized options:
        --input_model: required, path of the model to quantize.
        --output_model: required, path where the quantized model is written.
        --calibrate_dataset: calibration data location
            (default "./test_images").
        --quant_format: a ``QuantFormat`` member parsed with
            ``QuantFormat.from_string`` (default ``QuantFormat.QDQ``).
        --per_channel: boolean flag (default False). Accepts an explicit
            value such as ``--per_channel true`` / ``--per_channel false``,
            or no value at all (``--per_channel``), which means True.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_model", required=True, help="input model")
    parser.add_argument("--output_model", required=True, help="output model")
    parser.add_argument(
        "--calibrate_dataset", default="./test_images", help="calibration data set"
    )
    parser.add_argument(
        "--quant_format",
        default=QuantFormat.QDQ,
        type=QuantFormat.from_string,
        choices=list(QuantFormat),
    )
    # ``type=bool`` would treat every non-empty string (including "False")
    # as True, so the value is parsed explicitly instead.
    parser.add_argument(
        "--per_channel",
        default=False,
        type=_parse_bool,
        nargs="?",
        const=True,
        help="enable per-channel quantization (true/false)",
    )
    return parser.parse_args()


def main():
    """Quantize the input model using calibration data, then time both models.

    Reads the command-line options, builds a ResNet50 data reader from the
    calibration dataset path and the input model path, runs
    ``quantize_static`` to write the quantized model, and finally benchmarks
    the original and the quantized model in that order.
    """
    cli = get_args()
    fp32_path, int8_path = cli.input_model, cli.output_model

    reader = resnet50_data_reader.ResNet50DataReader(cli.calibrate_dataset, fp32_path)

    # Calibrate and quantize the model; weights are quantized as QInt8.
    quantize_static(
        fp32_path,
        int8_path,
        reader,
        quant_format=cli.quant_format,
        per_channel=cli.per_channel,
        weight_type=QuantType.QInt8,
    )
    print("Calibrated and quantized model saved.")

    # Benchmark the source model first, then the freshly written one.
    for banner, path in (
        ("benchmarking fp32 model...", fp32_path),
        ("benchmarking int8 model...", int8_path),
    ):
        print(banner)
        benchmark(path)


# Run the quantize-and-benchmark workflow only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()