Module brevettiai.utils.onnx_benchmark

Benchmarks for ONNX models

Example

python -m brevettiai.utils.onnx_benchmark [path_to_onnx] …
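
The benchmarks can also be run programmatically. A minimal sketch, assuming a
local model file "model.onnx" (a placeholder path):

    from brevettiai.utils.onnx_benchmark import benchmark_onnx_with_onnxruntime

    result = benchmark_onnx_with_onnxruntime(name="model", path="model.onnx",
                                             runs=50, warmup_runs=5)
    print(result)       # summary: average / worst case / first batch timings
    print(result.mean)  # mean runtime in seconds, warmup runs excluded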

Source code
"""
Benchmarks for ONNX models
Example:
    python -m brevettiai.utils.onnx_benchmark [path_to_onnx] ...
"""
import argparse
import os
import time
from pydantic import BaseModel, Field
from typing import Optional

import cv2
import numpy as np
import pandas as pd
import onnxruntime


class Benchmark(BaseModel):
    """
    Collection of information about a benchmark
    """
    name: str
    timing: list
    warmup: int = 0
    producer: str = ""
    input_example: Optional[np.ndarray] = None
    output_example: Optional[np.ndarray] = None

    class Config:
        arbitrary_types_allowed = True

    @property
    def valid_timing(self):
        return self.timing[self.warmup:]

    @property
    def t0(self):
        return self.timing[0]

    @property
    def mean(self):
        return np.mean(self.valid_timing)

    @property
    def worst_case(self):
        return max(self.valid_timing)

    @staticmethod
    def _shape_and_dtype_str(array):
        if array is None:
            return "?"
        return f"{array.shape}[{array.dtype}]"

    def __str__(self):
        return f"{self.name}[{self.producer}] " \
               f"{self._shape_and_dtype_str(self.input_example)} -> " \
               f"{self._shape_and_dtype_str(self.output_example)}\n" \
               f"\taverage: {self.mean * 1000:.1f}ms, " \
               f"worst case: {self.worst_case * 1000:.1f}ms, " \
               f"first batch: {self.t0 * 1000:.1f}ms"


class BenchmarkCV2(Benchmark):
    """
    Benchmark with per-layer timing collected through cv2.dnn
    """
    layer_timing: np.ndarray
    layer_names: list = Field(default_factory=list)

    @property
    def layer_mean_performance(self):
        t = self.layer_timing[self.warmup:]
        t = pd.Series(np.mean(t, 0), self.layer_names)
        t.index.name = "layers"
        return t

    def __str__(self):
        layer_perf = self.layer_mean_performance.sort_values(ascending=False).head(10)
        return super().__str__() + f"\nLayer performance:\n{(layer_perf*1000).apply('{:.1f}ms'.format).to_string()}"


def _get_input_data_generator(path):
    """Yield random input batches matching the model's first input (assumes a static input shape)."""
    model = onnxruntime.InferenceSession(path)
    input_ = model.get_inputs()[0]
    # Parse the dtype from the onnx type string, e.g. "tensor(float)" -> "float"
    dtype_ = np.dtype(input_.type[7:-1])
    # np.dtype("float") is float64, but onnx "float" is 32 bit, so map it down
    dtype_ = np.float32 if dtype_ == "float64" else dtype_
    while True:
        yield np.random.randn(*input_.shape).astype(dtype_)


def benchmark_onnx_with_onnxruntime(name: str, path: str, runs: int = 100, warmup_runs: int = 10) -> Benchmark:
    """
    Benchmark an onnx with onnxruntime

    Args:
        name: display name used when reporting the benchmark
        path: path to the onnx model file
        runs: number of timed runs included in the statistics
        warmup_runs: number of initial runs excluded from the statistics

    Returns:
        Benchmark with the collected timings

    """
    assert runs > 0
    assert warmup_runs > 0
    sess_option = onnxruntime.SessionOptions()
    sess_option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED

    providers = [
        # Prefer the CUDA provider when available; onnxruntime falls back to CPU
        ('CUDAExecutionProvider', {
            'device_id': 1,
            'arena_extend_strategy': 'kNextPowerOfTwo',
            'gpu_mem_limit': 6 * 1024 * 1024 * 1024,  # 6 GB
            'cudnn_conv_algo_search': 'EXHAUSTIVE',
            'do_copy_in_default_stream': True,
        }),
        'CPUExecutionProvider',
    ]
    model = onnxruntime.InferenceSession(path, providers=providers, sess_options=sess_option)

    input_ = model.get_inputs()[0]
    output_ = model.get_outputs()[0]
    input_data_generator = _get_input_data_generator(path)

    input_data, output_data = None, None
    timing = []
    for i in range(runs + warmup_runs):
        input_data = {input_.name: next(input_data_generator)}
        ta = time.perf_counter()

        output_data = model.run([output_.name], input_data)[0]

        tb = time.perf_counter()
        timing.append((tb - ta))

    return Benchmark(name=name, timing=timing, warmup=warmup_runs, producer=f"onnxruntime-{onnxruntime.__version__}",
                     input_example=input_data[input_.name], output_example=output_data)


def benchmark_onnx_with_cv2(name: str, path: str, runs: int = 100, warmup_runs: int = 10) -> BenchmarkCV2:
    """
    Benchmark with cv2.dnn as runtime

    Args:
        name: display name used when reporting the benchmark
        path: path to the onnx model file
        runs: number of timed runs included in the statistics
        warmup_runs: number of initial runs excluded from the statistics

    Returns:
        BenchmarkCV2 with the collected timings and per-layer profiling

    """
    model = cv2.dnn.readNetFromONNX(path)

    input_data_generator = _get_input_data_generator(path)

    input_data, output_data = None, None
    timing, timing_perf_counter = [], []
    layer_timing = []
    for i in range(runs + warmup_runs):
        input_data = next(input_data_generator)
        ta = time.perf_counter()

        model.setInput(input_data)
        output_data = model.forward()

        tb = time.perf_counter()
        timing_perf_counter.append((tb - ta))  # wall-clock reference timing
        # cv2 profiles the last forward pass in ticks; convert to seconds
        dt, layer_dt = model.getPerfProfile()
        timing.append(dt / cv2.getTickFrequency())
        layer_timing.append(layer_dt / cv2.getTickFrequency())

    layer_timing = np.stack(layer_timing).squeeze()

    return BenchmarkCV2(name=name, timing=timing,
                        layer_timing=layer_timing, layer_names=model.getLayerNames(),
                        warmup=warmup_runs, producer=f"cv2.dnn-{cv2.version.opencv_version}",
                        input_example=input_data, output_example=output_data)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Performance tests for onnx models')
    parser.add_argument('onnx_path', nargs='+', help='Paths to onnx files')
    parser.add_argument('-N', "--runs", help='Number of batches to supply to model', type=int, default=100)
    parser.add_argument("--warmup-runs", help='Number of batches to supply to model for warmup', type=int, default=10)
    args = parser.parse_args()

    for onnx in args.onnx_path:
        name = os.path.basename(onnx)
        kwargs = dict(name=name, path=onnx, runs=args.runs, warmup_runs=args.warmup_runs)
        print(benchmark_onnx_with_cv2(**kwargs), "\n")
        print(benchmark_onnx_with_onnxruntime(**kwargs), "\n")

Functions

def benchmark_onnx_with_cv2(name: str, path: str, runs: int = 100, warmup_runs: int = 10) ‑> BenchmarkCV2

Benchmark with cv2.dnn as runtime

Args

name: display name used when reporting the benchmark
path: path to the onnx model file
runs: number of timed runs included in the statistics
warmup_runs: number of initial runs excluded from the statistics

Returns

BenchmarkCV2 with the collected timings and per-layer profiling

Source code
def benchmark_onnx_with_cv2(name: str, path: str, runs: int = 100, warmup_runs: int = 10) -> BenchmarkCV2:
    """
    Benchmark with cv2.dnn as runtime

    Args:
        name: display name used when reporting the benchmark
        path: path to the onnx model file
        runs: number of timed runs included in the statistics
        warmup_runs: number of initial runs excluded from the statistics

    Returns:
        BenchmarkCV2 with the collected timings and per-layer profiling

    """
    model = cv2.dnn.readNetFromONNX(path)

    input_data_generator = _get_input_data_generator(path)

    input_data, output_data = None, None
    timing, timing_perf_counter = [], []
    layer_timing = []
    for i in range(runs + warmup_runs):
        input_data = next(input_data_generator)
        ta = time.perf_counter()

        model.setInput(input_data)
        output_data = model.forward()

        tb = time.perf_counter()
        timing_perf_counter.append((tb - ta))  # wall-clock reference timing
        # cv2 profiles the last forward pass in ticks; convert to seconds
        dt, layer_dt = model.getPerfProfile()
        timing.append(dt / cv2.getTickFrequency())
        layer_timing.append(layer_dt / cv2.getTickFrequency())

    layer_timing = np.stack(layer_timing).squeeze()

    return BenchmarkCV2(name=name, timing=timing,
                        layer_timing=layer_timing, layer_names=model.getLayerNames(),
                        warmup=warmup_runs, producer=f"cv2.dnn-{cv2.version.opencv_version}",
                        input_example=input_data, output_example=output_data)
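
A usage sketch ("model.onnx" is a placeholder path to an ONNX model on disk):

    result = benchmark_onnx_with_cv2(name="model", path="model.onnx")
    print(result)  # summary line plus the ten slowest layers
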
def benchmark_onnx_with_onnxruntime(name: str, path: str, runs: int = 100, warmup_runs: int = 10) ‑> Benchmark

Benchmark an onnx with onnxruntime

Args

name: display name used when reporting the benchmark
path: path to the onnx model file
runs: number of timed runs included in the statistics
warmup_runs: number of initial runs excluded from the statistics

Returns

Benchmark with the collected timings

Source code
def benchmark_onnx_with_onnxruntime(name: str, path: str, runs: int = 100, warmup_runs: int = 10) -> Benchmark:
    """
    Benchmark an onnx with onnxruntime

    Args:
        name: display name used when reporting the benchmark
        path: path to the onnx model file
        runs: number of timed runs included in the statistics
        warmup_runs: number of initial runs excluded from the statistics

    Returns:
        Benchmark with the collected timings

    """
    assert runs > 0
    assert warmup_runs > 0
    sess_option = onnxruntime.SessionOptions()
    sess_option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED

    providers = [
        # Prefer the CUDA provider when available; onnxruntime falls back to CPU
        ('CUDAExecutionProvider', {
            'device_id': 1,
            'arena_extend_strategy': 'kNextPowerOfTwo',
            'gpu_mem_limit': 6 * 1024 * 1024 * 1024,  # 6 GB
            'cudnn_conv_algo_search': 'EXHAUSTIVE',
            'do_copy_in_default_stream': True,
        }),
        'CPUExecutionProvider',
    ]
    model = onnxruntime.InferenceSession(path, providers=providers, sess_options=sess_option)

    input_ = model.get_inputs()[0]
    output_ = model.get_outputs()[0]
    input_data_generator = _get_input_data_generator(path)

    input_data, output_data = None, None
    timing = []
    for i in range(runs + warmup_runs):
        input_data = {input_.name: next(input_data_generator)}
        ta = time.perf_counter()

        output_data = model.run([output_.name], input_data)[0]

        tb = time.perf_counter()
        timing.append((tb - ta))

    return Benchmark(name=name, timing=timing, warmup=warmup_runs, producer=f"onnxruntime-{onnxruntime.__version__}",
                     input_example=input_data[input_.name], output_example=output_data)
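
A usage sketch ("model.onnx" is a placeholder path), reading individual
statistics off the returned Benchmark:

    result = benchmark_onnx_with_onnxruntime(name="model", path="model.onnx")
    print(result.worst_case)           # slowest run in seconds, warmup excluded
    print(result.input_example.shape)  # shape of the randomly generated input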

Classes

class Benchmark (**data: Any)

Collection of information about a benchmark

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

Source code
class Benchmark(BaseModel):
    """
    Collection of information about a benchmark
    """
    name: str
    timing: list
    warmup: int = 0
    producer: str = ""
    input_example: Optional[np.ndarray] = None
    output_example: Optional[np.ndarray] = None

    class Config:
        arbitrary_types_allowed = True

    @property
    def valid_timing(self):
        return self.timing[self.warmup:]

    @property
    def t0(self):
        return self.timing[0]

    @property
    def mean(self):
        return np.mean(self.valid_timing)

    @property
    def worst_case(self):
        return max(self.valid_timing)

    @staticmethod
    def _shape_and_dtype_str(array):
        if array is None:
            return "?"
        return f"{array.shape}[{array.dtype}]"

    def __str__(self):
        return f"{self.name}[{self.producer}] " \
               f"{self._shape_and_dtype_str(self.input_example)} -> " \
               f"{self._shape_and_dtype_str(self.output_example)}\n" \
               f"\taverage: {self.mean * 1000:.1f}ms, " \
               f"worst case: {self.worst_case * 1000:.1f}ms, " \
               f"first batch: {self.t0 * 1000:.1f}ms"

Ancestors

  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Subclasses

  • BenchmarkCV2

Class variables

var Config
var input_example : Optional[numpy.ndarray]
var name : str
var output_example : Optional[numpy.ndarray]
var producer : str
var timing : list
var warmup : int

Instance variables

var mean

Source code

@property
def mean(self):
    return np.mean(self.valid_timing)

var t0

Source code

@property
def t0(self):
    return self.timing[0]

var valid_timing

Source code

@property
def valid_timing(self):
    return self.timing[self.warmup:]

var worst_case

Source code

@property
def worst_case(self):
    return max(self.valid_timing)
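
A Benchmark can also be populated with timings gathered elsewhere; a minimal
sketch with made-up numbers:

    bm = Benchmark(name="external", timing=[0.021, 0.012, 0.011, 0.013], warmup=1)
    # warmup=1 drops the first sample, so mean/worst_case cover the last three runs
    print(f"mean: {bm.mean * 1000:.1f}ms, worst case: {bm.worst_case * 1000:.1f}ms")
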
class BenchmarkCV2 (**data: Any)

Benchmark with per-layer timing collected through cv2.dnn

Create a new model by parsing and validating input data from keyword arguments.

Raises ValidationError if the input data cannot be parsed to form a valid model.

Source code
class BenchmarkCV2(Benchmark):
    """
    Benchmark with per-layer timing collected through cv2.dnn
    """
    layer_timing: np.ndarray
    layer_names: list = Field(default_factory=list)

    @property
    def layer_mean_performance(self):
        t = self.layer_timing[self.warmup:]
        t = pd.Series(np.mean(t, 0), self.layer_names)
        t.index.name = "layers"
        return t

    def __str__(self):
        layer_perf = self.layer_mean_performance.sort_values(ascending=False).head(10)
        return super().__str__() + f"\nLayer performance:\n{(layer_perf*1000).apply('{:.1f}ms'.format).to_string()}"

Ancestors

  • Benchmark
  • pydantic.main.BaseModel
  • pydantic.utils.Representation

Class variables

var layer_names : list
var layer_timing : numpy.ndarray

Instance variables

var layer_mean_performance

Source code

@property
def layer_mean_performance(self):
    t = self.layer_timing[self.warmup:]
    t = pd.Series(np.mean(t, 0), self.layer_names)
    t.index.name = "layers"
    return t
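
A sketch of inspecting the per-layer profile after a cv2 benchmark
("model.onnx" is a placeholder path):

    bm = benchmark_onnx_with_cv2(name="model", path="model.onnx")
    slowest = bm.layer_mean_performance.sort_values(ascending=False)
    print((slowest.head(5) * 1000).round(3))  # five slowest layers, in milliseconds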