Skip to content

Commit

Permalink
Add CUDA stream pool (#461)
Browse files (browse the repository at this point in the history)
  • Loading branch information
abramov-oleg authored Oct 3, 2023
1 parent 41b0736 commit cedebfc
Show file tree
Hide file tree
Showing 10 changed files with 117 additions and 135 deletions.
8 changes: 8 additions & 0 deletions docs/performance.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
| Savant ver. | A4000 | Jetson NX |
|----------------------------------------------------------------------------------|--------|-----------|
| [#407](https://github.com/insight-platform/Savant/issues/407) (no queues) | 174.10 | 36.34 |
| [#456](https://github.com/insight-platform/Savant/issues/456) (no queues) | 171.82 | 35.73 |

### conditional_video_processing

Expand All @@ -26,6 +27,7 @@
| [#341](https://github.com/insight-platform/Savant/issues/341) (no queues) | 311.62 | 61.46 |
| [#347](https://github.com/insight-platform/Savant/issues/347) (no queues) | 263.44 | 59.86 |
| [#407](https://github.com/insight-platform/Savant/issues/407) (no queues) | 263.89 | 63.52 |
| [#456](https://github.com/insight-platform/Savant/issues/456) (no queues) | 257.63 | 62.77 |

### face_reid

Expand All @@ -35,6 +37,7 @@
| [#341](https://github.com/insight-platform/Savant/issues/341) (no queues) | 121.11 | 25.4 |
| [#347](https://github.com/insight-platform/Savant/issues/347) (no queues) | 118.79 | 25.99 |
| [#407](https://github.com/insight-platform/Savant/issues/407) (no queues) | 127.37 | 27.61 |
| [#456](https://github.com/insight-platform/Savant/issues/456) (no queues) | 127.23 | 28.71 |

### nvidia_car_classification

Expand All @@ -47,6 +50,7 @@
| [#341](https://github.com/insight-platform/Savant/issues/341) (no queues) | 156.73 | 42.44 |
| [#347](https://github.com/insight-platform/Savant/issues/347) (no queues) | 151.44 | 42.97 |
| [#407](https://github.com/insight-platform/Savant/issues/407) (no queues) | 149.66 | 41.11 |
| [#456](https://github.com/insight-platform/Savant/issues/456) (no queues) | 150.07 | 38.76 |

### opencv_cuda_bg_remover_mog2

Expand All @@ -59,6 +63,7 @@
| [#341](https://github.com/insight-platform/Savant/issues/341) (no queues) | 671.34 | 92.24 |
| [#347](https://github.com/insight-platform/Savant/issues/347) (no queues) | 608.54 | 91.69 |
| [#407](https://github.com/insight-platform/Savant/issues/407) (no queues) | 607.48 | 90.92 |
| [#456](https://github.com/insight-platform/Savant/issues/456) (no queues) | 606.74 | 95.05 |

### opencv_cuda_bg_remover_mog2 (multi-stream)

Expand All @@ -78,6 +83,7 @@
| [#341](https://github.com/insight-platform/Savant/issues/341) (no queues) | 117.22 | 29.27 |
| [#347](https://github.com/insight-platform/Savant/issues/347) (no queues) | 116.43 | 28.05 |
| [#407](https://github.com/insight-platform/Savant/issues/407) (no queues) | 116.61 | 28.54 |
| [#456](https://github.com/insight-platform/Savant/issues/456) (no queues) | 116.44 | 26.03 |

### traffic_meter (yolov8m)

Expand All @@ -90,6 +96,7 @@
| [#341](https://github.com/insight-platform/Savant/issues/341) (no queues) | 135.19 | 24.67 |
| [#347](https://github.com/insight-platform/Savant/issues/347) (no queues) | 136.49 | 24.40 |
| [#407](https://github.com/insight-platform/Savant/issues/407) (no queues) | 136.16 | 24.66 |
| [#456](https://github.com/insight-platform/Savant/issues/456) (no queues) | 136.80 | 23.29 |

### yolov8_seg

Expand All @@ -102,3 +109,4 @@ Note: `yolov8_seg` always has a buffer length of 10. `BUFFER_QUEUES` env doesn't
| [#341](https://github.com/insight-platform/Savant/issues/341) | 45.21 | 14.02 |
| [#347](https://github.com/insight-platform/Savant/issues/347) | 44.34 | 13.07 |
| [#407](https://github.com/insight-platform/Savant/issues/407) | 67.73 | 21.57 |
| [#456](https://github.com/insight-platform/Savant/issues/456) | 68.48 | 21.71 |
14 changes: 14 additions & 0 deletions gst_plugins/python/pyfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ class GstPluginPyFunc(LoggerMixin, GstBase.BaseTransform):
'VideoPipeline object from savant-rs.',
GObject.ParamFlags.READWRITE,
),
'stream-pool-size': (
int,
'Max stream pool size',
'Max stream pool size',
1,
GLib.MAXINT,
1,
GObject.ParamFlags.READWRITE,
),
'dev-mode': (
bool,
'Dev mode flag',
Expand All @@ -90,6 +99,7 @@ def __init__(self):
self.kwargs: Optional[str] = None
self.video_pipeline: Optional[VideoPipeline] = None
self.dev_mode: bool = False
self.max_stream_pool_size: int = 1
# pyfunc object
self.pyfunc: Optional[PyFunc] = None

Expand All @@ -106,6 +116,8 @@ def do_get_property(self, prop: GObject.GParamSpec) -> Any:
return self.kwargs
if prop.name == 'pipeline':
return self.video_pipeline
if prop.name == 'stream-pool-size':
return self.max_stream_pool_size
if prop.name == 'dev-mode':
return self.dev_mode
raise AttributeError(f'Unknown property {prop.name}.')
Expand All @@ -124,6 +136,8 @@ def do_set_property(self, prop: GObject.GParamSpec, value: Any):
self.kwargs = value
elif prop.name == 'pipeline':
self.video_pipeline = value
elif prop.name == 'stream-pool-size':
self.max_stream_pool_size = value
elif prop.name == 'dev-mode':
self.dev_mode = value
else:
Expand Down
3 changes: 1 addition & 2 deletions samples/telemetry/blur.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ def process_frame(self, buffer: Gst.Buffer, frame_meta: NvDsFrameMeta):
# logger messages will be added to span automatically
self.logger.info('Try to blur frame #%d.', frame_meta.frame_num)

stream = self.get_cuda_stream(frame_meta)
with nvds_to_gpu_mat(buffer, frame_meta.frame_meta) as frame_mat:
stream = self.get_cuda_stream(frame_meta)

# create a new span for an important code section
# to track the time spent on its execution
with frame_meta.telemetry_span.nested_span('blur-filter'):
Expand Down
2 changes: 1 addition & 1 deletion samples/yolov8_seg/module/overlay.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def __init__(self, **kwargs):
self.bg_color = np.array([0, 0, 0, 0], dtype=np.uint8)

def draw(self, buffer: Gst.Buffer, frame_meta: NvDsFrameMeta):
stream = self.get_cuda_stream(frame_meta)
with nvds_to_gpu_mat(buffer, frame_meta.frame_meta) as frame_mat:
stream = self.get_cuda_stream(frame_meta)
for obj_meta in frame_meta.objects:
if obj_meta.is_primary:
continue
Expand Down
139 changes: 70 additions & 69 deletions savant/base/input_preproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from savant.base.model import OutputImage
from savant.base.pyfunc import BasePyFuncCallableImpl, PyFuncNoopCallException
from savant.deepstream.cudastream import CudaStreams
from savant.deepstream.meta.object import _NvDsObjectMetaImpl
from savant.deepstream.opencv_utils import nvds_to_gpu_mat
from savant.deepstream.utils import nvds_frame_meta_iterator, nvds_obj_meta_iterator
Expand Down Expand Up @@ -52,9 +51,10 @@ def __call__(


class ObjectsPreprocessing:
def __init__(self):
def __init__(self, batch_size: int):
self._preprocessing_functions = {}
self._frames_map = {}
self._stream_pool = [cv2.cuda.Stream() for _ in range(batch_size)]
self.logger = get_logger(__name__)

def add_preprocessing_function(
Expand Down Expand Up @@ -95,77 +95,78 @@ def preprocessing(
self._frames_map[buffer] = {}

nvds_batch_meta = pyds.gst_buffer_get_nvds_batch_meta(buffer)
with CudaStreams() as cuda_streams:
for nvds_frame_meta in nvds_frame_meta_iterator(nvds_batch_meta):
left = 0
top = 0
row_height = 0
cuda_stream = cuda_streams.get_cuda_stream(nvds_frame_meta)
with nvds_to_gpu_mat(buffer, nvds_frame_meta) as frame_mat:
frame_image = GPUImage(image=frame_mat, cuda_stream=cuda_stream)
copy_frame_image = GPUImage(
image=frame_mat.clone(), cuda_stream=cuda_stream

for nvds_frame_meta in nvds_frame_meta_iterator(nvds_batch_meta):
left = 0
top = 0
row_height = 0
cuda_stream = self._stream_pool[nvds_frame_meta.batch_id]
with nvds_to_gpu_mat(buffer, nvds_frame_meta) as frame_mat:
frame_image = GPUImage(image=frame_mat, cuda_stream=cuda_stream)
copy_frame_image = GPUImage(
image=frame_mat.clone(), cuda_stream=cuda_stream
)
self._frames_map[buffer][nvds_frame_meta.batch_id] = copy_frame_image
for nvds_obj_meta in nvds_obj_meta_iterator(nvds_frame_meta):
if nvds_obj_meta.class_id != class_id:
continue
if nvds_obj_meta.unique_component_id != model_uid:
continue
object_meta = _NvDsObjectMetaImpl.from_nv_ds_object_meta(
object_meta=nvds_obj_meta, frame_meta=nvds_frame_meta
)
self._frames_map[buffer][
nvds_frame_meta.batch_id
] = copy_frame_image
for nvds_obj_meta in nvds_obj_meta_iterator(nvds_frame_meta):
if nvds_obj_meta.class_id != class_id:
continue
if nvds_obj_meta.unique_component_id != model_uid:
continue
object_meta = _NvDsObjectMetaImpl.from_nv_ds_object_meta(
object_meta=nvds_obj_meta, frame_meta=nvds_frame_meta

try:
preprocess_image = preprocessing_func(
object_meta=object_meta,
frame_image=copy_frame_image,
cuda_stream=cuda_stream,
)
except Exception as exc:
if dev_mode:
if not isinstance(exc, PyFuncNoopCallException):
self.logger.exception(
'Error in input image preprocessing.'
)
continue
raise exc

try:
preprocess_image = preprocessing_func(
object_meta=object_meta,
frame_image=copy_frame_image,
cuda_stream=cuda_stream,
)
except Exception as exc:
if dev_mode:
if not isinstance(exc, PyFuncNoopCallException):
self.logger.exception(
'Error in input image preprocessing.'
)
continue
raise exc

if not isinstance(preprocess_image, GPUImage):
raise ValueError(
'Preprocessing function must return Image object.'
)
if output_image is not None:
preprocess_image = preprocess_image.resize(
resolution=(output_image.width, output_image.height),
method=output_image.method,
interpolation=output_image.cv2_interpolation,
)
if left + preprocess_image.width > frame_image.width:
left = 0
if row_height == 0:
row_height = preprocess_image.height
top += row_height
row_height = 0
if top >= frame_image.height:
raise ValueError(
'There is no place on frame ' 'to put object image.'
)
if top + preprocess_image.height > frame_image.height:
raise ValueError(
'There is no place on frame ' 'to put object image.'
)
if preprocess_image.height > row_height:
if not isinstance(preprocess_image, GPUImage):
raise ValueError(
'Preprocessing function must return Image object.'
)
if output_image is not None:
preprocess_image = preprocess_image.resize(
resolution=(output_image.width, output_image.height),
method=output_image.method,
interpolation=output_image.cv2_interpolation,
)
if left + preprocess_image.width > frame_image.width:
left = 0
if row_height == 0:
row_height = preprocess_image.height

frame_image.paste(preprocess_image, (left, top))
nvds_obj_meta.rect_params.top = top
nvds_obj_meta.rect_params.left = left
nvds_obj_meta.rect_params.width = preprocess_image.width
nvds_obj_meta.rect_params.height = preprocess_image.height
left += preprocess_image.width
top += row_height
row_height = 0
if top >= frame_image.height:
raise ValueError(
'There is no place on frame ' 'to put object image.'
)
if top + preprocess_image.height > frame_image.height:
raise ValueError(
'There is no place on frame ' 'to put object image.'
)
if preprocess_image.height > row_height:
row_height = preprocess_image.height

frame_image.paste(preprocess_image, (left, top))
nvds_obj_meta.rect_params.top = top
nvds_obj_meta.rect_params.left = left
nvds_obj_meta.rect_params.width = preprocess_image.width
nvds_obj_meta.rect_params.height = preprocess_image.height
left += preprocess_image.width

for stream in self._stream_pool:
stream.waitForCompletion()

def restore_frame(self, buffer: Gst.Buffer):
nvds_batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(buffer))
Expand Down
43 changes: 0 additions & 43 deletions savant/deepstream/cudastream.py

This file was deleted.

2 changes: 1 addition & 1 deletion savant/deepstream/drawfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def __init__(self, **kwargs):
self.default_spec_no_track_id = get_default_draw_spec(track_id=False)

def draw(self, buffer: Gst.Buffer, frame_meta: NvDsFrameMeta):
stream = self.get_cuda_stream(frame_meta)
with nvds_to_gpu_mat(buffer, frame_meta.frame_meta) as frame_mat:
stream = self.get_cuda_stream(frame_meta)
with Artist(frame_mat, stream) as artist:
self.draw_on_frame(frame_meta, artist)

Expand Down
7 changes: 4 additions & 3 deletions savant/deepstream/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def __init__(
self._sources = SourceInfoRegistry()

# c++ preprocessing class
self._objects_preprocessing = ObjectsPreprocessing()
self._objects_preprocessing = ObjectsPreprocessing(self._batch_size)

self._internal_attrs = set()
telemetry: TelemetryParameters = kwargs['telemetry']
Expand Down Expand Up @@ -203,6 +203,7 @@ def add_element(
if element_idx is not None:
if isinstance(element, PyFuncElement):
gst_element.set_property('pipeline', self._video_pipeline)
gst_element.set_property('stream-pool-size', self._batch_size)
# TODO: add stage names to element config?
if isinstance(element_idx, int):
stage = self._element_stages[element_idx]
Expand Down Expand Up @@ -814,8 +815,8 @@ def _update_meta_for_single_frame(
else:
self._logger.debug('Skipping empty primary object.')
continue
if self._logger.isEnabledFor(logging.DEBUG):
self._logger.debug(
if self._logger.isEnabledFor(logging.TRACE):
self._logger.trace(
'Collecting object (frame src %s, IDX %s, PTS %s): %s',
video_frame.source_id,
frame_idx,
Expand Down
Loading

0 comments on commit cedebfc

Please sign in to comment.