From d217aeda31d12667f792681de1867eaba8075048 Mon Sep 17 00:00:00 2001 From: yili123123 Date: Fri, 6 Jan 2023 23:26:18 +0800 Subject: [PATCH 1/3] efficientNMSPlugin: support class-independent nms with new parameter class_agnostic Signed-off-by: yili123123 --- .../efficientNMSPlugin/EfficientNMSPlugin_PluginConfig.yaml | 6 ++++++ plugin/efficientNMSPlugin/README.md | 5 +++++ plugin/efficientNMSPlugin/efficientNMSInference.cu | 6 +++++- plugin/efficientNMSPlugin/efficientNMSParameters.h | 1 + plugin/efficientNMSPlugin/efficientNMSPlugin.cpp | 5 +++++ samples/python/detectron2/create_onnx.py | 1 + samples/python/efficientdet/create_onnx.py | 1 + .../python/tensorflow_object_detection_api/create_onnx.py | 1 + 8 files changed, 25 insertions(+), 1 deletion(-) diff --git a/plugin/efficientNMSPlugin/EfficientNMSPlugin_PluginConfig.yaml b/plugin/efficientNMSPlugin/EfficientNMSPlugin_PluginConfig.yaml index 074308b69..3ce2e849d 100644 --- a/plugin/efficientNMSPlugin/EfficientNMSPlugin_PluginConfig.yaml +++ b/plugin/efficientNMSPlugin/EfficientNMSPlugin_PluginConfig.yaml @@ -9,6 +9,7 @@ versions: - max_output_boxes - background_class - score_activation + - class_agnostic - box_coding attribute_types: score_threshold: float32 @@ -16,6 +17,7 @@ versions: max_output_boxes: int32 background_class: int32 score_activation: int32 + class_agnostic: int32 box_coding: int32 attribute_length: score_threshold: 1 @@ -23,6 +25,7 @@ versions: max_output_boxes: 1 background_class: 1 score_activation: 1 + class_agnostic: 1 box_coding: 1 attribute_options: score_threshold: @@ -40,6 +43,9 @@ versions: score_activation: - 0 - 1 + class_agnostic: + - 0 + - 1 box_coding: - 0 - 1 diff --git a/plugin/efficientNMSPlugin/README.md b/plugin/efficientNMSPlugin/README.md index cb8603de2..989a62390 100644 --- a/plugin/efficientNMSPlugin/README.md +++ b/plugin/efficientNMSPlugin/README.md @@ -98,6 +98,7 @@ The following four output tensors are generated: |`int` |`max_output_boxes` |The maximum number of 
detections to output per image. |`int` |`background_class` |The label ID for the background class. If there is no background class, set it to `-1`. |`bool` |`score_activation` * |Set to true to apply sigmoid activation to the confidence scores during NMS operation. +|`bool` |`class_agnostic` |Set to true to do class-independent nms, otherwise, different class would do nms respectively. |`int` |`box_coding` |Coding type used for boxes (and anchors if applicable), 0 = BoxCorner, 1 = BoxCenterSize. Parameters marked with a `*` have a non-negligible effect on runtime latency. See the [Performance Tuning](#performance-tuning) section below for more details on how to set them optimally. @@ -134,6 +135,10 @@ The algorithm is highly sensitive to the selected `score_threshold` parameter. W Depending on network configuration, it is usually more efficient to provide raw scores (pre-sigmoid) to the NMS plugin scores input, and enable the `score_activation` parameter. Doing so applies a sigmoid activation only to the last `max_output_boxes` selected scores, instead of all the predicted scores, largely reducing the computational cost. +#### Class Independent NMS + +Some object detection networks/architectures like YOLO series need to use class independent nms operations. Do class independent nms if `class_agnostic` is enabled, otherwise, different class would do nms respectively. + #### Using the Fused Box Decoder When using networks with many anchors, such as EfficientDet or SSD, it may be more efficient to do box decoding within the NMS plugin. For this, pass the raw box predictions as the boxes input, and the default anchor coordinates as the optional third input to the plugin. 
diff --git a/plugin/efficientNMSPlugin/efficientNMSInference.cu b/plugin/efficientNMSPlugin/efficientNMSInference.cu index 28135b8c7..81d48fb81 100644 --- a/plugin/efficientNMSPlugin/efficientNMSInference.cu +++ b/plugin/efficientNMSPlugin/efficientNMSInference.cu @@ -314,12 +314,16 @@ __global__ void EfficientNMS(EfficientNMSParameters param, const int* topNumData for (int tile = 0; tile < numTiles; tile++) { + bool check_class = true; + if (!param.classAgnostic) + check_class = threadClass[tile] == testClass; + // IOU if (boxIdx[tile] > i && // Make sure two different boxes are being tested, and that it's a higher index; boxIdx[tile] < numSelectedBoxes && // Make sure the box is within numSelectedBoxes; blockState == 1 && // Signal that allows IOU checks to be performed; threadState[tile] == 0 && // Make sure this box hasn't been either dropped or kept already; - threadClass[tile] == testClass && // Compare only boxes of matching classes; + check_class && // Compare only boxes of matching classes when classAgnostic is false; lte_mp(threadScore[tile], testScore) && // Make sure the sorting order of scores is as expected; IOU(param, threadBox[tile], testBox) >= param.iouThreshold) // And... IOU overlap. 
{ diff --git a/plugin/efficientNMSPlugin/efficientNMSParameters.h b/plugin/efficientNMSPlugin/efficientNMSParameters.h index 216455bbe..9cc4e6a6e 100644 --- a/plugin/efficientNMSPlugin/efficientNMSParameters.h +++ b/plugin/efficientNMSPlugin/efficientNMSParameters.h @@ -37,6 +37,7 @@ struct EfficientNMSParameters bool scoreSigmoid = false; bool clipBoxes = false; int boxCoding = 0; + bool classAgnostic = false; // Related to NMS Internals int numSelectedBoxes = 4096; diff --git a/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp b/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp index 6edbd3d64..249553711 100644 --- a/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp +++ b/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp @@ -428,6 +428,7 @@ EfficientNMSPluginCreator::EfficientNMSPluginCreator() mPluginAttributes.emplace_back(PluginField("max_output_boxes", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("background_class", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("score_activation", nullptr, PluginFieldType::kINT32, 1)); + mPluginAttributes.emplace_back(PluginField("class_agnostic", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("box_coding", nullptr, PluginFieldType::kINT32, 1)); mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); @@ -493,6 +494,10 @@ IPluginV2DynamicExt* EfficientNMSPluginCreator::createPlugin(const char* name, c PLUGIN_VALIDATE(scoreSigmoid == 0 || scoreSigmoid == 1); mParam.scoreSigmoid = static_cast(scoreSigmoid); } + if (!strcmp(attrName, "class_agnostic")) + { + mParam.classAgnostic = *(static_cast(fields[i].data)); + } if (!strcmp(attrName, "box_coding")) { PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kINT32); diff --git a/samples/python/detectron2/create_onnx.py b/samples/python/detectron2/create_onnx.py index 90b293867..8e2c1e8eb 100644 --- a/samples/python/detectron2/create_onnx.py +++ 
b/samples/python/detectron2/create_onnx.py @@ -289,6 +289,7 @@ def NMS(self, boxes, scores, anchors, background_class, score_activation, max_pr 'score_threshold': max(0.01, score_threshold), 'iou_threshold': iou_threshold, 'score_activation': score_activation, + 'class_agnostic': False, 'box_coding': 1, } ) diff --git a/samples/python/efficientdet/create_onnx.py b/samples/python/efficientdet/create_onnx.py index 01897192a..0c66620cc 100644 --- a/samples/python/efficientdet/create_onnx.py +++ b/samples/python/efficientdet/create_onnx.py @@ -386,6 +386,7 @@ def get_anchor_np(output_idx, op): 'score_threshold': max(0.01, score_threshold), # Keep threshold to at least 0.01 for better efficiency 'iou_threshold': iou_threshold, 'score_activation': True, + 'class_agnostic': False, 'box_coding': 1, } nms_output_classes_dtype = np.int32 diff --git a/samples/python/tensorflow_object_detection_api/create_onnx.py b/samples/python/tensorflow_object_detection_api/create_onnx.py index 35b7064d8..3ecb1b939 100644 --- a/samples/python/tensorflow_object_detection_api/create_onnx.py +++ b/samples/python/tensorflow_object_detection_api/create_onnx.py @@ -367,6 +367,7 @@ def NMS(self, box_net_tensor, class_net_tensor, anchors_tensor, background_class 'score_threshold': max(0.01, score_threshold), 'iou_threshold': iou_threshold, 'score_activation': score_activation, + 'class_agnostic': False, 'box_coding': 1, } ) From 6d52bd91c6bd0adca5e84963b783047ec64899db Mon Sep 17 00:00:00 2001 From: yili123123 <120350735+yili123123@users.noreply.github.com> Date: Tue, 7 Feb 2023 11:19:36 +0800 Subject: [PATCH 2/3] Apply suggestions from code review Signed-off-by: yili123123 <120350735+yili123123@users.noreply.github.com> Co-authored-by: Samurdhi Karunaratne <97725867+samurdhikaru@users.noreply.github.com> --- plugin/efficientNMSPlugin/README.md | 4 ++-- plugin/efficientNMSPlugin/efficientNMSPlugin.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/plugin/efficientNMSPlugin/README.md b/plugin/efficientNMSPlugin/README.md index 989a62390..8a972b36b 100644 --- a/plugin/efficientNMSPlugin/README.md +++ b/plugin/efficientNMSPlugin/README.md @@ -98,7 +98,7 @@ The following four output tensors are generated: |`int` |`max_output_boxes` |The maximum number of detections to output per image. |`int` |`background_class` |The label ID for the background class. If there is no background class, set it to `-1`. |`bool` |`score_activation` * |Set to true to apply sigmoid activation to the confidence scores during NMS operation. -|`bool` |`class_agnostic` |Set to true to do class-independent nms, otherwise, different class would do nms respectively. +|`bool` |`class_agnostic` |Set to true to perform class-independent NMS; otherwise, boxes of different classes are considered separately during NMS. |`int` |`box_coding` |Coding type used for boxes (and anchors if applicable), 0 = BoxCorner, 1 = BoxCenterSize. Parameters marked with a `*` have a non-negligible effect on runtime latency. See the [Performance Tuning](#performance-tuning) section below for more details on how to set them optimally. @@ -137,7 +137,7 @@ Depending on network configuration, it is usually more efficient to provide raw #### Class Independent NMS -Some object detection networks/architectures like YOLO series need to use class independent nms operations. Do class independent nms if `class_agnostic` is enabled, otherwise, different class would do nms respectively. +Some object detection networks/architectures, such as the YOLO series, need class-independent NMS. If `class_agnostic` is enabled, class-independent NMS is performed; otherwise, NMS is performed separately for each class. 
#### Using the Fused Box Decoder diff --git a/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp b/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp index 249553711..ff6632573 100644 --- a/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp +++ b/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp @@ -496,7 +496,7 @@ IPluginV2DynamicExt* EfficientNMSPluginCreator::createPlugin(const char* name, c } if (!strcmp(attrName, "class_agnostic")) { - mParam.classAgnostic = *(static_cast(fields[i].data)); + mParam.classAgnostic = *(static_cast(fields[i].data)); } if (!strcmp(attrName, "box_coding")) { From 15338f4e97f27c32473b193a1418c0523105bbb9 Mon Sep 17 00:00:00 2001 From: yili123123 Date: Sun, 12 Feb 2023 13:04:16 +0800 Subject: [PATCH 3/3] change variable name Signed-off-by: yili123123 --- plugin/efficientNMSPlugin/efficientNMSInference.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugin/efficientNMSPlugin/efficientNMSInference.cu b/plugin/efficientNMSPlugin/efficientNMSInference.cu index 81d48fb81..3cf7e8ef4 100644 --- a/plugin/efficientNMSPlugin/efficientNMSInference.cu +++ b/plugin/efficientNMSPlugin/efficientNMSInference.cu @@ -314,16 +314,16 @@ __global__ void EfficientNMS(EfficientNMSParameters param, const int* topNumData for (int tile = 0; tile < numTiles; tile++) { - bool check_class = true; + bool ignoreClass = true; if (!param.classAgnostic) - check_class = threadClass[tile] == testClass; + ignoreClass = threadClass[tile] == testClass; // IOU if (boxIdx[tile] > i && // Make sure two different boxes are being tested, and that it's a higher index; boxIdx[tile] < numSelectedBoxes && // Make sure the box is within numSelectedBoxes; blockState == 1 && // Signal that allows IOU checks to be performed; threadState[tile] == 0 && // Make sure this box hasn't been either dropped or kept already; - check_class && // Compare only boxes of matching classes when classAgnostic is false; + ignoreClass && // Compare only boxes of matching classes when 
classAgnostic is false; lte_mp(threadScore[tile], testScore) && // Make sure the sorting order of scores is as expected; IOU(param, threadBox[tile], testBox) >= param.iouThreshold) // And... IOU overlap. {