YOLOv8-seg 分割模型的 TensorRT 部署（推理不依赖 torch）

已完成的 YOLOv8-seg 分割模型 TensorRT 部署

准备
下载yolov8-seg模型
转化为onnx和trt
推理
- 写好的推理接口

准备

从 https://github.com/songjiahao-wq/yolov8_seg_trtinference.git 下载代码
安装 TensorRT 8.6 版本，并执行 `pip install -r requirements.txt` 安装其余依赖

下载yolov8-seg模型

转化为onnx和trt

转化方法如下：

# tensorRT==8.6
## yolov8-seg CLI指令
### 转化ONNX模型
`python export-seg.py --weights yolov8m-seg.pt --opset 14 --sim --input-shape 1 3 640 640 --device cuda:0`

`python export-seg.py --weights yolov8m-seg.pt --opset 14 --sim --input-shape 1 3 448 512 --device cuda:0`
### 导出trt模型
`python build.py --weights yolov8m-seg.onnx --fp16  --device cuda:0 --seg`
### 采用trtexec导出trt模型
`E:\Download\TensorRT-10.0.1.6\bin\trtexec --onnx=yolov8m-seg.onnx --saveEngine=yolov8m-seg.engine --fp16`
### 不需要torch环境推理
`python infer-seg-without-torch.py --engine yolov8m-seg.engine --imgs data --show --out-dir outputs --method cudart`
### 需要torch环境推理
`python infer-seg.py`


- [x] infer-seg-without-torch-port.py 调用接口，每次只保存mask.txt
- [x] infer-seg-without-torch.py 不需要torch调用，有cuda和pycuda

首先转化为onnx模型

python export-seg.py --weights yolov8m-seg.pt --opset 14 --sim --input-shape 1 3 640 640 --device cuda:0

然后转化为trt模型
有两种转化方式：
代码转化：python build.py --weights yolov8m-seg.onnx --fp16 --device cuda:0 --seg
trtexec转化：trtexec --onnx=yolov8m-seg.onnx --saveEngine=yolov8m-seg.engine --fp16

推理

推理方法有两种：
cudart推理，不包含torch

python infer-seg-without-torch.py --engine yolov8m-seg.engine --imgs data --show --out-dir outputs --method cudart
pycuda推理，不包含torch
`python infer-seg-without-torch.py --engine yolov8m-seg.engine --imgs data --show --out-dir outputs --method pycuda`
带torch的推理
python infer-seg.py

写好的推理接口

import argparse
import time
from pathlib import Path

import cv2
import numpy as np

from config import ALPHA, CLASSES, COLORS, MASK_COLORS
from models.utils import blob, letterbox, path_to_list, seg_postprocess
import torch


def clip_segments(segments, shape):
    """Clamp polygon points in place so they lie inside an image.

    Args:
        segments: ``(n, 2)`` torch tensor or NumPy array of ``(x, y)`` points.
            It is modified in place; nothing is returned.
        shape: Image shape given as ``(height, width)``; only the first two
            entries are read.
    """
    # Column 0 is x (bounded by the width), column 1 is y (bounded by the height).
    bounds = ((0, shape[1]), (1, shape[0]))
    if not isinstance(segments, torch.Tensor):
        # NumPy path: clip each column and write the result back.
        for column, upper in bounds:
            segments[:, column] = segments[:, column].clip(0, upper)
        return
    # Torch path: in-place clamp on each column view.
    for column, upper in bounds:
        segments[:, column].clamp_(0, upper)


def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
    """Map polygon points from one image frame (img1) to another (img0).

    The points are shifted by the padding, divided by the gain, clipped to
    ``img0_shape`` and, if requested, divided by the img0 width/height.

    Args:
        img1_shape: ``(height, width)`` of the frame the points currently live in.
        segments: ``(n, 2)`` array/tensor of ``(x, y)`` points; modified in place.
        img0_shape: ``(height, width[, ...])`` of the target frame.
        ratio_pad: Optional ``((gain, ...), (pad_x, pad_y))``. When omitted, the
            gain and padding are derived from the two shapes.
        normalize: If true, divide x by the target width and y by the target height.

    Returns:
        The same ``segments`` object after the in-place transformation.
    """
    if ratio_pad is not None:
        gain, pad = ratio_pad[0][0], ratio_pad[1]
    else:
        # gain = old / new, taking the tighter of the two axes.
        height_ratio = img1_shape[0] / img0_shape[0]
        width_ratio = img1_shape[1] / img0_shape[1]
        gain = min(height_ratio, width_ratio)
        # Padding is stored as (x, y), i.e. (width padding, height padding).
        pad = (
            (img1_shape[1] - img0_shape[1] * gain) / 2,
            (img1_shape[0] - img0_shape[0] * gain) / 2,
        )

    for axis in (0, 1):  # remove x padding, then y padding
        segments[:, axis] -= pad[axis]
    segments /= gain
    clip_segments(segments, img0_shape)

    if not normalize:
        return segments
    segments[:, 0] /= img0_shape[1]  # x over width
    segments[:, 1] /= img0_shape[0]  # y over height
    return segments


def masks2segments(masks, strategy="largest"):
    """Convert a stack of binary masks into polygon segments.

    Args:
        masks: Masks iterated along the first axis. Either a torch tensor or
            anything ``np.asarray`` accepts, so the function also works when
            torch is not used by the caller. Each mask must be a single-channel
            image that ``cv2.findContours`` accepts after conversion to uint8.
        strategy: ``"largest"`` keeps the contour with the most points;
            ``"concat"`` concatenates the points of every contour.

    Returns:
        A list with one float32 ``(k, 2)`` array of ``(x, y)`` points per mask.
        A mask without any contour yields an empty ``(0, 2)`` array.

    Raises:
        ValueError: If ``strategy`` is not ``"largest"`` or ``"concat"``.
    """
    if strategy not in ("concat", "largest"):
        # Previously an unknown strategy surfaced as an AttributeError on a tuple.
        raise ValueError(f"unknown strategy {strategy!r}; expected 'concat' or 'largest'")

    if hasattr(masks, "cpu"):
        # Duck-typed torch tensor: avoids referencing torch for NumPy callers.
        masks = masks.int().cpu().numpy()
    else:
        # Go through int32 first so truncation matches the tensor path.
        masks = np.asarray(masks).astype("int32")

    segments = []
    for mask in masks.astype("uint8"):
        # [-2] selects the contour list on both the 2-tuple (OpenCV 4.x) and
        # 3-tuple (OpenCV 3.x) return conventions of findContours.
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if contours:
            if strategy == "concat":
                points = np.concatenate([c.reshape(-1, 2) for c in contours])
            else:  # "largest": contour with the most points (first one on ties)
                points = np.array(max(contours, key=len)).reshape(-1, 2)
        else:
            points = np.zeros((0, 2))  # no contour found in this mask
        segments.append(points.astype("float32"))
    return segments


def keep_highest_conf_per_class(bboxes, scores, labels, segments, classes=0):
    """Keep only the best-ranked detection of a single class.

    Detections whose label equals ``classes`` are ranked by
    ``score * 0.1 + box_area`` and the top one is kept.

    NOTE(review): with pixel-sized boxes the area term dominates this ranking,
    so despite the function name the *largest* box usually wins and the
    confidence mostly breaks ties. The formula is kept as it was; confirm that
    this weighting is intended.

    Args:
        bboxes: ``(n, 4)`` array of ``x1, y1, x2, y2`` boxes.
        scores: ``(n,)`` array of confidences.
        labels: ``(n,)`` array of class ids.
        segments: Sequence of ``n`` polygons, one per detection. Polygons may
            have different numbers of points.
        classes: The class id to keep.

    Returns:
        A 5-tuple ``(bboxes, scores, labels, segments, indices)`` restricted to
        the kept detection: arrays of shape ``(1, 4)``, ``(1,)`` and ``(1,)``,
        a one-element list holding the polygon, and a one-element list with the
        detection's index in the inputs. When there is no detection of the
        requested class, every element is empty; the tuple still has five items
        so callers can always unpack it.
    """
    bboxes = np.asarray(bboxes)
    scores = np.asarray(scores)
    labels = np.asarray(labels)

    # Indices (into the original inputs) of detections of the requested class.
    candidates = np.flatnonzero(labels == classes)
    if candidates.size == 0:
        return bboxes[:0], scores[:0], labels[:0], [], []

    boxes = bboxes[candidates]
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    ranking = scores[candidates] * 0.1 + 1.0 * areas
    best = int(candidates[np.argmax(ranking)])

    # Polygons are ragged, so they are indexed directly instead of being
    # stacked into one array together with the boxes.
    keep = [best]
    return bboxes[keep], scores[keep], labels[keep], [segments[best]], keep


class YOLOv8_seg_main:
    """YOLOv8-seg TensorRT inference wrapper that exports the best class-0 mask.

    The engine is loaded once in ``__init__``; ``main`` processes one image per
    call and, depending on the options, writes a polygon label file and/or an
    overlay image.
    """

    def __init__(self, args: argparse.Namespace):
        """Load the TensorRT engine with the backend selected by ``args.method``.

        Args:
            args: Options namespace. This class reads ``method``, ``engine``,
                ``out_dir``, ``show``, ``save_txt``, ``conf_thres`` and
                ``iou_thres`` from it.

        Raises:
            NotImplementedError: If ``args.method`` is neither ``'cudart'`` nor
                ``'pycuda'``.
        """
        # Imported lazily so only the selected CUDA binding has to be installed.
        if args.method == 'cudart':
            from models.cudart_api import TRTEngine
        elif args.method == 'pycuda':
            from models.pycuda_api import TRTEngine
        else:
            raise NotImplementedError(f'unsupported method: {args.method!r}')
        self.Engine = TRTEngine(args.engine)
        # Network input height/width, taken from the first engine input.
        self.H, self.W = self.Engine.inp_info[0].shape[-2:]
        self.args = args

    def main(self, bgr, imagename, outtxtdir) -> None:
        """Run segmentation on one image and save the best class-0 result.

        Args:
            bgr: Input image as a BGR array (as returned by ``cv2.imread``).
            imagename: Path or name of the image; its stem names the label file
                and its file name names the overlay image.
            outtxtdir: Directory that receives ``<stem>.txt`` when
                ``args.save_txt`` is set. It is created if missing.

        When nothing is detected (or no class-0 object is found) a message is
        printed and no file is written.
        """
        # Use the options stored on the instance rather than a module-level
        # global, so the class also works when imported from another module.
        args = self.args
        outtxtdir = Path(outtxtdir)
        save_path = Path(args.out_dir)

        draw = bgr.copy()
        bgr, ratio, dwdh = letterbox(bgr, (self.W, self.H))
        dw, dh = int(dwdh[0]), int(dwdh[1])
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        tensor, seg_img = blob(rgb, return_seg=True)
        # presumably dwdh is a (dw, dh) tuple, so `* 2` yields (dw, dh, dw, dh)
        # to match the xyxy box layout — TODO confirm against letterbox().
        dwdh = np.array(dwdh * 2, dtype=np.float32)
        tensor = np.ascontiguousarray(tensor)
        # inference
        data = self.Engine(tensor)
        # Crop the letterbox padding and swap the channel order for drawing.
        seg_img = seg_img[dh:self.H - dh, dw:self.W - dw, [2, 1, 0]]
        bboxes, scores, labels, masks = seg_postprocess(
            data, bgr.shape[:2], args.conf_thres, args.iou_thres)
        if bboxes.size == 0:
            # `assert print(...)` always raised AssertionError; report and skip.
            print(f'{imagename}: no object!')
            return
        masks = masks[:, dh:self.H - dh, dw:self.W - dw, :]
        # Keep the polygons in detection order: they are indexed below with the
        # same index as bboxes/labels/masks, so they must not be reversed.
        # NOTE(review): the polygons come from the padding-cropped masks but are
        # normalized with the letterboxed image shape; when dw/dh are non-zero
        # this looks like it under-scales the coordinates — confirm.
        segments = [
            scale_segments(tensor.shape[2:], x, rgb.shape, normalize=True)
            for x in masks2segments(torch.from_numpy(masks))
        ]

        # Map boxes from the letterboxed frame back to the original image.
        bboxes -= dwdh
        bboxes /= ratio

        # Keep only the best class-0 detection.
        bboxes, scores, labels, segments, max_conf_indices = keep_highest_conf_per_class(
            bboxes, scores, labels, segments, classes=0)
        if len(max_conf_indices) == 0:
            print(f'{imagename}: no object!')
            return

        if args.show:
            # Blend the kept mask over the image.
            masks = masks[max_conf_indices]
            mask_colors = MASK_COLORS[0]
            mask_colors = mask_colors.reshape(-1, 1, 1, 3) * ALPHA
            mask_colors = masks @ mask_colors
            inv_alph_masks = (1 - masks * 0.5).cumprod(0)
            mcs = (mask_colors * inv_alph_masks).sum(0) * 2
            seg_img = (seg_img * inv_alph_masks[-1] + mcs) * 255
            draw = cv2.resize(seg_img.astype(np.uint8), draw.shape[:2][::-1])

        if args.save_txt:
            outtxtdir.mkdir(parents=True, exist_ok=True)
            seg = segments[0].reshape(-1)  # (n,2) to (n*2)
            line = (int(labels[0]), *seg)  # label format
            with open(outtxtdir / f"{Path(imagename).stem}.txt", "w") as f:
                f.write(("%g " * len(line)).rstrip() % line + "\n")

        if args.show:
            # The image is only written when `show` is set, so that is when the
            # output directory has to exist (the old condition was inverted).
            save_path.mkdir(parents=True, exist_ok=True)
            save_image = save_path / Path(imagename).name
            cv2.imwrite(str(save_image), draw)


def parse_args(argv=None):
    """Parse the command-line options of the inference script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) parses
            ``sys.argv[1:]``, which is the previous behavior.

    Returns:
        An ``argparse.Namespace`` with ``engine``, ``imgs``, ``show``,
        ``save_txt``, ``out_dir``, ``conf_thres``, ``iou_thres`` and ``method``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine', type=str, default="../yolov8l-seg.engine", help='Engine file')
    parser.add_argument('--imgs', type=str, default="data", help='Images file')
    parser.add_argument('--show',
                        action='store_true',
                        default=False,
                        help='Show the detection results')
    parser.add_argument('--save_txt',
                        action='store_true',
                        default=True,
                        help='save_txt the detection results (enabled by default)')
    # `--save_txt` defaults to True, so on its own it could never be switched
    # off; this companion flag makes that possible without changing defaults.
    parser.add_argument('--no-save_txt', '--no-save-txt',
                        dest='save_txt',
                        action='store_false',
                        help='Do not save the detection results as txt')
    parser.add_argument('--out-dir',
                        type=str,
                        default='./output',
                        help='Path to output file')
    parser.add_argument('--conf-thres',
                        type=float,
                        default=0.25,
                        help='Confidence threshold')
    parser.add_argument('--iou-thres',
                        type=float,
                        default=0.25,
                        help='IoU threshold')
    parser.add_argument('--method',
                        type=str,
                        default='cudart',
                        choices=['cudart', 'pycuda'],
                        help='CUDA binding used for inference: cudart or pycuda')
    args = parser.parse_args(argv)
    return args


if __name__ == '__main__':
    # Demo / benchmark: run the same image through the pipeline 100 times and
    # print the total elapsed time in seconds.
    args = parse_args()
    # A distinct name keeps the instance from shadowing the YOLOv8_seg_main class.
    segmenter = YOLOv8_seg_main(args)
    imgpath = './data/1.jpg'
    outtxtdir = './output'
    bgr_img = cv2.imread(imgpath)
    if bgr_img is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError(f'cannot read image: {imgpath}')
    # perf_counter is monotonic and intended for measuring elapsed time.
    t1 = time.perf_counter()
    for _ in range(100):
        segmenter.main(bgr_img, imgpath, outtxtdir)
    print(time.perf_counter() - t1)