Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

c++ inference #15

Open
interstellar-space opened this issue Jun 3, 2024 · 2 comments
Open

c++ inference #15

interstellar-space opened this issue Jun 3, 2024 · 2 comments

Comments

@interstellar-space
Copy link

interstellar-space commented Jun 3, 2024

I wrote some C++ code, but the inference results are different from Python. Can you help me take a look?

#include <iostream>
#include <fstream>
#include <vector>
#include <opencv2/opencv.hpp>
#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <NvInferRuntimeCommon.h>
#include <algorithm>
#include <cmath>
#include <numeric>
#include <Eigen/Dense>
#include <unsupported/Eigen/Splines>

using namespace nvinfer1;

const std::vector<cv::Scalar> COLORS = {
    cv::Scalar(255, 0, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 0, 255),
    cv::Scalar(255, 255, 0), cv::Scalar(255, 0, 255), cv::Scalar(0, 255, 255),
    cv::Scalar(128, 255, 0), cv::Scalar(255, 128, 0), cv::Scalar(128, 0, 255),
    cv::Scalar(255, 0, 128), cv::Scalar(0, 128, 255), cv::Scalar(0, 255, 128),
    cv::Scalar(128, 255, 255), cv::Scalar(255, 128, 255), cv::Scalar(255, 255, 128),
    cv::Scalar(60, 180, 0), cv::Scalar(180, 60, 0), cv::Scalar(0, 60, 180),
    cv::Scalar(0, 180, 60), cv::Scalar(60, 0, 180), cv::Scalar(180, 0, 60)};

class Lane
{
public:
    Lane(const std::vector<cv::Point2f> &points, float invalid_value = -2.0f)
        : points(points), invalid_value(invalid_value)
    {
        // Initialize spline interpolation using Eigen
        Eigen::VectorXd x(points.size()), y(points.size());
        for (size_t i = 0; i < points.size(); ++i)
        {
            x[i] = points[i].y;
            y[i] = points[i].x;
        }
        spline = Eigen::SplineFitting<Eigen::Spline<double, 1>>::Interpolate(y.transpose(), std::min<int>(3, points.size() - 1), x);
        min_y = x.minCoeff() - 0.01;
        max_y = x.maxCoeff() + 0.01;
    }

    std::vector<cv::Point2f> to_array() const
    {
        std::vector<cv::Point2f> lane;
        for (int y = 710; y >= 150; y -= 10)
        {
            double x = spline(y)(0);
            if (x >= 0 && x < 1)
            {
                lane.emplace_back(x * 1280, y);
            }
        }
        return lane;
    }

private:
    std::vector<cv::Point2f> points;
    float invalid_value;
    Eigen::Spline<double, 1> spline;
    double min_y, max_y;
};

class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, const char *msg) noexcept override
    {
        if (severity <= Severity::kINFO)
        {
            std::cerr << msg << std::endl;
        }
    }
};

class CLRNetDemo
{
public:
    CLRNetDemo(const std::string &engine_path)
    {
        // Load TensorRT engine
        std::ifstream engine_file(engine_path, std::ios::binary);
        std::vector<char> engine_data((std::istreambuf_iterator<char>(engine_file)), std::istreambuf_iterator<char>());
        runtime = createInferRuntime(logger);
        engine = runtime->deserializeCudaEngine(engine_data.data(), engine_data.size());
        context = engine->createExecutionContext();

        // Initialize input and output bindings
        for (int i = 0; i < engine->getNbBindings(); ++i)
        {
            if (engine->bindingIsInput(i))
            {
                input_binding = i;
            }
            else
            {
                output_binding = i;
            }
        }

        // Allocate memory for input and output
        auto input_dims = engine->getBindingDimensions(input_binding);
        auto output_dims = engine->getBindingDimensions(output_binding);
        size_t input_size = 1;
        size_t output_size = 1;
        for (int i = 0; i < input_dims.nbDims; ++i)
        {
            input_size *= input_dims.d[i];
        }
        for (int i = 0; i < output_dims.nbDims; ++i)
        {
            output_size *= output_dims.d[i];
        }
        cudaMalloc(&buffers[input_binding], input_size * sizeof(float));
        cudaMalloc(&buffers[output_binding], output_size * sizeof(float));
        cudaStreamCreate(&stream);
    }

    ~CLRNetDemo()
    {
        cudaFree(buffers[input_binding]);
        cudaFree(buffers[output_binding]);
        cudaStreamDestroy(stream);
        context->destroy();
        engine->destroy();
        runtime->destroy();
    }

    cv::Mat forward(const cv::Mat &img)
    {
        // Preprocess input image
        cv::Mat input_img = img(cv::Rect(0, 160, img.cols, img.rows - 160));
        cv::resize(input_img, input_img, cv::Size(800, 320), cv::INTER_CUBIC);
        input_img.convertTo(input_img, CV_32FC3, 1.0 / 255.0);

        // Transpose the image to match the model input
        cv::Mat input_img_transposed;
        cv::dnn::blobFromImage(input_img, input_img_transposed);

        // Allocate memory for input and output
        std::vector<float> input_data(input_img_transposed.total() * input_img_transposed.channels());
        std::memcpy(input_data.data(), input_img_transposed.data, input_data.size() * sizeof(float));

        auto output_dims = engine->getBindingDimensions(output_binding);
        size_t output_size = 1;
        for (int i = 0; i < output_dims.nbDims; ++i)
        {
            output_size *= output_dims.d[i];
        }
        std::vector<float> output_data(output_size);

        // Execute inference
        cudaMemcpyAsync(buffers[input_binding], input_data.data(), input_data.size() * sizeof(float), cudaMemcpyHostToDevice, stream);
        context->enqueueV2(buffers, stream, nullptr);
        cudaMemcpyAsync(output_data.data(), buffers[output_binding], output_data.size() * sizeof(float), cudaMemcpyDeviceToHost, stream);
        cudaStreamSynchronize(stream);

        // Postprocess output
        auto lanes = get_lanes(output_data);
        return imshow_lanes(img, lanes);
    }

private:
    IRuntime *runtime;
    ICudaEngine *engine;
    IExecutionContext *context;
    int input_binding, output_binding;
    void *buffers[2];
    cudaStream_t stream;
    Logger logger;

    std::vector<Lane> get_lanes(const std::vector<float> &output)
    {
        std::vector<Lane> decoded;
        std::vector<std::vector<float>> predictions(output.size() / 78, std::vector<float>(78));

        for (size_t i = 0; i < predictions.size(); ++i)
        {
            std::copy(output.begin() + i * 78, output.begin() + (i + 1) * 78, predictions[i].begin());
        }

        for (auto &prediction : predictions)
        {
            std::vector<float> scores = softmax({prediction[0], prediction[1]});
            std::cout << "scores: " << scores[0] << ", " << scores[1] << "\n";
            if (scores[1] < 0.4)
            {
                continue;
            }

            std::vector<std::vector<float>> nms_predictions;
            for (size_t i = 0; i < prediction.size(); ++i)
            {
                if (i < 4 || i >= 5)
                {
                    nms_predictions.push_back(prediction);
                }
            }
            std::cout << "nms_predictions: " << nms_predictions.size() << "\n";

            for (auto &nms_prediction : nms_predictions)
            {
                nms_prediction[4] *= 71;
                for (size_t i = 5; i < nms_prediction.size(); ++i)
                {
                    nms_prediction[i] *= 1279;
                }
            }

            auto keep = Lane_nms(nms_predictions, scores, 50, 5);
            std::vector<std::vector<float>> filtered_predictions;
            for (auto idx : keep)
            {
                filtered_predictions.push_back(predictions[idx]);
            }

            for (auto &filtered_prediction : filtered_predictions)
            {
                filtered_prediction[5] = std::round(filtered_prediction[5] * 71);
            }
            std::cout << "filtered_predictions: " << filtered_predictions.size() << "\n";

            auto pred = predictions_to_pred(filtered_predictions);
            decoded.insert(decoded.end(), pred.begin(), pred.end());
        }
        return decoded;
    }

    cv::Mat imshow_lanes(const cv::Mat &img, const std::vector<Lane> &lanes)
    {
        cv::Mat output_img = img.clone();
        for (size_t i = 0; i < lanes.size(); ++i)
        {
            auto lane_points = lanes[i].to_array();
            for (const auto &point : lane_points)
            {
                if (point.x > 0 && point.y > 0)
                {
                    cv::circle(output_img, point, 5, COLORS[i % COLORS.size()], -1);
                }
            }

            for (size_t j = 1; j < lane_points.size(); ++j)
            {
                if (lane_points[j - 1].x > 0 && lane_points[j - 1].y > 0 && lane_points[j].x > 0 && lane_points[j].y > 0)
                {
                    cv::line(output_img, lane_points[j - 1], lane_points[j], COLORS[i % COLORS.size()], 4);
                }
            }
        }
        return output_img;
    }

    std::vector<float> softmax(const std::vector<float> &x)
    {
        std::vector<float> y(x.size());
        float max_val = *std::max_element(x.begin(), x.end());
        float sum = 0.0f;
        for (size_t i = 0; i < x.size(); ++i)
        {
            y[i] = std::exp(x[i] - max_val);
            sum += y[i];
        }
        for (size_t i = 0; i < x.size(); ++i)
        {
            y[i] /= sum;
        }
        return y;
    }

    bool Lane_IOU(const std::vector<float> &parent_box, const std::vector<float> &compared_box, float threshold)
    {
        int n_offsets = 72;
        int n_strips = n_offsets - 1;

        int start_a = static_cast<int>(parent_box[2] * n_strips + 0.5);
        int start_b = static_cast<int>(compared_box[2] * n_strips + 0.5);
        int start = std::max(start_a, start_b);
        int end_a = start_a + static_cast<int>(parent_box[4] - 1 + 0.5 - ((parent_box[4] - 1) < 0));
        int end_b = start_b + static_cast<int>(compared_box[4] - 1 + 0.5 - ((compared_box[4] - 1) < 0));
        int end = std::min({end_a, end_b, 71});
        if ((end - start) < 0)
        {
            return false;
        }
        float dist = 0.0f;
        for (int i = 5 + start; i <= 5 + end; ++i)
        {
            if (parent_box[i] < compared_box[i])
            {
                dist += compared_box[i] - parent_box[i];
            }
            else
            {
                dist += parent_box[i] - compared_box[i];
            }
        }
        return dist < (threshold * (end - start + 1));
    }

    std::vector<int> Lane_nms(const std::vector<std::vector<float>> &proposals, const std::vector<float> &scores, float overlap, int top_k)
    {
        std::vector<int> keep_index;
        std::vector<int> indices(scores.size());
        std::iota(indices.begin(), indices.end(), 0);
        std::sort(indices.begin(), indices.end(), [&scores](int a, int b)
                  { return scores[a] > scores[b]; });

        std::vector<int> r_filters(scores.size(), 0);

        for (size_t i = 0; i < indices.size(); ++i)
        {
            if (r_filters[i] == 1)
            {
                continue;
            }
            keep_index.push_back(indices[i]);
            if (keep_index.size() > static_cast<size_t>(top_k))
            {
                break;
            }
            if (i == indices.size() - 1)
            {
                break;
            }
            for (size_t j = i + 1; j < indices.size(); ++j)
            {
                if (Lane_IOU(proposals[indices[i]], proposals[indices[j]], overlap))
                {
                    r_filters[j] = 1;
                }
            }
        }
        return keep_index;
    }

    std::vector<Lane> predictions_to_pred(const std::vector<std::vector<float>> &predictions)
    {
        std::vector<Lane> lanes;
        for (const auto &lane : predictions)
        {
            std::vector<float> lane_xs(lane.begin() + 6, lane.end());
            int start = std::min(std::max(0, static_cast<int>(std::round(lane[2] * 71))), 71);
            int length = static_cast<int>(std::round(lane[5]));
            int end = start + length - 1;
            end = std::min(end, 71);

            std::vector<bool> mask(start, false);
            for (int i = 0; i < start; ++i)
            {
                if (lane_xs[i] >= 0 && lane_xs[i] <= 1)
                {
                    mask[i] = true;
                }
            }

            for (int i = 0; i < start; ++i)
            {
                if (!mask[i])
                {
                    lane_xs[i] = -2;
                }
            }

            for (int i = end + 1; i < lane_xs.size(); ++i)
            {
                lane_xs[i] = -2;
            }

            std::vector<float> lane_ys;
            for (int i = 0; i < lane_xs.size(); ++i)
            {
                if (lane_xs[i] >= 0)
                {
                    lane_ys.push_back(1.0f - static_cast<float>(i) / 71.0f);
                }
            }

            std::vector<cv::Point2f> points;
            for (int i = 0; i < lane_xs.size(); ++i)
            {
                if (lane_xs[i] >= 0)
                {
                    points.emplace_back(lane_xs[i] * 1280, lane_ys[i] * (720 - 160) + 160);
                }
            }

            if (points.size() > 1)
            {
                lanes.emplace_back(points);
            }
        }
        return lanes;
    }
};

int main(int argc, char *argv[])
{
    if (argc != 3)
    {
        std::cout << argv[0] << ": <engine> <image>" << std::endl;
        return 0;
    }

    CLRNetDemo isnet(argv[1]);
    cv::Mat image = cv::imread(argv[2]);
    if (image.empty())
    {
        std::cerr << "Error: Could not open or find the image!" << std::endl;
        return -1;
    }
    cv::Mat output = isnet.forward(image);
    cv::imwrite("output_trt.png", output);
    return 0;
}
@xjock
Copy link

xjock commented Jun 4, 2024

solved?

@interstellar-space
Copy link
Author

interstellar-space commented Jun 4, 2024

@xjock no... Here is my code, but it cannot draw any lane lines

#include <iostream>
#include <fstream>
#include <NvInfer.h>
#include <memory>
#include <NvOnnxParser.h>
#include <vector>
#include <cuda_runtime_api.h>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudawarping.hpp>
#include <opencv2/core.hpp>
#include <opencv2/cudaarithm.hpp>
#include <algorithm>
#include <numeric>

struct Detection
{
    float background;
    float foreground;
    float start_y;
    float start_x;
    float theta;
    float length;
    float lane_x_coordinates[72];
};

// utilities ----------------------------------------------------------------------------------------------------------
// class to log errors, warnings, and other information during the build and inference phases
class Logger : public nvinfer1::ILogger
{
public:
    void log(Severity severity, const char *msg) noexcept override
    {
        // remove this 'if' if you need more logged info
        if ((severity == Severity::kERROR) || (severity == Severity::kINTERNAL_ERROR))
        {
            std::cout << msg << "\n";
        }
    }
} gLogger;

// destroy TensorRT objects if something goes wrong
struct TRTDestroy
{
    template <class T>
    void operator()(T *obj) const
    {
        if (obj)
        {
            obj->destroy();
        }
    }
};

template <class T>
using TRTUniquePtr = std::unique_ptr<T, TRTDestroy>;

// calculate size of tensor
size_t getSizeByDim(const nvinfer1::Dims &dims)
{
    size_t size = 1;
    for (size_t i = 0; i < dims.nbDims; ++i)
    {
        size *= dims.d[i];
    }
    return size;
}

// preprocessing stage ------------------------------------------------------------------------------------------------
void preprocessImage(cv::cuda::GpuMat &gpu_frame, float *gpu_input, const nvinfer1::Dims &dims)
{
    auto input_width = dims.d[2];
    auto input_height = dims.d[1];
    auto channels = dims.d[0];
    auto input_size = cv::Size(input_width, input_height);
    // resize
    cv::cuda::GpuMat resized;
    cv::cuda::resize(gpu_frame, resized, input_size, 0, 0, cv::INTER_NEAREST);
    // normalize
    cv::cuda::GpuMat flt_image;
    resized.convertTo(flt_image, CV_32FC3, 1.f / 255.f);
    cv::cuda::subtract(flt_image, cv::Scalar(0.485f, 0.456f, 0.406f), flt_image, cv::noArray(), -1);
    cv::cuda::divide(flt_image, cv::Scalar(0.229f, 0.224f, 0.225f), flt_image, 1, -1);
    // to tensor
    std::vector<cv::cuda::GpuMat> chw;
    for (size_t i = 0; i < channels; ++i)
    {
        chw.emplace_back(cv::cuda::GpuMat(input_size, CV_32FC1, gpu_input + i * input_width * input_height));
    }
    cv::cuda::split(flt_image, chw);
}

std::vector<std::vector<float>> softmax(const std::vector<std::vector<float>> &x)
{
    std::vector<std::vector<float>> y(x.size(), std::vector<float>(x[0].size()));

    // Assume that the softmax is performed along the last axis (columns).
    for (size_t i = 0; i < x.size(); ++i)
    {
        float maxVal = *std::max_element(x[i].begin(), x[i].end());

        std::vector<float> expVec(x[i].size());
        float sum = 0.0f;
        for (size_t j = 0; j < x[i].size(); ++j)
        {
            expVec[j] = exp(x[i][j] - maxVal);
            sum += expVec[j];
        }

        for (size_t j = 0; j < y[i].size(); ++j)
        {
            y[i][j] = expVec[j] / sum;
        }
    }

    return y;
}

bool Lane_IOU(const std::vector<float> &parent_box, const std::vector<float> &compared_box, float threshold)
{
    int n_offsets = 72;
    int n_strips = n_offsets - 1;

    int start_a = static_cast<int>(parent_box[2] * n_strips + 0.5);
    int start_b = static_cast<int>(compared_box[2] * n_strips + 0.5);
    int start = std::max(start_a, start_b);
    int end_a = start_a + static_cast<int>(parent_box[4] - 1 + 0.5 - ((parent_box[4] - 1) < 0));
    int end_b = start_b + static_cast<int>(compared_box[4] - 1 + 0.5 - ((compared_box[4] - 1) < 0));
    int end = std::min({end_a, end_b, 71});
    if ((end - start) < 0)
    {
        return false;
    }
    float dist = 0.0f;
    for (int i = 5 + start; i <= 5 + end; ++i)
    {
        if (parent_box[i] < compared_box[i])
        {
            dist += compared_box[i] - parent_box[i];
        }
        else
        {
            dist += parent_box[i] - compared_box[i];
        }
    }
    return dist < (threshold * (end - start + 1));
}

std::vector<int> Lane_nms(const std::vector<std::vector<float>> &proposals, const std::vector<float> &scores, float overlap = 50, int top_k = 4)
{
    std::vector<int> keep_index;
    std::vector<size_t> indices(scores.size());
    std::iota(indices.begin(), indices.end(), 0);

    // Sort indices based on corresponding scores in descending order
    std::sort(indices.begin(), indices.end(), [&scores](int a, int b)
              { return scores[a] > scores[b]; });

    std::vector<int> r_filters(scores.size(), 0);

    for (size_t i = 0; i < indices.size(); ++i)
    {
        size_t index = indices[i];
        if (r_filters[index] == 1) // Ensure we check r_filters for the right index
        {
            continue;
        }

        keep_index.push_back(index);

        if (static_cast<int>(keep_index.size()) > top_k) // We cast size to int to compare with top_k
        {
            break;
        }

        if (i == indices.size() - 1) // If it's the last index, break out of the loop
        {
            break;
        }

        // Iterate over the rest of the proposals from this point on
        for (size_t j = i + 1; j < indices.size(); ++j)
        {
            size_t sub_index = indices[j];
            if (!r_filters[sub_index]) // Check if not already filtered
            {
                if (Lane_IOU(proposals[index], proposals[sub_index], overlap))
                {
                    r_filters[sub_index] = 1;
                }
            }
        }
    }

    // Resize to remove any excess elements in case fewer than top_k were kept
    keep_index.resize(std::min(top_k, static_cast<int>(keep_index.size())));

    return keep_index;
}
std::vector<std::vector<cv::Point2f>> predictions_to_pred(const std::vector<std::vector<float>> &predictions,
                                                          const std::vector<float> &prior_ys,
                                                          int n_strips, int ori_img_w, int ori_img_h, int img_w, int img_h, int cut_height)
{
    std::vector<std::vector<cv::Point2f>> lanes;

    for (const auto &lane : predictions)
    {
        std::vector<float> lane_xs(lane.begin() + 6, lane.end()); // normalized value
        int start = std::min(std::max(0, static_cast<int>(round(lane[2] * n_strips))), n_strips);
        int length = static_cast<int>(round(lane[5]));
        int end = start + length - 1;
        end = std::min(end, static_cast<int>(prior_ys.size()) - 1);

        // Extend prediction until x is outside the image
        std::vector<bool> mask(start, false);
        for (int i = start - 1; i >= 0; --i)
        {
            if (lane_xs[i] < 0.0f || lane_xs[i] > 1.0f)
            {
                mask[i] = true;
            }
            else if (i < start - 1 && mask[i + 1])
            {
                mask[i] = true;
            }
        }

        std::fill(lane_xs.begin() + end + 1, lane_xs.end(), -2.0f);
        for (int i = 0; i < start; ++i)
        {
            if (mask[i])
            {
                lane_xs[i] = -2.0f;
            }
        }

        std::vector<float> lane_ys;
        for (size_t i = 0; i < lane_xs.size(); ++i)
        {
            if (lane_xs[i] >= 0)
            {
                lane_ys.push_back(prior_ys[i]);
            }
        }

        lane_xs.erase(std::remove_if(lane_xs.begin(), lane_xs.end(),
                                     [](float x)
                                     { return x < 0; }),
                      lane_xs.end());

        if (lane_xs.size() <= 1)
        {
            continue;
        }

        std::reverse(lane_xs.begin(), lane_xs.end());
        std::reverse(lane_ys.begin(), lane_ys.end());

        auto scale_x = static_cast<float>(ori_img_w) / img_w;
        auto scale_y = static_cast<float>(ori_img_h) / (img_h - cut_height);

        for (size_t i = 0; i < lane_xs.size(); ++i)
        {
            lane_xs[i] = lane_xs[i] * scale_x;
            lane_ys[i] = (lane_ys[i] * (img_h - cut_height) + cut_height) * scale_y;
        }

        std::vector<cv::Point2f> points;
        for (size_t i = 0; i < lane_xs.size(); ++i)
        {
            points.emplace_back(lane_xs[i], lane_ys[i]);
        }

        std::cout << "lane_xs: ";
        for (const auto &x : lane_xs)
        {
            std::cout << x << " ";
        }
        std::cout << "\nlane_ys: ";
        for (const auto &y : lane_ys)
        {
            std::cout << y << " ";
        }
        std::cout << "\n";

        lanes.push_back(points);
    }

    std::cout << "lanes: " << lanes.size() << "\n";

    return lanes;
}

cv::Mat imshow_lanes(cv::Mat &img, const std::vector<std::vector<cv::Point2f>> &lanes, int width = 4)
{
    std::vector<std::vector<cv::Point>> lanes_xys;

    for (const auto &lane : lanes)
    {
        std::vector<cv::Point> xys;
        for (const auto &point : lane)
        {
            if (point.x <= 0.0f || point.y <= 0.0f)
            {
                continue;
            }
            int x = static_cast<int>(point.x);
            int y = static_cast<int>(point.y);
            xys.emplace_back(x, y);
        }
        if (!xys.empty())
        {
            lanes_xys.push_back(xys);
        }
    }
    std::cout << "lanes_xys: " << lanes_xys.size() << "\n";

    std::sort(lanes_xys.begin(), lanes_xys.end(),
              [](const std::vector<cv::Point> &a, const std::vector<cv::Point> &b)
              { return a[0].x < b[0].x; });

    std::vector<cv::Scalar> COLORS = {cv::Scalar(255, 0, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 0, 255), cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 255)};

    for (size_t idx = 0; idx < lanes_xys.size(); ++idx)
    {
        const auto &xys = lanes_xys[idx];
        for (size_t i = 1; i < xys.size(); ++i)
        {
            cv::line(img, xys[i - 1], xys[i], COLORS[idx % COLORS.size()], width);
        }
    }

    cv::imwrite("test.jpg", img);
    // cv::imshow("Lanes", img);
    // cv::waitKey(0);

    return img;
}

// post-processing stage ----------------------------------------------------------------------------------------------
void postprocessResults(cv::Mat &frame, float *gpu_output, const nvinfer1::Dims &dims, int batch_size)
{
    float conf_threshold = 0.4f;

    // copy results from GPU to CPU
    std::vector<float> cpu_output(getSizeByDim(dims) * batch_size);
    cudaMemcpy(cpu_output.data(), gpu_output, cpu_output.size() * sizeof(float), cudaMemcpyDeviceToHost);
    for (int i = 0; i < cpu_output.size(); ++i)
    {
        std::cout << cpu_output[i] << " ";
        if (!((i + 1) % 78))
        {
            std::cout << "\n\n";
        }
    }
    std::cout << "------------------------\n";
    std::vector<std::vector<float>> detections(cpu_output.size() / 78, std::vector<float>(2));
    for (size_t i = 0; i < cpu_output.size() / 78; ++i)
    {
        detections[i][0] = cpu_output[i * 78];
        detections[i][1] = cpu_output[(i + 1) * 78];
    }
    const auto xyscores = softmax(detections);
    for (const auto &score : xyscores)
    {
        for (const auto &val : score)
        {
            std::cout << val << " ";
        }
        std::cout << "\n";
    }
    std::cout << "------------------------\n";
    std::vector<float> scores;
    std::vector<std::vector<float>> predictions(cpu_output.size() / 78, std::vector<float>(78));
    for (int i = 0; i < xyscores.size(); ++i)
    {
        scores.emplace_back(xyscores[i][1]);
        if (xyscores[i][1] >= conf_threshold)
        {
            std::copy(cpu_output.begin() + i * 78, cpu_output.begin() + (i + 1) * 78, predictions[i].begin());
        }
    }
    std::cout << "predictions: " << predictions.size() << "\n";

    int n_offsets = 72;
    int n_strips = n_offsets - 1;
    int img_w = 800;
    int img_h = 320;
    int ori_img_w = 1280;
    int ori_img_h = 720;
    int cut_height = 160;
    std::vector<std::vector<float>> nms_predictions;
    for (auto &prediction : predictions)
    {
        prediction[5] = std::round(prediction[5] * n_strips);
        std::vector<float> predict(sizeof(Detection) - sizeof(float));
        for (size_t j = 0; j < 4; ++j)
        {
            predict[j] = prediction[j];
        }
        for (size_t j = 5; j < prediction.size(); ++j)
        {
            predict[j - 1] = prediction[j];
        }
        nms_predictions.emplace_back(predict);
    }
    std::cout << "nms_predictions: " << nms_predictions.size() << "\n";

    for (auto &nms_prediction : nms_predictions)
    {
        nms_prediction[4] *= n_strips;
        for (size_t i = 5; i < nms_prediction.size(); ++i)
        {
            nms_prediction[i] *= ori_img_w - 1;
        }
    }

    auto keep = Lane_nms(nms_predictions, scores, 50, 5);
    std::cout << "keep: " << keep.size() << "\n";

    std::vector<float> prior_ys(n_offsets);
    for (int i = 0; i < n_offsets; ++i)
    {
        prior_ys[i] = 1.0f - static_cast<float>(i) / n_strips;
    }

    auto lanes = predictions_to_pred(predictions, prior_ys, n_strips, ori_img_w, ori_img_h, img_w, img_h, cut_height);
    imshow_lanes(frame, lanes);
}

// initialize TensorRT engine and parse ONNX model --------------------------------------------------------------------
void parseOnnxModel(const std::string &model_path, TRTUniquePtr<nvinfer1::ICudaEngine> &engine,
                    TRTUniquePtr<nvinfer1::IExecutionContext> &context)
{
    TRTUniquePtr<nvinfer1::IBuilder> builder{nvinfer1::createInferBuilder(gLogger)};
    TRTUniquePtr<nvinfer1::INetworkDefinition> network{builder->createNetworkV2(1)};
    TRTUniquePtr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, gLogger)};
    TRTUniquePtr<nvinfer1::IBuilderConfig> config{builder->createBuilderConfig()};
    // parse ONNX
    if (!parser->parseFromFile(model_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO)))
    {
        std::cerr << "ERROR: could not parse the model.\n";
        return;
    }
    // allow TensorRT to use up to 1GB of GPU memory for tactic selection.
    config->setMaxWorkspaceSize(1ULL << 30);
    // use FP16 mode if possible
    if (builder->platformHasFastFp16())
    {
        std::cout << "fp16\n";
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
    // we have only one image in batch
    builder->setMaxBatchSize(1);
    // generate TensorRT engine optimized for the target platform
    engine.reset(builder->buildEngineWithConfig(*network, *config));
    context.reset(engine->createExecutionContext());
}

// initialize TensorRT engine from serialized model --------------------------------------------------------------------
void loadTrtEngine(const std::string &engine_path, TRTUniquePtr<nvinfer1::IRuntime> &runtime,
                   TRTUniquePtr<nvinfer1::ICudaEngine> &engine,
                   TRTUniquePtr<nvinfer1::IExecutionContext> &context)
{
    std::ifstream engine_file(engine_path, std::ios::binary);
    if (!engine_file)
    {
        std::cerr << "ERROR: could not open the engine file.\n";
        return;
    }

    // 计算文件大小
    engine_file.seekg(0, engine_file.end);
    size_t file_size = engine_file.tellg();
    engine_file.seekg(0, engine_file.beg);

    // 加载文件内容到内存中
    std::vector<char> trt_model_stream(file_size);
    engine_file.read(trt_model_stream.data(), file_size);
    engine_file.close();

    // 创建runtime
    runtime.reset(nvinfer1::createInferRuntime(gLogger));

    // 反序列化计划文件并创建引擎
    engine.reset(runtime->deserializeCudaEngine(trt_model_stream.data(), file_size, nullptr));

    // 创建上下文
    context.reset(engine->createExecutionContext());
}

// main pipeline ------------------------------------------------------------------------------------------------------
int main(int argc, char *argv[])
{
    if (argc < 3)
    {
        std::cerr << "usage: " << argv[0] << " model.onnx image.jpg\n";
        return -1;
    }
    std::string model_path(argv[1]);
    std::string image_path(argv[2]);
    int batch_size = 1;

    // initialize TensorRT engine and parse ONNX model
    TRTUniquePtr<nvinfer1::IRuntime> runtime{nullptr};
    TRTUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
    TRTUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
    // parseOnnxModel(model_path, engine, context);
    loadTrtEngine(model_path, runtime, engine, context);

    // get sizes of input and output and allocate memory required for input data and for output data
    std::vector<nvinfer1::Dims> input_dims;               // we expect only one input
    std::vector<nvinfer1::Dims> output_dims;              // and one output
    std::vector<void *> buffers(engine->getNbBindings()); // buffers for input and output data
    for (size_t i = 0; i < engine->getNbBindings(); ++i)
    {
        auto binding_size = getSizeByDim(engine->getBindingDimensions(i)) * batch_size * sizeof(float);
        cudaMalloc(&buffers[i], binding_size);
        if (engine->bindingIsInput(i))
        {
            input_dims.emplace_back(engine->getBindingDimensions(i));
        }
        else
        {
            output_dims.emplace_back(engine->getBindingDimensions(i));
        }
    }
    if (input_dims.empty() || output_dims.empty())
    {
        std::cerr << "Expect at least one input and one output for network\n";
        return -1;
    }

    // read input image
    cv::Mat frame = cv::imread(image_path);
    if (frame.empty())
    {
        std::cerr << "Input image " << image_path << " load failed\n";
        return -1;
    }
    cv::cuda::GpuMat gpu_frame;
    // upload image to GPU
    gpu_frame.upload(frame);

    // preprocess input data
    preprocessImage(gpu_frame, (float *)buffers[0], input_dims[0]);
    // inference
    context->enqueue(batch_size, buffers.data(), 0, nullptr);
    // postprocess results
    postprocessResults(frame, (float *)buffers[1], output_dims[0], batch_size);

    for (void *buf : buffers)
    {
        cudaFree(buf);
    }
    return 0;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants