From acd6892b290b9cb5b7c66f1c9c7081964cde8ae2 Mon Sep 17 00:00:00 2001
From: Donghak PARK
Date: Wed, 14 Aug 2024 15:35:07 +0900
Subject: [PATCH] [Benchmarks] Add benchmark gitaction

Add gitaction for benchmark, it will automatically run benchmark on clean ubuntu
- now, just run Resnet Application on Ubuntu 22.04
- i will add more tests (Tensor Op, more applications)

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK
---
 .github/workflows/ubuntu_benchmarks.yml      |  55 ++++++
 benchmarks/benchmark_application/meson.build |  10 +-
 benchmarks/fake_data_gen/fake_data_gen.cpp   | 173 +++++++++++++++++++
 benchmarks/fake_data_gen/fake_data_gen.h     | 124 +++++++++++++
 benchmarks/fake_data_gen/meson.build         |   2 +
 benchmarks/meson.build                       |   1 +
 meson.build                                  |   2 +-
 7 files changed, 360 insertions(+), 7 deletions(-)
 create mode 100644 .github/workflows/ubuntu_benchmarks.yml
 create mode 100644 benchmarks/fake_data_gen/fake_data_gen.cpp
 create mode 100644 benchmarks/fake_data_gen/fake_data_gen.h
 create mode 100644 benchmarks/fake_data_gen/meson.build

diff --git a/.github/workflows/ubuntu_benchmarks.yml b/.github/workflows/ubuntu_benchmarks.yml
new file mode 100644
index 0000000000..f1b11745a0
--- /dev/null
+++ b/.github/workflows/ubuntu_benchmarks.yml
@@ -0,0 +1,55 @@
+name: Ubuntu Benchmarks
+
+on:
+  pull_request:
+    types: [opened, edited, reopened, synchronize]
+
+jobs:
+  meson_test:
+
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ ubuntu-22.04 ]
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.10"
+    - name: install additional package from PPA for testing
+      run: sudo add-apt-repository -y ppa:nnstreamer/ppa && sudo apt-get update
+    - name: install minimal requirements
+      run: sudo apt-get update && sudo apt-get install -y gcc g++ pkg-config libopenblas-dev libiniparser-dev libjsoncpp-dev libcurl3-dev tensorflow2-lite-dev nnstreamer-dev libglib2.0-dev libgstreamer1.0-dev libgtest-dev ml-api-common-dev flatbuffers-compiler ml-inference-api-dev libunwind-dev libbenchmark-dev
+    - name: install additional packages for features
+      run: sudo apt-get install -y python3-dev python3-numpy python3
+    - name: install build systems
+      run: sudo apt-get install -y meson ninja-build
+    - run: meson setup build/
+      env:
+        CC: gcc
+    - run: |
+        meson setup --reconfigure \
+          --buildtype=plain \
+          --prefix=/usr \
+          --sysconfdir=/etc \
+          --libdir=lib/x86_64-linux-gnu \
+          --bindir=lib/nntrainer/bin \
+          --includedir=include \
+          -Dinstall-app=false \
+          -Dreduce-tolerance=false \
+          -Denable-debug=true \
+          -Dml-api-support=enabled \
+          -Denable-nnstreamer-tensor-filter=enabled \
+          -Denable-nnstreamer-tensor-trainer=enabled \
+          -Denable-nnstreamer-backbone=true \
+          -Dcapi-ml-common-actual=capi-ml-common \
+          -Dcapi-ml-inference-actual=capi-ml-inference \
+          -Denable-capi=enabled \
+          -Denable-benchmarks=true \
+          -Denable-app=false \
+          build
+    - run: ninja -C build
+    - name: run Benchmarks_ResNet
+      run: cd ./build/benchmarks/benchmark_application && ./Benchmark_ResNet
diff --git a/benchmarks/benchmark_application/meson.build b/benchmarks/benchmark_application/meson.build
index 3ed6df7deb..1f3a386b65 100644
--- a/benchmarks/benchmark_application/meson.build
+++ b/benchmarks/benchmark_application/meson.build
@@ -1,15 +1,13 @@
 build_root = meson.build_root()
 
 sources = ['benchmark_resnet.cpp',
-           cifar_path / 'cifar_dataloader.cpp']
+           fake_datagen_path / 'fake_data_gen.cpp']
 
-resnet_dependencies = [app_utils_dep,
-                       iniparser_dep,
-                       nntrainer_dep,
+resnet_dependencies = [nntrainer_dep,
                        nntrainer_ccapi_dep,
                        benchmark_dep,
                        ]
 
 executable('Benchmark_ResNet', sources,
-           include_directories : [include_directories('.'), cifar_include_dir],
-           dependencies : resnet_dependencies)
\ No newline at end of file
+           include_directories : [include_directories('.'), fake_datagen_include_dir],
+           dependencies : resnet_dependencies)
diff --git
a/benchmarks/fake_data_gen/fake_data_gen.cpp b/benchmarks/fake_data_gen/fake_data_gen.cpp
new file mode 100644
index 0000000000..fea4d3fa57
--- /dev/null
+++ b/benchmarks/fake_data_gen/fake_data_gen.cpp
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2020 Jihoon Lee
+ *
+ * @file   fake_data_gen.cpp
+ * @date   24 Jun 2021
+ * @brief  random (fake) and cifar-100 data loaders used by the benchmarks
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Jihoon Lee
+ * @bug    No known bugs except for NYI items
+ */
+
+#include "fake_data_gen.h"
+
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+#include <nntrainer_error.h>
+#include <random>
+
+namespace nntrainer::util {
+
+namespace {
+
+/**
+ * @brief write a label into the given memory: the raw label value when
+ * length is 1, a one-hot vector of size length otherwise
+ * @param data   memory to fill, must hold at least length floats
+ * @param length number of floats available at data
+ * @param label  label value; must be smaller than length when length > 1
+ */
+void fillLabel(float *data, unsigned int length, unsigned int label) {
+  if (length == 1) {
+    *data = label;
+    return;
+  }
+
+  memset(data, 0, length * sizeof(float));
+  *(data + label) = 1;
+}
+
+/**
+ * @brief advance the iteration counter and report the end of an epoch
+ * @note this function increases iteration value, if the epoch has finished,
+ * iteration resets to 0
+ *
+ * @param[in/out] iteration current iteration
+ * @param data_size Data size
+ * @return bool true if iteration has finished
+ */
+bool updateIteration(unsigned int &iteration, unsigned int data_size) {
+  if (iteration++ == data_size) {
+    iteration = 0;
+    return true;
+  }
+  return false;
+}
+
+} // namespace
+
+RandomDataLoader::RandomDataLoader(const std::vector<TensorDim> &input_shapes,
+                                   const std::vector<TensorDim> &output_shapes,
+                                   int data_size_) :
+  iteration(0),
+  data_size(data_size_),
+  input_shapes(input_shapes),
+  output_shapes(output_shapes),
+  input_dist(0, 255),
+  // front() on an empty vector is undefined; the check below rejects it
+  label_dist(0, output_shapes.empty() ? 0
+                                      : output_shapes.front().width() - 1) {
+  NNTR_THROW_IF(output_shapes.empty(), std::invalid_argument)
+    << "output_shape size empty not supported";
+  NNTR_THROW_IF(output_shapes.size() > 1, std::invalid_argument)
+    << "output_shape size > 1 is not supported";
+}
+
+void RandomDataLoader::next(float **input, float **label, bool *last) {
+  auto fill_input = [this](float *input, unsigned int length) {
+    for (unsigned int i = 0; i < length; ++i) {
+      *input = input_dist(rng);
+      input++;
+    }
+  };
+
+  auto fill_label = [this](float *label, unsigned int length) {
+    unsigned int generated_label = label_dist(rng);
+    fillLabel(label, length, generated_label);
+  };
+
+  // always write *last so that the caller never reads a stale value
+  *last = updateIteration(iteration, data_size);
+  if (*last) {
+    return;
+  }
+
+  float **cur_input_tensor = input;
+  for (unsigned int i = 0; i < input_shapes.size(); ++i) {
+    fill_input(*cur_input_tensor, input_shapes.at(i).getFeatureLen());
+    cur_input_tensor++;
+  }
+
+  float **cur_label_tensor = label;
+  for (unsigned int i = 0; i < output_shapes.size(); ++i) {
+    fill_label(*cur_label_tensor, output_shapes.at(i).getFeatureLen());
+    cur_label_tensor++;
+  }
+}
+
+Cifar100DataLoader::Cifar100DataLoader(const std::string &path, int batch_size,
+                                       int splits) :
+  batch(batch_size),
+  current_iteration(0),
+  file(path, std::ios::binary | std::ios::ate) {
+  constexpr char error_msg[] = "failed to create dataloader, reason: ";
+
+  NNTR_THROW_IF(!file.good(), std::invalid_argument)
+    << error_msg << " Cannot open file";
+
+  auto pos = file.tellg();
+  NNTR_THROW_IF((pos % Cifar100DataLoader::SampleSize != 0),
+                std::invalid_argument)
+    << error_msg << " Given file does not align with the format";
+
+  auto data_size = pos / (Cifar100DataLoader::SampleSize * splits);
+  idxes = std::vector<unsigned int>(data_size);
+  std::cout << "path: " << path << '\n';
+  std::cout << "data_size: " << data_size << '\n';
+  std::iota(idxes.begin(), idxes.end(), 0);
+  std::shuffle(idxes.begin(), idxes.end(), rng);
+
+  /// @note this truncates the remaining data of less than the batch size
+  iteration_per_epoch = data_size;
+}
+
+void Cifar100DataLoader::next(float **input, float **label, bool *last) {
+  /// @note below logic assumes a single input and the fine label is used
+
+  auto fill_one_sample = [this](float *input_, float *label_, int index) {
+    const size_t error_buflen = 102;
+    char error_buf[error_buflen];
+    NNTR_THROW_IF(!file.good(), std::invalid_argument)
+      << "file is not good, reason: "
+      << strerror_r(errno, error_buf, error_buflen);
+    file.seekg(index * Cifar100DataLoader::SampleSize, std::ios_base::beg);
+
+    uint8_t current_label;
+    uint8_t fine_label; // it doesn't need for our application, so abandon it
+    file.read(reinterpret_cast<char *>(&fine_label), sizeof(uint8_t));
+    file.read(reinterpret_cast<char *>(&current_label), sizeof(uint8_t));
+
+    fillLabel(label_, Cifar100DataLoader::NumClass, current_label);
+
+    for (unsigned int i = 0; i < Cifar100DataLoader::ImageSize; ++i) {
+      uint8_t data;
+      file.read(reinterpret_cast<char *>(&data), sizeof(uint8_t));
+      *input_ = data / 255.f;
+      input_++;
+    }
+  };
+
+  fill_one_sample(*input, *label, idxes[current_iteration]);
+  current_iteration++;
+  if (current_iteration < iteration_per_epoch) {
+    *last = false;
+  } else {
+    *last = true;
+    current_iteration = 0;
+    std::shuffle(idxes.begin(), idxes.end(), rng);
+  }
+}
+
+} // namespace nntrainer::util
diff --git a/benchmarks/fake_data_gen/fake_data_gen.h b/benchmarks/fake_data_gen/fake_data_gen.h
new file mode 100644
index 0000000000..10083620f6
--- /dev/null
+++ b/benchmarks/fake_data_gen/fake_data_gen.h
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2020 Jihoon Lee
+ *
+ * @file   fake_data_gen.h
+ * @date   24 Jun 2021
+ * @brief  random (fake) and cifar-100 data loaders used by the benchmarks
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Jihoon Lee
+ * @bug    No known bugs except for NYI items
+ */
+#pragma once
+#include <tensor_dim.h>
+
+#include <fstream>
+#include <random>
+#include <string>
+#include <vector>
+
+namespace nntrainer::util {
+
+using TensorDim = ml::train::TensorDim;
+
+/**
+ * @brief DataLoader interface implemented by the benchmark data generators
+ */
+class DataLoader {
+public:
+  /**
+   * @brief Destroy the Data Loader object
+   */
+  virtual ~DataLoader() {}
+
+  /**
+   * @brief create an iteration to fed to the generator callback
+   *
+   * @param[out] input list of inputs that is already allocated by nntrainer,
+   * and this function is obliged to fill
+   * @param[out] label list of label that is already allocated by nntrainer, and
+   * this function is obliged to fill
+   * @param[out] last  optional property to set when the epoch has finished
+   */
+  virtual void next(float **input, float **label, bool *last) = 0;
+
+protected:
+  std::mt19937 rng;
+};
+
+/**
+ * @brief RandomData Generator
+ */
+class RandomDataLoader final : public DataLoader {
+public:
+  /**
+   * @brief Construct a new Random Data Loader object
+   *
+   * @param input_shapes input_shapes with appropriate batch
+   * @param output_shapes label_shapes with appropriate batch
+   * @param iteration     iteration per epoch
+   */
+  RandomDataLoader(const std::vector<TensorDim> &input_shapes,
+                   const std::vector<TensorDim> &output_shapes, int iteration);
+
+  /**
+   * @brief Destroy the Random Data Loader object
+   */
+  ~RandomDataLoader() {}
+
+  /**
+   * @copydoc void DataLoader::next(float **input, float**label, bool *last)
+   */
+  void next(float **input, float **label, bool *last) override;
+
+private:
+  unsigned int iteration;
+  unsigned int data_size;
+
+  std::vector<TensorDim> input_shapes;
+  std::vector<TensorDim> output_shapes;
+
+  std::uniform_int_distribution<int> input_dist;
+  std::uniform_int_distribution<int> label_dist;
+};
+
+/**
+ * @brief Cifar100DataLoader class
+ */
+class Cifar100DataLoader final : public DataLoader {
+public:
+  /**
+   * @brief Construct a new Cifar100 Data Loader object
+   *
+   * @param path path to read from
+   * @param batch_size batch_size of current model
+   * @param splits split divisor of the file 1 means using whole data, 2 means
+   * half of the data, 10 means 10% of the data
+   */
+  Cifar100DataLoader(const std::string &path, int batch_size, int splits);
+
+  /**
+   * @brief Destroy the Cifar100 Data Loader object
+   */
+  ~Cifar100DataLoader() {}
+
+  /**
+   * @copydoc void DataLoader::next(float **input, float**label, bool *last)
+   */
+  void next(float **input, float **label, bool *last) override;
+
+private:
+  inline static constexpr int ImageSize = 3 * 32 * 32;
+  inline static constexpr int NumClass = 100;
+  inline static constexpr int SampleSize =
+    4 * (3 * 32 * 32 + 2); /**< 1 coarse label, 1 fine label, pixel size */
+
+  unsigned int batch;
+  unsigned int current_iteration;
+  unsigned int iteration_per_epoch;
+
+  std::ifstream file;
+  std::vector<unsigned int> idxes; /**< index information for one epoch */
+};
+
+} // namespace nntrainer::util
diff --git a/benchmarks/fake_data_gen/meson.build b/benchmarks/fake_data_gen/meson.build
new file mode 100644
index 0000000000..945bd74dc6
--- /dev/null
+++ b/benchmarks/fake_data_gen/meson.build
@@ -0,0 +1,2 @@
+fake_datagen_path = meson.current_source_dir()
+fake_datagen_include_dir = include_directories('.')
diff --git a/benchmarks/meson.build b/benchmarks/meson.build
index 02633ec96a..026677d0c6 100644
--- a/benchmarks/meson.build
+++ b/benchmarks/meson.build
@@ -1 +1,2 @@
+subdir('fake_data_gen')
 subdir('benchmark_application')
diff --git a/meson.build b/meson.build
index e1874374f7..d077138b3e 100644
--- a/meson.build
+++ b/meson.build
@@ -477,4 +477,4 @@ endif
 
 if get_option('enable-benchmarks')
   subdir('benchmarks')
-endif
\ No newline at end of file
+endif