[Benchmark] Add Google Benchmarks & gitaction #2708

Merged
merged 5 commits on Dec 19, 2024
55 changes: 55 additions & 0 deletions .github/workflows/ubuntu_benchmarks.yml
@@ -0,0 +1,55 @@
name: Ubuntu Benchmarks

on:
  schedule:
    - cron: '0 2 * * *'

jobs:
  meson_test:

    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ ubuntu-20.04, ubuntu-22.04 ]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: install additional package from PPA for testing
        run: sudo add-apt-repository -y ppa:nnstreamer/ppa && sudo apt-get update
      - name: install minimal requirements
        run: sudo apt-get update && sudo apt-get install -y gcc g++ pkg-config libopenblas-dev libiniparser-dev libjsoncpp-dev libcurl3-dev tensorflow2-lite-dev nnstreamer-dev libglib2.0-dev libgstreamer1.0-dev libgtest-dev ml-api-common-dev flatbuffers-compiler ml-inference-api-dev libunwind-dev libbenchmark-dev
      - name: install additional packages for features
        run: sudo apt-get install -y python3-dev python3-numpy python3
      - name: install build systems
        run: sudo apt-get install -y meson ninja-build
      - run: meson setup build/
        env:
          CC: gcc
      - run: |
          meson \
            --buildtype=plain \
            --prefix=/usr \
            --sysconfdir=/etc \
            --libdir=lib/x86_64-linux-gnu \
            --bindir=lib/nntrainer/bin \
            --includedir=include \
            -Dinstall-app=false \
            -Dreduce-tolerance=false \
            -Denable-debug=true \
            -Dml-api-support=enabled \
            -Denable-nnstreamer-tensor-filter=enabled \
            -Denable-nnstreamer-tensor-trainer=enabled \
            -Denable-nnstreamer-backbone=true \
            -Dcapi-ml-common-actual=capi-ml-common \
            -Dcapi-ml-inference-actual=capi-ml-inference \
            -Denable-capi=enabled \
            -Denable-benchmarks=true \
            -Denable-app=false \
            build_benchmarks
      - run: ninja -C build_benchmarks
      - name: run Benchmarks_ResNet
        run: cd ./build_benchmarks/benchmarks/benchmark_application && ./Benchmark_ResNet
305 changes: 305 additions & 0 deletions benchmarks/benchmark_application/benchmark_resnet.cpp
@@ -0,0 +1,305 @@
/**
 * Copyright (C) 2024 Donghak Park <[email protected]>
 *
 * @file benchmark_resnet.cpp
 * @date 15 Aug 2024
 * @brief benchmark test for resnet application
 * @see https://github.com/nnstreamer/nntrainer
 * @author Donghak Park <[email protected]>
 * @bug No known bugs except for NYI items
 */
#include <array>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <vector>

#include <sched.h>

#include <layer.h>
#include <model.h>
#include <optimizer.h>

#include "benchmark/benchmark.h"
#include <fake_data_gen.h>

using LayerHandle = std::shared_ptr<ml::train::Layer>;
using ModelHandle = std::unique_ptr<ml::train::Model>;

using UserDataType = std::unique_ptr<nntrainer::util::DataLoader>;

/**
 * @brief read the current scaling frequency (in Hz) of the CPU this thread is
 *        running on, via the kernel's cpufreq sysfs node
 */
uint64_t get_cpu_freq() {
  unsigned int freq = 0;
  char cur_cpu_name[512];
  int cpu = sched_getcpu();
  snprintf(cur_cpu_name, sizeof(cur_cpu_name),
           "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq", cpu);

  FILE *f = fopen(cur_cpu_name, "r");
  if (f != nullptr) {
    if (fscanf(f, "%u", &freq) != 0) {
      fclose(f);
      return uint64_t(freq) * 1000;
    }
    fclose(f);
  }
  return 0;
}
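
/**
 * Example: a core clocked at 1.8 GHz exposes "1800000" (kHz) in
 * scaling_cur_freq, so get_cpu_freq() returns 1800000 * 1000 = 1.8e9 Hz.
 * Test_ResnetFull below records this value in state.counters["check_freq"].
 */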

/** cache loss values post training for test */
float training_loss = 0.0;
float validation_loss = 0.0;

/**
 * @brief make "key=value" from key and value
 *
 * @tparam T type of a value
 * @param key key
 * @param value value
 * @return std::string with "key=value"
 */
template <typename T>
static std::string withKey(const std::string &key, const T &value) {
  std::stringstream ss;
  ss << key << "=" << value;
  return ss.str();
}

template <typename T>
static std::string withKey(const std::string &key,
                           std::initializer_list<T> value) {
  if (std::empty(value)) {
    throw std::invalid_argument("empty data cannot be converted");
  }

  std::stringstream ss;
  ss << key << "=";

  auto iter = value.begin();
  for (; iter != value.end() - 1; ++iter) {
    ss << *iter << ',';
  }
  ss << *iter;

  return ss.str();
}
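
/**
 * Example: withKey("filters", 64) returns "filters=64", and
 * withKey("kernel_size", {3, 3}) returns "kernel_size=3,3"; these are the
 * "key=value" property strings consumed by ml::train::createLayer() below.
 */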

/**
 * @brief resnet block
 *
 * @param block_name name of the block
 * @param input_name name of the input
 * @param filters number of filters
 * @param kernel_size size of the convolution kernel
 * @param downsample whether to halve the spatial size of the output
 * @return std::vector<LayerHandle> vectors of layers
 */
std::vector<LayerHandle> resnetBlock(const std::string &block_name,
                                     const std::string &input_name,
                                     int filters, int kernel_size,
                                     bool downsample) {
  using ml::train::createLayer;

  auto scoped_name = [&block_name](const std::string &layer_name) {
    return block_name + "/" + layer_name;
  };
  auto with_name = [&scoped_name](const std::string &layer_name) {
    return withKey("name", scoped_name(layer_name));
  };

  auto create_conv = [&with_name, filters](const std::string &name,
                                           int kernel_size, int stride,
                                           const std::string &padding,
                                           const std::string &input_layer) {
    std::vector<std::string> props{
      with_name(name),
      withKey("stride", {stride, stride}),
      withKey("filters", filters),
      withKey("kernel_size", {kernel_size, kernel_size}),
      withKey("padding", padding),
      withKey("input_layers", input_layer)};

    return createLayer("conv2d", props);
  };

  /** residual path */
  LayerHandle a1 = create_conv("a1", 3, downsample ? 2 : 1,
                               downsample ? "1,1" : "same", input_name);
  LayerHandle a2 =
    createLayer("batch_normalization",
                {with_name("a2"), withKey("activation", "relu"),
                 withKey("momentum", "0.9"), withKey("epsilon", "0.00001")});
  LayerHandle a3 = create_conv("a3", 3, 1, "same", scoped_name("a2"));

  /** skip path */
  LayerHandle b1 = nullptr;
  if (downsample) {
    b1 = create_conv("b1", 1, 2, "0,0", input_name);
  }

  const std::string skip_name = b1 ? scoped_name("b1") : input_name;

  LayerHandle c1 = createLayer(
    "Addition",
    {with_name("c1"), withKey("input_layers", {scoped_name("a3"), skip_name})});

  LayerHandle c2 =
    createLayer("batch_normalization",
                {withKey("name", block_name), withKey("activation", "relu"),
                 withKey("momentum", "0.9"), withKey("epsilon", "0.00001"),
                 withKey("trainable", "false")});

  if (downsample) {
    return {b1, a1, a2, a3, c1, c2};
  } else {
    return {a1, a2, a3, c1, c2};
  }
}
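
/**
 * Example: resnetBlock("conv1_0", "first_bn_relu", 64, 3, false) creates the
 * layers conv1_0/a1, conv1_0/a2, conv1_0/a3 and conv1_0/c1, plus a final
 * batch normalization layer named plain "conv1_0", so later blocks can refer
 * to this block's output simply by its block_name.
 */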

/**
 * @brief Create resnet 18
 *
 * @return vector of layers that contain full graph of resnet18
 */
std::vector<LayerHandle> createResnet18Graph() {
  using ml::train::createLayer;

  std::vector<LayerHandle> layers;

  layers.push_back(createLayer(
    "input", {withKey("name", "input0"), withKey("input_shape", "3:32:32")}));

  layers.push_back(createLayer(
    "conv2d", {withKey("name", "conv0"), withKey("filters", 64),
               withKey("kernel_size", {3, 3}), withKey("stride", {1, 1}),
               withKey("padding", "same"), withKey("bias_initializer", "zeros"),
               withKey("weight_initializer", "xavier_uniform")}));

  layers.push_back(createLayer(
    "batch_normalization",
    {withKey("name", "first_bn_relu"), withKey("activation", "relu"),
     withKey("momentum", "0.9"), withKey("epsilon", "0.00001")}));

  std::vector<std::vector<LayerHandle>> blocks;

  blocks.push_back(resnetBlock("conv1_0", "first_bn_relu", 64, 3, false));
  blocks.push_back(resnetBlock("conv1_1", "conv1_0", 64, 3, false));
  blocks.push_back(resnetBlock("conv2_0", "conv1_1", 128, 3, true));
  blocks.push_back(resnetBlock("conv2_1", "conv2_0", 128, 3, false));
  blocks.push_back(resnetBlock("conv3_0", "conv2_1", 256, 3, true));
  blocks.push_back(resnetBlock("conv3_1", "conv3_0", 256, 3, false));
  blocks.push_back(resnetBlock("conv4_0", "conv3_1", 512, 3, true));
  blocks.push_back(resnetBlock("conv4_1", "conv4_0", 512, 3, false));

  for (auto &block : blocks) {
    layers.insert(layers.end(), block.begin(), block.end());
  }

  layers.push_back(createLayer(
    "pooling2d", {withKey("name", "last_p1"), withKey("pooling", "average"),
                  withKey("pool_size", {4, 4}), withKey("stride", "4,4")}));

  layers.push_back(createLayer("flatten", {withKey("name", "last_f1")}));
  layers.push_back(
    createLayer("fully_connected",
                {withKey("unit", 100), withKey("activation", "softmax")}));

  return layers;
}

ModelHandle createResnet18(bool pre_trained = false) {
  ModelHandle model = ml::train::createModel(ml::train::ModelType::NEURAL_NET,
                                             {withKey("loss", "cross")});

  for (auto &layer : createResnet18Graph()) {
    model->addLayer(layer);
  }

  return model;
}

int trainData_cb(float **input, float **label, bool *last, void *user_data) {
  auto data = reinterpret_cast<nntrainer::util::DataLoader *>(user_data);

  data->next(input, label, last);
  return 0;
}

int validData_cb(float **input, float **label, bool *last, void *user_data) {
  auto data = reinterpret_cast<nntrainer::util::DataLoader *>(user_data);

  data->next(input, label, last);
  return 0;
}

void createAndRun(unsigned int epochs, unsigned int batch_size,
                  UserDataType &train_user_data,
                  UserDataType &valid_user_data) {

  // setup model
  ModelHandle model = createResnet18();
  model->setProperty(
    {withKey("batch_size", batch_size), withKey("epochs", epochs)});

  auto optimizer = ml::train::createOptimizer("adam", {"learning_rate=0.001"});

  model->setOptimizer(std::move(optimizer));
  model->compile();
  model->initialize();

  auto dataset_train = ml::train::createDataset(
    ml::train::DatasetType::GENERATOR, trainData_cb, train_user_data.get());
  auto dataset_valid = ml::train::createDataset(
    ml::train::DatasetType::GENERATOR, validData_cb, valid_user_data.get());

  model->setDataset(ml::train::DatasetModeType::MODE_TRAIN,
                    std::move(dataset_train));
  model->setDataset(ml::train::DatasetModeType::MODE_VALID,
                    std::move(dataset_valid));

  model->train();
}

std::array<UserDataType, 2>
createFakeDataGenerator(unsigned int batch_size,
                        unsigned int simulated_data_size,
                        unsigned int data_split) {
  UserDataType train_data(new nntrainer::util::RandomDataLoader(
    {{batch_size, 3, 32, 32}}, {{batch_size, 1, 1, 100}},
    simulated_data_size / data_split));
  UserDataType valid_data(new nntrainer::util::RandomDataLoader(
    {{batch_size, 3, 32, 32}}, {{batch_size, 1, 1, 100}},
    simulated_data_size / data_split));

  return {std::move(train_data), std::move(valid_data)};
}
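
/**
 * Note: both loaders generate random 3x32x32 inputs and 1x1x100 labels
 * (CIFAR-100 shaped); the last constructor argument, simulated_data_size /
 * data_split, presumably bounds how many samples each loader serves, so the
 * benchmark runs entirely on synthetic in-memory data.
 */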

std::array<UserDataType, 2>
createRealDataGenerator(const std::string &directory, unsigned int batch_size,
                        unsigned int data_split) {

  UserDataType train_data(new nntrainer::util::Cifar100DataLoader(
    directory + "/train.bin", batch_size, data_split));
  UserDataType valid_data(new nntrainer::util::Cifar100DataLoader(
    directory + "/test.bin", batch_size, data_split));

  return {std::move(train_data), std::move(valid_data)};
}

static void Test_ResnetFull(benchmark::State &state) {

  unsigned int batch_size = 1;
  unsigned int data_split = 128;
  unsigned int epoch = 10;

  std::cout << "batch_size: " << batch_size << " data_split: " << data_split
            << " epoch: " << epoch << std::endl;

  std::array<UserDataType, 2> user_datas;
  user_datas = createFakeDataGenerator(batch_size, 512, data_split);
  auto &[train_user_data, valid_user_data] = user_datas;
  auto check_freq = get_cpu_freq();
  state.counters["check_freq"] = check_freq;
  for (auto _ : state) {
    createAndRun(epoch, batch_size, train_user_data, valid_user_data);
  }
}

BENCHMARK(Test_ResnetFull);
BENCHMARK_MAIN();
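
/**
 * BENCHMARK(Test_ResnetFull) registers the function above with Google
 * Benchmark, and BENCHMARK_MAIN() expands to the main() that runs every
 * registered case. Assuming the stock Google Benchmark CLI, the resulting
 * binary also accepts flags such as --benchmark_repetitions=3 or
 * --benchmark_format=json to control repetition count and output format.
 */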
13 changes: 13 additions & 0 deletions benchmarks/benchmark_application/meson.build
@@ -0,0 +1,13 @@
build_root = meson.build_root()

sources = ['benchmark_resnet.cpp',
           fake_datagen_path / 'fake_data_gen.cpp']

resnet_dependencies = [nntrainer_dep,
                       nntrainer_ccapi_dep,
                       benchmark_dep]

executable('Benchmark_ResNet',
           sources,
           include_directories : [include_directories('.'), fake_datagen_include_dir],
           dependencies : resnet_dependencies)