Skip to content

Commit

Permalink
display: add custom dispatch pool, remove OpenMP entirely
Browse files Browse the repository at this point in the history
  • Loading branch information
liamwhite committed Nov 6, 2024
1 parent 92a330f commit b3c6d96
Show file tree
Hide file tree
Showing 10 changed files with 325 additions and 110 deletions.
2 changes: 0 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ option(ENABLE_LCMS "Compile with LCMS support" ON)
option(WITH_SVG2 "Compile with support for new SVG2 features" ON)
option(WITH_LPETOOL "Compile with LPE Tool" OFF)
option(LPE_ENABLE_TEST_EFFECTS "Compile with test experimental LPEs enabled" OFF)
option(WITH_OPENMP "Compile with OpenMP support" ON)
option(WITH_PROFILING "Turn on profiling" OFF) # Set to true if compiler/linker should enable profiling
option(BUILD_SHARED_LIBS "Compile libraries as shared and not static" ON)

Expand Down Expand Up @@ -287,7 +286,6 @@ message("WITH_LIBCDR: ${WITH_LIBCDR}")
message("WITH_LIBVISIO: ${WITH_LIBVISIO}")
message("WITH_LIBWPG: ${WITH_LIBWPG}")
message("WITH_NLS: ${WITH_NLS}")
message("WITH_OPENMP: ${WITH_OPENMP}")
message("WITH_JEMALLOC: ${WITH_JEMALLOC}")
message("WITH_ASAN: ${WITH_ASAN}")
message("WITH_INTERNAL_2GEOM: ${WITH_INTERNAL_2GEOM}")
Expand Down
20 changes: 0 additions & 20 deletions CMakeScripts/DefineDependsandFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -391,26 +391,6 @@ list(APPEND INKSCAPE_INCS_SYS ${LIBXML2_INCLUDE_DIR})
list(APPEND INKSCAPE_LIBS ${LIBXML2_LIBRARIES})
add_definitions(${LIBXML2_DEFINITIONS})

if(WITH_OPENMP)
find_package(OpenMP)
if(OPENMP_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
list(APPEND INKSCAPE_CXX_FLAGS ${OpenMP_CXX_FLAGS})
if(APPLE OR (MINGW AND ${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang"))
list(APPEND INKSCAPE_LIBS "-lomp")
endif()
mark_as_advanced(OpenMP_C_FLAGS)
mark_as_advanced(OpenMP_CXX_FLAGS)
# '-fopenmp' is in OpenMP_C_FLAGS, OpenMP_CXX_FLAGS and implies '-lgomp'
# uncomment explicit linking below if still needed:
set(HAVE_OPENMP ON)
#list(APPEND INKSCAPE_LIBS "-lgomp") # FIXME
else()
set(HAVE_OPENMP OFF)
set(WITH_OPENMP OFF)
endif()
endif()

find_package(ZLIB REQUIRED)
list(APPEND INKSCAPE_INCS_SYS ${ZLIB_INCLUDE_DIRS})
list(APPEND INKSCAPE_LIBS ${ZLIB_LIBRARIES})
Expand Down
3 changes: 0 additions & 3 deletions config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@
/* Define to 1 if you have the <malloc.h> header file. */
#cmakedefine HAVE_MALLOC_H 1

/* Use OpenMP (via cmake) */
#cmakedefine HAVE_OPENMP 1

/* Use libpoppler for direct PDF import */
#cmakedefine HAVE_POPPLER 1

Expand Down
2 changes: 2 additions & 0 deletions src/display/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
set(display_SRC
cairo-utils.cpp
curve.cpp
dispatch-pool.cpp
drawing-context.cpp
drawing-group.cpp
drawing-image.cpp
Expand Down Expand Up @@ -67,6 +68,7 @@ set(display_SRC
cairo-templates.h
cairo-utils.h
curve.h
dispatch-pool.h
drawing-context.h
drawing-group.h
drawing-image.h
Expand Down
55 changes: 17 additions & 38 deletions src/display/cairo-templates.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,12 @@
#ifndef SEEN_INKSCAPE_DISPLAY_CAIRO_TEMPLATES_H
#define SEEN_INKSCAPE_DISPLAY_CAIRO_TEMPLATES_H

#ifdef HAVE_CONFIG_H
# include "config.h" // only include where actually required!
#endif

#include <glib.h>

#ifdef HAVE_OPENMP
#include <omp.h>
#include "dispatch-pool.h"

// single-threaded operation if the number of pixels is below this threshold
static const int OPENMP_THRESHOLD = 2048;
#endif
static const int POOL_THRESHOLD = 2048;

#include <cmath>
#include <algorithm>
Expand Down Expand Up @@ -69,20 +64,14 @@ void ink_cairo_surface_blend_internal(cairo_surface_t *out, cairo_surface_t *in1
surface_accessor<Acc2> acc_in2(in2);

// NOTE
// OpenMP probably doesn't help much here.
// This probably doesn't help much here.
// It would be better to render more than 1 tile at a time.
#if HAVE_OPENMP
int const num_threads = get_num_filter_threads();
#endif

#if HAVE_OPENMP
#pragma omp parallel for if((w * h) > OPENMP_THRESHOLD) num_threads(num_threads)
#endif
for (int i = 0; i < h; ++i) {
auto const pool = get_global_dispatch_pool();
pool->dispatch_threshold(h, (w * h) > POOL_THRESHOLD, [&](int i, int) {
for (int j = 0; j < w; ++j) {
acc_out.set(j, i, blend(acc_in1.get(j, i), acc_in2.get(j, i)));
}
}
});
}

template <typename AccOut, typename AccIn, typename Filter>
Expand All @@ -92,20 +81,14 @@ void ink_cairo_surface_filter_internal(cairo_surface_t *out, cairo_surface_t *in
surface_accessor<AccIn> acc_in(in);

// NOTE
// OpenMP probably doesn't help much here.
// This probably doesn't help much here.
// It would be better to render more than 1 tile at a time.
#if HAVE_OPENMP
int const num_threads = get_num_filter_threads();
#endif

#if HAVE_OPENMP
#pragma omp parallel for if((w * h) > OPENMP_THRESHOLD) num_threads(num_threads)
#endif
for (int i = 0; i < h; ++i) {
auto const pool = get_global_dispatch_pool();
pool->dispatch_threshold(h, (w * h) > POOL_THRESHOLD, [&](int i, int) {
for (int j = 0; j < w; ++j) {
acc_out.set(j, i, filter(acc_in.get(j, i)));
}
}
});
}

template <typename AccOut, typename Synth>
Expand All @@ -114,21 +97,17 @@ void ink_cairo_surface_synthesize_internal(cairo_surface_t *out, int x0, int y0,
surface_accessor<AccOut> acc_out(out);

// NOTE
// OpenMP probably doesn't help much here.
// This probably doesn't help much here.
// It would be better to render more than 1 tile at a time.
#if HAVE_OPENMP
int const num_threads = get_num_filter_threads();
#endif

#if HAVE_OPENMP
int const limit = (x1 - x0) * (y1 - y0);
#pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(num_threads)
#endif
for (int i = y0; i < y1; ++i) {
auto const pool = get_global_dispatch_pool();
pool->dispatch_threshold(y1 - y0, limit > POOL_THRESHOLD, [&](int y, int) {
int const i = y0 + y;

for (int j = x0; j < x1; ++j) {
acc_out.set(j, i, synth(j, i));
}
}
});
}

/**
Expand Down
2 changes: 1 addition & 1 deletion src/display/cairo-utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1294,7 +1294,7 @@ static int ink_cairo_surface_average_color_internal(cairo_surface_t *surface, do
int stride = cairo_image_surface_get_stride(surface);
unsigned char *data = cairo_image_surface_get_data(surface);

/* TODO convert this to OpenMP somehow */
// TODO parallelize this somehow
for (int y = 0; y < height; ++y, data += stride) {
for (int x = 0; x < width; ++x) {
guint32 px = *reinterpret_cast<guint32*>(data + 4*x);
Expand Down
152 changes: 152 additions & 0 deletions src/display/dispatch-pool.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Author: Liam White
* Copyright (C) 2024 Authors
* Released under GNU GPL v2+, read the file 'COPYING' for more information.
*/

#include "dispatch-pool.h"

#include "cairo-utils.h"

namespace Inkscape {

dispatch_pool::dispatch_pool(int size)
{
int const num_threads = std::max(size, 1) - 1;

_threads.reserve(num_threads);

for (int i = 0; i < num_threads; ++i) {
// local_id of created threads is offset by 1 to allow calling thread to always be 0
_threads.emplace_back([i, this] { thread_func(local_id{i + 1}); });
}
}

dispatch_pool::~dispatch_pool()
{
// TODO C++20: this would be completely trivial with jthread
// TODO C++20: dispatch_pool::~dispatch_pool() = default;
{
std::scoped_lock lk(_lock);
_shutdown = true;
}

_available_cv.notify_all();

for (auto &thread : _threads) {
thread.join();
}
}

void dispatch_pool::dispatch(int count, dispatch_func function)
{
std::scoped_lock lk(_dispatch_lock);
std::unique_lock lk2(_lock);

_available_work = global_id{};
_completed_work = global_id{};
_target_work = global_id{count};
_function = std::move(function);

// Execute the caller's batch, and signal to the next waiting thread
execute_batch(lk2, local_id{}, size());

// Wait for other threads to finish
_completed_cv.wait(lk2, [&] { return _completed_work == _target_work; });

// Release any extra memory held by the function
_function = {};
}

void dispatch_pool::thread_func(local_id id)
{
int const thread_count = size();

std::unique_lock lk(_lock);

// TODO C++20: no need for _shutdown member once stop_token is available
// TODO C++20: while (_cv.wait(lk, stop_token, [&] { ... }))
while (true) {
_available_cv.wait(lk, [&] { return _shutdown || _available_work < _target_work; });

if (_shutdown) {
// When shutdown is requested, stop immediately
return;
}

// Otherwise, execute the batch
execute_batch(lk, id, thread_count);
}
}

void dispatch_pool::execute_batch(std::unique_lock<std::mutex> &lk, local_id id, int thread_count)
{
// Determine how much work to take
global_id const batch_size = (_target_work + thread_count - 1) / thread_count;
global_id const start = _available_work;
global_id const end = std::min(start + batch_size, _target_work);

// Take that much work
_available_work = end;

// Unlock and begin executing the function
{
lk.unlock();

// Now that the lock is released, potentially signal work availability
// to the next waiting thread
_available_cv.notify_one();

// Execute the function
for (global_id index = start; index < end; index++) {
_function(index, id);
}

lk.lock();
}

// Signal completion
_completed_work += (end - start);

if (_completed_work == _target_work) {
_completed_cv.notify_one();
}
}

namespace {

std::mutex g_dispatch_lock;
std::shared_ptr<dispatch_pool> g_dispatch_pool;
int g_dispatch_threads;

} // namespace

std::shared_ptr<dispatch_pool> get_global_dispatch_pool()
{
int const num_threads = get_num_filter_threads();

std::scoped_lock lk(g_dispatch_lock);

if (g_dispatch_pool && num_threads == g_dispatch_threads) {
return g_dispatch_pool;
}

g_dispatch_pool = std::make_shared<dispatch_pool>(num_threads);
g_dispatch_threads = num_threads;

return g_dispatch_pool;
}

} // namespace Inkscape

/*
Local Variables:
mode:c++
c-file-style:"stroustrup"
c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . +))
indent-tabs-mode:nil
fill-column:99
End:
*/
// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:fileencoding=utf-8:textwidth=99 :
Loading

0 comments on commit b3c6d96

Please sign in to comment.