diff --git a/CMakeLists.txt b/CMakeLists.txt
index fcef43f9e0..3765ed4bd9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -112,7 +112,6 @@ option(ENABLE_LCMS "Compile with LCMS support" ON)
 option(WITH_SVG2 "Compile with support for new SVG2 features" ON)
 option(WITH_LPETOOL "Compile with LPE Tool" OFF)
 option(LPE_ENABLE_TEST_EFFECTS "Compile with test experimental LPEs enabled" OFF)
-option(WITH_OPENMP "Compile with OpenMP support" ON)
 option(WITH_PROFILING "Turn on profiling" OFF) # Set to true if compiler/linker should enable profiling
 option(BUILD_SHARED_LIBS "Compile libraries as shared and not static" ON)
 
@@ -287,7 +286,6 @@ message("WITH_LIBCDR:             ${WITH_LIBCDR}")
 message("WITH_LIBVISIO:           ${WITH_LIBVISIO}")
 message("WITH_LIBWPG:             ${WITH_LIBWPG}")
 message("WITH_NLS:                ${WITH_NLS}")
-message("WITH_OPENMP:             ${WITH_OPENMP}")
 message("WITH_JEMALLOC:           ${WITH_JEMALLOC}")
 message("WITH_ASAN:               ${WITH_ASAN}")
 message("WITH_INTERNAL_2GEOM:     ${WITH_INTERNAL_2GEOM}")
diff --git a/CMakeScripts/DefineDependsandFlags.cmake b/CMakeScripts/DefineDependsandFlags.cmake
index 454b16d59d..bebb49e5e0 100644
--- a/CMakeScripts/DefineDependsandFlags.cmake
+++ b/CMakeScripts/DefineDependsandFlags.cmake
@@ -391,26 +391,6 @@ list(APPEND INKSCAPE_INCS_SYS ${LIBXML2_INCLUDE_DIR})
 list(APPEND INKSCAPE_LIBS ${LIBXML2_LIBRARIES})
 add_definitions(${LIBXML2_DEFINITIONS})
 
-if(WITH_OPENMP)
-    find_package(OpenMP)
-    if(OPENMP_FOUND)
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-        list(APPEND INKSCAPE_CXX_FLAGS  ${OpenMP_CXX_FLAGS})
-        if(APPLE OR (MINGW AND ${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang"))
-            list(APPEND INKSCAPE_LIBS "-lomp")
-        endif()
-        mark_as_advanced(OpenMP_C_FLAGS)
-        mark_as_advanced(OpenMP_CXX_FLAGS)
-        # '-fopenmp' is in OpenMP_C_FLAGS, OpenMP_CXX_FLAGS and implies '-lgomp'
-        # uncomment explicit linking below if still needed:
-        set(HAVE_OPENMP ON)
-        #list(APPEND INKSCAPE_LIBS "-lgomp")  # FIXME
-    else()
-        set(HAVE_OPENMP OFF)
-        set(WITH_OPENMP OFF)
-    endif()
-endif()
-
 find_package(ZLIB REQUIRED)
 list(APPEND INKSCAPE_INCS_SYS ${ZLIB_INCLUDE_DIRS})
 list(APPEND INKSCAPE_LIBS ${ZLIB_LIBRARIES})
diff --git a/config.h.cmake b/config.h.cmake
index 982547b162..be0d833ad3 100644
--- a/config.h.cmake
+++ b/config.h.cmake
@@ -41,9 +41,6 @@
 /* Define to 1 if you have the <malloc.h> header file. */
 #cmakedefine HAVE_MALLOC_H 1
 
-/* Use OpenMP (via cmake) */
-#cmakedefine HAVE_OPENMP 1
-
 /* Use libpoppler for direct PDF import */
 #cmakedefine HAVE_POPPLER 1
 
diff --git a/src/display/CMakeLists.txt b/src/display/CMakeLists.txt
index 0e1d1c4908..0ad06d6c08 100644
--- a/src/display/CMakeLists.txt
+++ b/src/display/CMakeLists.txt
@@ -3,6 +3,7 @@
 set(display_SRC
     cairo-utils.cpp
     curve.cpp
+    dispatch-pool.cpp
     drawing-context.cpp
     drawing-group.cpp
     drawing-image.cpp
@@ -67,6 +68,7 @@ set(display_SRC
     cairo-templates.h
     cairo-utils.h
     curve.h
+    dispatch-pool.h
     drawing-context.h
     drawing-group.h
     drawing-image.h
diff --git a/src/display/cairo-templates.h b/src/display/cairo-templates.h
index b781561669..2595327265 100644
--- a/src/display/cairo-templates.h
+++ b/src/display/cairo-templates.h
@@ -13,17 +13,12 @@
 #ifndef SEEN_INKSCAPE_DISPLAY_CAIRO_TEMPLATES_H
 #define SEEN_INKSCAPE_DISPLAY_CAIRO_TEMPLATES_H
 
-#ifdef HAVE_CONFIG_H
-# include "config.h"  // only include where actually required!
-#endif
-
 #include <glib.h>
 
-#ifdef HAVE_OPENMP
-#include <omp.h>
+#include "dispatch-pool.h"
+
 // single-threaded operation if the number of pixels is below this threshold
-static const int OPENMP_THRESHOLD = 2048;
-#endif
+static const int POOL_THRESHOLD = 2048;
 
 #include <cmath>
 #include <algorithm>
@@ -69,20 +64,14 @@ void ink_cairo_surface_blend_internal(cairo_surface_t *out, cairo_surface_t *in1
     surface_accessor<Acc2> acc_in2(in2);
 
     // NOTE
-    // OpenMP probably doesn't help much here.
+    // Dispatching rows to the pool probably doesn't help much here.
     // It would be better to render more than 1 tile at a time.
-    #if HAVE_OPENMP
-    int const num_threads = get_num_filter_threads();
-    #endif
-
-    #if HAVE_OPENMP
-    #pragma omp parallel for if((w * h) > OPENMP_THRESHOLD) num_threads(num_threads)
-    #endif
-    for (int i = 0; i < h; ++i) {
+    auto const pool = get_global_dispatch_pool();
+    pool->dispatch_threshold(h, (w * h) > POOL_THRESHOLD, [&](int i, int) {
         for (int j = 0; j < w; ++j) {
             acc_out.set(j, i, blend(acc_in1.get(j, i), acc_in2.get(j, i)));
         }
-    }
+    });
 }
 
 template <typename AccOut, typename AccIn, typename Filter>
@@ -92,20 +81,14 @@ void ink_cairo_surface_filter_internal(cairo_surface_t *out, cairo_surface_t *in
     surface_accessor<AccIn> acc_in(in);
 
     // NOTE
-    // OpenMP probably doesn't help much here.
+    // Dispatching rows to the pool probably doesn't help much here.
     // It would be better to render more than 1 tile at a time.
-    #if HAVE_OPENMP
-    int const num_threads = get_num_filter_threads();
-    #endif
-
-    #if HAVE_OPENMP
-    #pragma omp parallel for if((w * h) > OPENMP_THRESHOLD) num_threads(num_threads)
-    #endif
-    for (int i = 0; i < h; ++i) {
+    auto const pool = get_global_dispatch_pool();
+    pool->dispatch_threshold(h, (w * h) > POOL_THRESHOLD, [&](int i, int) {
         for (int j = 0; j < w; ++j) {
             acc_out.set(j, i, filter(acc_in.get(j, i)));
         }
-    }
+    });
 }
 
 template <typename AccOut, typename Synth>
@@ -114,21 +97,17 @@ void ink_cairo_surface_synthesize_internal(cairo_surface_t *out, int x0, int y0,
     surface_accessor<AccOut> acc_out(out);
 
     // NOTE
-    // OpenMP probably doesn't help much here.
+    // Dispatching rows to the pool probably doesn't help much here.
     // It would be better to render more than 1 tile at a time.
-    #if HAVE_OPENMP
-    int const num_threads = get_num_filter_threads();
-    #endif
-
-    #if HAVE_OPENMP
     int const limit = (x1 - x0) * (y1 - y0);
-    #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(num_threads)
-    #endif
-    for (int i = y0; i < y1; ++i) {
+    auto const pool = get_global_dispatch_pool();
+    pool->dispatch_threshold(y1 - y0, limit > POOL_THRESHOLD, [&](int y, int) {
+        int const i = y0 + y;
+
         for (int j = x0; j < x1; ++j) {
             acc_out.set(j, i, synth(j, i));
         }
-    }
+    });
 }
 
 /**
diff --git a/src/display/cairo-utils.cpp b/src/display/cairo-utils.cpp
index 9271edf3a9..6ef55e4e55 100644
--- a/src/display/cairo-utils.cpp
+++ b/src/display/cairo-utils.cpp
@@ -1294,7 +1294,7 @@ static int ink_cairo_surface_average_color_internal(cairo_surface_t *surface, do
     int stride = cairo_image_surface_get_stride(surface);
     unsigned char *data = cairo_image_surface_get_data(surface);
 
-    /* TODO convert this to OpenMP somehow */
+    // TODO parallelize this somehow
     for (int y = 0; y < height; ++y, data += stride) {
         for (int x = 0; x < width; ++x) {
             guint32 px = *reinterpret_cast<guint32*>(data + 4*x);
diff --git a/src/display/dispatch-pool.cpp b/src/display/dispatch-pool.cpp
new file mode 100644
index 0000000000..25daddcd3e
--- /dev/null
+++ b/src/display/dispatch-pool.cpp
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Liam White
+ * Copyright (C) 2024 Authors
+ * Released under GNU GPL v2+, read the file 'COPYING' for more information.
+ */
+
+#include "dispatch-pool.h"
+
+#include <algorithm>
+
+#include "cairo-utils.h"
+
+namespace Inkscape {
+
+dispatch_pool::dispatch_pool(int size)
+{
+    // The thread calling dispatch() takes part in every job, so only (size - 1) workers are
+    // spawned; a requested size below 1 is treated as 1, i.e. no worker threads at all.
+    int const worker_count = std::max(size, 1) - 1;
+    _threads.reserve(worker_count);
+    // Workers get local ids 1..worker_count; id 0 is reserved for the dispatching thread.
+    for (local_id id = 1; id <= worker_count; ++id) {
+        _threads.emplace_back([id, this] { thread_func(id); });
+    }
+}
+
+dispatch_pool::~dispatch_pool()
+{
+    // TODO C++20: this would be completely trivial with jthread
+    // TODO C++20: dispatch_pool::~dispatch_pool() = default;
+    {
+        // Raise the flag under _lock so that no worker can miss it while entering its wait
+        std::lock_guard<std::mutex> guard(_lock);
+        _shutdown = true;
+    }
+    // Wake every idle worker; thread_func() returns as soon as it observes _shutdown
+    _available_cv.notify_all();
+    for (auto &worker : _threads) {
+        worker.join();
+    }
+}
+
+void dispatch_pool::dispatch(int count, dispatch_func function)
+{
+    std::scoped_lock lk(_dispatch_lock); // held for the whole call: one dispatch at a time
+    std::unique_lock lk2(_lock);         // guards the work counters below
+    // Publish the job while holding _lock so that waking workers see a consistent state
+    _available_work = global_id{};
+    _completed_work = global_id{};
+    _target_work = global_id{count};
+    _function = std::move(function);
+
+    // Execute the caller's batch (as local id 0), and signal to the next waiting thread
+    execute_batch(lk2, local_id{}, size());
+
+    // Wait until every job has completed; the caller's own batch is already done by now
+    _completed_cv.wait(lk2, [&] { return _completed_work == _target_work; });
+
+    // Release any extra memory held by the function
+    _function = {};
+}
+
+void dispatch_pool::thread_func(local_id id)
+{
+    // Worker loop: sleep until dispatch() publishes work or the destructor requests shutdown.
+    // size() must not be read up front: this thread may start while the constructor is still
+    // appending to _threads, so an early read would race with emplace_back().
+    std::unique_lock lk(_lock);
+
+    // TODO C++20: no need for _shutdown member once stop_token is available
+    // TODO C++20: while (_cv.wait(lk, stop_token, [&] { ... }))
+    while (true) {
+        _available_cv.wait(lk, [&] { return _shutdown || _available_work < _target_work; });
+        if (_shutdown) {
+            // When shutdown is requested, stop immediately
+            return;
+        }
+
+        // Work only appears via dispatch(), i.e. after construction, so size() is stable here
+        execute_batch(lk, id, size());
+    }
+}
+
+void dispatch_pool::execute_batch(std::unique_lock<std::mutex> &lk, local_id id, int thread_count)
+{
+    // Precondition: lk holds _lock. Claim up to ceil(_target_work / thread_count) jobs.
+    global_id const batch_size = (_target_work + thread_count - 1) / thread_count;
+    global_id const start = _available_work;
+    global_id const end = std::min(start + batch_size, _target_work);
+
+    // Take that much work: jobs [start, end) now belong to this thread
+    _available_work = end;
+
+    // Unlock and begin executing the function
+    {
+        lk.unlock();
+
+        // Now that the lock is released, wake one waiting worker; it re-checks whether any
+        // work is left, claims its own batch and wakes the next one in turn
+        _available_cv.notify_one();
+
+        // Execute the function without holding _lock so that batches can run concurrently
+        for (global_id index = start; index < end; index++) {
+            _function(index, id);
+        }
+
+        lk.lock();
+    }
+
+    // Signal completion: _lock is held again, so the counter update is serialized
+    _completed_work += (end - start);
+    // Only the thread finishing the last job notifies _completed_cv, which dispatch() waits on
+    if (_completed_work == _target_work) {
+        _completed_cv.notify_one();
+    }
+}
+
+namespace {
+// State behind get_global_dispatch_pool(); g_dispatch_lock guards the two variables below.
+std::mutex g_dispatch_lock;
+std::shared_ptr<dispatch_pool> g_dispatch_pool; // current shared pool; empty until first use
+int g_dispatch_threads; // thread count g_dispatch_pool was created with (zero-initialized)
+
+} // namespace
+
+std::shared_ptr<dispatch_pool> get_global_dispatch_pool()
+{
+    // Query the requested thread count before locking, so no lock is held during that call
+    int const wanted = get_num_filter_threads();
+
+    std::scoped_lock guard(g_dispatch_lock);
+    // (Re)create the pool on first use or whenever the requested thread count has changed.
+    // Callers still holding the previous pool keep it alive through their shared_ptr.
+    bool const reusable = g_dispatch_pool && wanted == g_dispatch_threads;
+    if (!reusable) {
+        g_dispatch_pool = std::make_shared<dispatch_pool>(wanted);
+        g_dispatch_threads = wanted;
+    }
+    return g_dispatch_pool;
+}
+
+} // namespace Inkscape
+
+/*
+  Local Variables:
+  mode:c++
+  c-file-style:"stroustrup"
+  c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . +))
+  indent-tabs-mode:nil
+  fill-column:99
+  End:
+*/
+// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:fileencoding=utf-8:textwidth=99 :
diff --git a/src/display/dispatch-pool.h b/src/display/dispatch-pool.h
new file mode 100644
index 0000000000..b240001948
--- /dev/null
+++ b/src/display/dispatch-pool.h
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Liam White
+ * Copyright (C) 2024 Authors
+ * Released under GNU GPL v2+, read the file 'COPYING' for more information.
+ */
+
+#ifndef INKSCAPE_DISPLAY_DISPATCH_POOL_H
+#define INKSCAPE_DISPLAY_DISPATCH_POOL_H
+
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <thread>
+#include <vector>
+
+namespace Inkscape {
+
+/**
+ * General-purpose, parallel thread dispatch mechanism.
+ *
+ * A dispatch is a compute job which is parameterized by a counter. It can also be thought of
+ * as a way to parallelize a for loop. For example, the following single-threaded loop
+ *
+ *     for (int i = 0; i < count; ++i) {
+ *         do_work(i);
+ *     }
+ *
+ * can be rewritten to use a dispatch_pool and operate in parallel like this:
+ *
+ *     pool.dispatch(count, [&](int i, int local_id) {
+ *         do_work(i);
+ *     });
+ *
+ * Finally, it is also possible to perform all jobs on the calling thread unless a threshold
+ * condition is met (like dispatch size). This can be used if threading the operation would be
+ * less efficient unless the work is at least a certain size:
+ *
+ *     pool.dispatch_threshold(count, count > 1024, [&](int i, int local_id) {
+ *         do_work(i);
+ *     });
+ *
+ * Unlike boost's asio::thread_pool, which pushes work for threads onto a queue, this class only
+ * supports operation via a counter. The simpler design allows dispatching a very large amount of
+ * work (potentially millions of jobs, for every pixel in a megapixel image) with constant
+ * memory overhead.
+ *
+ * A pool's thread count is fixed upon construction and cannot change during operation. If you
+ * allocate work buffers for each thread in the pool, you can use the size() method to determine
+ * how many threads it has been created with.
+ *
+ * By design, only one dispatch may run at a time. It is safe to call dispatch() from multiple
+ * threads without extra locking.
+ *
+ * Terminology used is designed to loosely follow that of OpenCL kernels or GL/VK compute shaders:
+ * - Global ID within a dispatch refers to the 0-based counter value for a given job.
+ * - Local ID within a dispatch refers to the 0-based index of the thread processing the job.
+ *   This will always be less than the pool's size().
+ *
+ * The first parameter to the callback is global ID. The second parameter, which is unused in the
+ * example, is the local ID. The local ID is primarily useful if a work buffer is allocated for
+ * each thread in the dispatch_pool ahead of time.
+ */
+class dispatch_pool
+{
+public:
+    using global_id = int; // 0-based index of a job within one dispatch
+    using local_id = int;  // index of the thread running a job; 0 is the caller of dispatch()
+    using dispatch_func = std::function<void(global_id, local_id)>;
+    // Spawns max(size, 1) - 1 worker threads; the thread calling dispatch() acts as thread 0
+    explicit dispatch_pool(int size);
+    ~dispatch_pool();
+    // Calls function(i, local_id) once for every i in [0, count) and returns when all are done.
+    // Not re-entrant: a job must not call dispatch() on the pool that is running it.
+    void dispatch(int count, dispatch_func function);
+    // Like dispatch() if `threshold` is true; otherwise runs every job on the calling thread
+    template <typename F>
+    void dispatch_threshold(int count, bool threshold, F &&function)
+    {
+        if (threshold) {
+            dispatch(count, std::forward<F>(function));
+        } else {
+            for (auto i = global_id{}; i < global_id{count}; i++) {
+                function(i, local_id{});
+            }
+        }
+    }
+
+    int size() const
+    {
+        // The calling thread participates in the dispatch
+        return static_cast<int>(_threads.size()) + 1;
+    }
+
+private:
+    void thread_func(local_id id);
+    void execute_batch(std::unique_lock<std::mutex> &lk, local_id id, int thread_count);
+
+private:
+    global_id _available_work{}; // first job index not yet claimed by any thread
+    global_id _completed_work{}; // number of jobs finished in the current dispatch
+    global_id _target_work{};    // total number of jobs in the current dispatch
+    bool _shutdown{};            // set by the destructor to make the workers exit
+    std::mutex _dispatch_lock;   // serializes dispatch() calls
+    std::mutex _lock;            // guards the counters above and _shutdown
+    std::condition_variable _available_cv; // wakes workers: new work or shutdown
+    std::condition_variable _completed_cv; // notified once all jobs have completed
+    dispatch_func _function;               // job callback of the current dispatch
+    std::vector<std::thread> _threads;     // worker threads (excludes the caller)
+};
+// Shared pool; recreated whenever get_num_filter_threads() returns a different value.
+std::shared_ptr<dispatch_pool> get_global_dispatch_pool();
+
+} // namespace Inkscape
+
+#endif // INKSCAPE_DISPLAY_DISPATCH_POOL_H
+
+/*
+  Local Variables:
+  mode:c++
+  c-file-style:"stroustrup"
+  c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . +))
+  indent-tabs-mode:nil
+  fill-column:99
+  End:
+*/
+// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:fileencoding=utf-8:textwidth=99 :
diff --git a/src/display/nr-filter-gaussian.cpp b/src/display/nr-filter-gaussian.cpp
index 4e04bfd62f..eccf3b249d 100644
--- a/src/display/nr-filter-gaussian.cpp
+++ b/src/display/nr-filter-gaussian.cpp
@@ -12,21 +12,15 @@
  * Released under GNU GPL v2+, read the file 'COPYING' for more information.
  */
 
-#ifdef HAVE_CONFIG_H
-# include "config.h"  // only include where actually required!
-#endif
-
 #include <algorithm>
 #include <cmath>
 #include <complex>
 #include <cstdlib>
 #include <glib.h>
 #include <limits>
-#if HAVE_OPENMP
-#include <omp.h>
-#endif //HAVE_OPENMP
 
 #include "display/cairo-utils.h"
+#include "display/dispatch-pool.h"
 #include "display/nr-filter-primitive.h"
 #include "display/nr-filter-gaussian.h"
 #include "display/nr-filter-types.h"
@@ -290,7 +284,7 @@ static void
 filter2D_IIR(PT *const dest, int const dstr1, int const dstr2,
              PT const *const src, int const sstr1, int const sstr2,
              int const n1, int const n2, IIRValue const b[N+1], double const M[N*N],
-             IIRValue *const tmpdata[], int const num_threads)
+             IIRValue *const tmpdata[], dispatch_pool &pool)
 {
     assert(src && dest);
 
@@ -302,16 +296,7 @@ filter2D_IIR(PT *const dest, int const dstr1, int const dstr2,
     #define PREMUL_ALPHA_LOOP for(unsigned int c=1; c<PC; ++c)
 #endif
 
-INK_UNUSED(num_threads); // to suppress unused argument compiler warning
-#if HAVE_OPENMP
-#pragma omp parallel for num_threads(num_threads)
-#endif // HAVE_OPENMP
-    for ( int c2 = 0 ; c2 < n2 ; c2++ ) {
-#if HAVE_OPENMP
-        unsigned int tid = omp_get_thread_num();
-#else
-        unsigned int tid = 0;
-#endif // HAVE_OPENMP
+    pool.dispatch(n2, [&](int c2, int tid) {
         // corresponding line in the source and output buffer
         PT const * srcimg = src  + c2*sstr2;
         PT       * dstimg = dest + c2*dstr2 + n1*dstr1;
@@ -357,7 +342,7 @@ INK_UNUSED(num_threads); // to suppress unused argument compiler warning
                 for(unsigned int c=0; c<PC; c++) dstimg[c] = clip_round_cast<PT>(v[0][c]);
             }
         }
-    }
+    });
 }
 
 // Filters over 1st dimension
@@ -367,18 +352,13 @@ template<typename PT, unsigned int PC>
 static void
 filter2D_FIR(PT *const dst, int const dstr1, int const dstr2,
              PT const *const src, int const sstr1, int const sstr2,
-             int const n1, int const n2, FIRValue const *const kernel, int const scr_len, int const num_threads)
+             int const n1, int const n2, FIRValue const *const kernel, int const scr_len, dispatch_pool &pool)
 {
     assert(src && dst);
 
-    // Past pixels seen (to enable in-place operation)
-    PT history[scr_len+1][PC];
-
-INK_UNUSED(num_threads); // suppresses unused argument compiler warning
-#if HAVE_OPENMP
-#pragma omp parallel for num_threads(num_threads) private(history)
-#endif // HAVE_OPENMP
-    for ( int c2 = 0 ; c2 < n2 ; c2++ ) {
+    pool.dispatch(n2, [&](int c2, int) {
+        // Past pixels seen (to enable in-place operation)
+        PT history[scr_len + 1][PC];
 
         // corresponding line in the source buffer
         int const src_line = c2 * sstr2;
@@ -465,12 +445,12 @@ INK_UNUSED(num_threads); // suppresses unused argument compiler warning
                 }
             }
         }
-    }
+    });
 }
 
 static void
 gaussian_pass_IIR(Geom::Dim2 d, double deviation, cairo_surface_t *src, cairo_surface_t *dest,
-    IIRValue **tmpdata, int num_threads)
+    IIRValue **tmpdata, dispatch_pool &pool)
 {
     // Filter variables
     IIRValue b[N+1];  // scaling coefficient + filter coefficients (can be 10.21 fixed point)
@@ -500,13 +480,13 @@ gaussian_pass_IIR(Geom::Dim2 d, double deviation, cairo_surface_t *src, cairo_su
         filter2D_IIR<unsigned char,1,false>(
             cairo_image_surface_get_data(dest), d == Geom::X ? 1 : stride, d == Geom::X ? stride : 1,
             cairo_image_surface_get_data(src),  d == Geom::X ? 1 : stride, d == Geom::X ? stride : 1,
-            w, h, b, M, tmpdata, num_threads);
+            w, h, b, M, tmpdata, pool);
         break;
     case CAIRO_FORMAT_ARGB32: ///< Premultiplied 8 bit RGBA
         filter2D_IIR<unsigned char,4,true>(
             cairo_image_surface_get_data(dest), d == Geom::X ? 4 : stride, d == Geom::X ? stride : 4,
             cairo_image_surface_get_data(src),  d == Geom::X ? 4 : stride, d == Geom::X ? stride : 4,
-            w, h, b, M, tmpdata, num_threads);
+            w, h, b, M, tmpdata, pool);
         break;
     default:
         g_warning("gaussian_pass_IIR: unsupported image format");
@@ -515,7 +495,7 @@ gaussian_pass_IIR(Geom::Dim2 d, double deviation, cairo_surface_t *src, cairo_su
 
 static void
 gaussian_pass_FIR(Geom::Dim2 d, double deviation, cairo_surface_t *src, cairo_surface_t *dest,
-    int num_threads)
+    dispatch_pool &pool)
 {
     int scr_len = _effect_area_scr(deviation);
     // Filter kernel for x direction
@@ -533,13 +513,13 @@ gaussian_pass_FIR(Geom::Dim2 d, double deviation, cairo_surface_t *src, cairo_su
         filter2D_FIR<unsigned char,1>(
             cairo_image_surface_get_data(dest), d == Geom::X ? 1 : stride, d == Geom::X ? stride : 1,
             cairo_image_surface_get_data(src),  d == Geom::X ? 1 : stride, d == Geom::X ? stride : 1,
-            w, h, &kernel[0], scr_len, num_threads);
+            w, h, &kernel[0], scr_len, pool);
         break;
     case CAIRO_FORMAT_ARGB32: ///< Premultiplied 8 bit RGBA
         filter2D_FIR<unsigned char,4>(
             cairo_image_surface_get_data(dest), d == Geom::X ? 4 : stride, d == Geom::X ? stride : 4,
             cairo_image_surface_get_data(src),  d == Geom::X ? 4 : stride, d == Geom::X ? stride : 4,
-            w, h, &kernel[0], scr_len, num_threads);
+            w, h, &kernel[0], scr_len, pool);
         break;
     default:
         g_warning("gaussian_pass_FIR: unsupported image format");
@@ -597,8 +577,9 @@ void FilterGaussian::render_cairo(FilterSlot &slot) const
             bytes_per_pixel = 4; break;
     }
 
+    auto const pool = get_global_dispatch_pool();
     int quality = slot.get_blurquality();
-    int threads = get_num_filter_threads();
+    int threads = pool->size();
     int x_step = 1 << _effect_subsample_step_log2(deviation_x_orig, quality);
     int y_step = 1 << _effect_subsample_step_log2(deviation_y_orig, quality);
     bool resampling = x_step > 1 || y_step > 1;
@@ -647,17 +628,17 @@ void FilterGaussian::render_cairo(FilterSlot &slot) const
 
     if (scr_len_x > 0) {
         if (use_IIR_x) {
-            gaussian_pass_IIR(Geom::X, deviation_x, downsampled, downsampled, tmpdata, threads);
+            gaussian_pass_IIR(Geom::X, deviation_x, downsampled, downsampled, tmpdata, *pool);
         } else {
-            gaussian_pass_FIR(Geom::X, deviation_x, downsampled, downsampled, threads);
+            gaussian_pass_FIR(Geom::X, deviation_x, downsampled, downsampled, *pool);
         }
     }
 
     if (scr_len_y > 0) {
         if (use_IIR_y) {
-            gaussian_pass_IIR(Geom::Y, deviation_y, downsampled, downsampled, tmpdata, threads);
+            gaussian_pass_IIR(Geom::Y, deviation_y, downsampled, downsampled, tmpdata, *pool);
         } else {
-            gaussian_pass_FIR(Geom::Y, deviation_y, downsampled, downsampled, threads);
+            gaussian_pass_FIR(Geom::Y, deviation_y, downsampled, downsampled, *pool);
         }
     }
 
diff --git a/src/display/nr-filter-morphology.cpp b/src/display/nr-filter-morphology.cpp
index 1cc2aadea7..52f09ad3fd 100644
--- a/src/display/nr-filter-morphology.cpp
+++ b/src/display/nr-filter-morphology.cpp
@@ -65,11 +65,9 @@ void morphologicalFilter1D(cairo_surface_t * const input, cairo_surface_t * cons
     int ri = round(radius); // TODO: Support fractional radii?
     int wi = 2*ri+1;
 
-    #if HAVE_OPENMP
-    int limit = w * h;
-    #pragma omp parallel for if(limit > OPENMP_THRESHOLD) num_threads(get_num_filter_threads())
-    #endif // HAVE_OPENMP
-    for (int i = 0; i < h; ++i) {
+    int const limit = w * h;
+    auto const pool = get_global_dispatch_pool();
+    pool->dispatch_threshold(h, limit > POOL_THRESHOLD, [&](int i, int) {
         // TODO: Store position and value in one 32 bit integer? 24 bits should be enough for a position, it would be quite strange to have an image with a width/height of more than 16 million(!).
         std::deque<std::pair<int, unsigned char>> vals[BPP]; // In my tests it was actually slightly faster to allocate it here than allocate it once for all threads and retrieving the correct set based on the thread id.
 
@@ -148,7 +146,7 @@ void morphologicalFilter1D(cairo_surface_t * const input, cairo_surface_t * cons
             }
             if (axis == Geom::Y) out_p += strideout - BPP;
         }
-    }
+    });
 
     cairo_surface_mark_dirty(out);
 }