Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplifying RTNeural AVX usage and other compiler-related tweaks #337

Merged
merged 6 commits into from
Nov 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ juce_add_plugin(BYOD
ProductName "BYOD"
ICON_BIG res/logo.png
NEEDS_MIDI_INPUT True
VST3_AUTO_MANIFEST FALSE

VST2_CATEGORY kPlugCategEffect
VST3_CATEGORIES Fx Distortion
Expand Down
2 changes: 1 addition & 1 deletion modules/JUCE
Submodule JUCE updated 1435 files
2 changes: 1 addition & 1 deletion modules/RTNeural
Submodule RTNeural updated 61 files
+5 −0 RTNeural/CMakeLists.txt
+2 −2 RTNeural/Layer.h
+2 −2 RTNeural/Model.h
+2 −2 RTNeural/ModelT.h
+4 −4 RTNeural/RTNeural.cpp
+4 −0 RTNeural/RTNeural.h
+4 −4 RTNeural/activation/activation.h
+2 −2 RTNeural/activation/activation_eigen.h
+2 −2 RTNeural/activation/activation_xsimd.h
+1 −1 RTNeural/batchnorm/batchnorm.h
+1 −1 RTNeural/batchnorm/batchnorm.tpp
+1 −1 RTNeural/batchnorm/batchnorm2d.h
+1 −1 RTNeural/batchnorm/batchnorm2d.tpp
+1 −1 RTNeural/batchnorm/batchnorm2d_eigen.h
+1 −1 RTNeural/batchnorm/batchnorm2d_eigen.tpp
+1 −1 RTNeural/batchnorm/batchnorm2d_xsimd.h
+1 −1 RTNeural/batchnorm/batchnorm2d_xsimd.tpp
+1 −1 RTNeural/batchnorm/batchnorm_eigen.h
+1 −1 RTNeural/batchnorm/batchnorm_eigen.tpp
+1 −1 RTNeural/batchnorm/batchnorm_xsimd.h
+1 −1 RTNeural/batchnorm/batchnorm_xsimd.tpp
+8 −8 RTNeural/common.h
+2 −2 RTNeural/conv1d/conv1d.h
+2 −2 RTNeural/conv1d/conv1d.tpp
+2 −2 RTNeural/conv1d/conv1d_eigen.h
+2 −2 RTNeural/conv1d/conv1d_eigen.tpp
+2 −2 RTNeural/conv1d/conv1d_xsimd.h
+2 −2 RTNeural/conv1d/conv1d_xsimd.tpp
+1 −1 RTNeural/conv1d_stateless/conv1d_stateless.h
+2 −2 RTNeural/conv1d_stateless/conv1d_stateless.tpp
+1 −1 RTNeural/conv1d_stateless/conv1d_stateless_eigen.h
+2 −2 RTNeural/conv1d_stateless/conv1d_stateless_eigen.tpp
+1 −1 RTNeural/conv1d_stateless/conv1d_stateless_xsimd.h
+2 −2 RTNeural/conv1d_stateless/conv1d_stateless_xsimd.tpp
+1 −1 RTNeural/conv2d/conv2d.h
+2 −2 RTNeural/conv2d/conv2d.tpp
+1 −1 RTNeural/conv2d/conv2d_eigen.h
+2 −2 RTNeural/conv2d/conv2d_eigen.tpp
+1 −1 RTNeural/conv2d/conv2d_xsimd.h
+2 −2 RTNeural/conv2d/conv2d_xsimd.tpp
+2 −2 RTNeural/dense/dense.h
+2 −2 RTNeural/dense/dense_eigen.h
+2 −2 RTNeural/dense/dense_xsimd.h
+2 −2 RTNeural/gru/gru.h
+2 −2 RTNeural/gru/gru.tpp
+2 −2 RTNeural/gru/gru_eigen.h
+2 −2 RTNeural/gru/gru_eigen.tpp
+2 −2 RTNeural/gru/gru_xsimd.h
+2 −2 RTNeural/gru/gru_xsimd.tpp
+2 −2 RTNeural/lstm/lstm.h
+2 −2 RTNeural/lstm/lstm.tpp
+2 −2 RTNeural/lstm/lstm_eigen.h
+2 −2 RTNeural/lstm/lstm_eigen.tpp
+2 −2 RTNeural/lstm/lstm_xsimd.h
+2 −2 RTNeural/lstm/lstm_xsimd.tpp
+1 −1 RTNeural/maths/maths_eigen.h
+1 −1 RTNeural/maths/maths_stl.h
+1 −1 RTNeural/maths/maths_xsimd.h
+2 −2 RTNeural/model_loader.h
+1 −1 RTNeural/torch_helpers.h
+1 −1 examples/hello_rtneural/Makefile
3 changes: 3 additions & 0 deletions modules/cmake/WarningFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ if(WIN32)
-Wno-cast-function-type
-Wno-range-loop-bind-reference
-Wno-sign-conversion
-Wno-implicit-int-float-conversion
-Wno-implicit-const-int-float-conversion
-Wno-header-hygiene
)
elseif((CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") OR (CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
message(STATUS "Setting MSVC compiler flags")
Expand Down
6 changes: 3 additions & 3 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,11 @@ if (NOT(${JAI_COMPILER} STREQUAL "JAI_COMPILER-NOTFOUND"))
endif()

# AVX/SSE files for accelerated neural nets
make_lib_simd_runtime(rnn_accelerated processors/drive/neural_utils/RNNAccelerated.cpp)
if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../modules/math_approx")
message(STATUS "Using RTNeural with math_approx")
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../modules/math_approx" math_approx)
endif()
make_lib_simd_runtime(rnn_accelerated processors/drive/neural_utils/RNNAccelerated.cpp)
foreach(target IN ITEMS rnn_accelerated_sse_or_arm rnn_accelerated_avx)
target_link_libraries(${target} PRIVATE config_flags juce::juce_recommended_lto_flags warning_flags)
target_include_directories(${target}
Expand All @@ -191,8 +191,8 @@ foreach(target IN ITEMS rnn_accelerated_sse_or_arm rnn_accelerated_avx)
target_link_libraries(${target} PRIVATE math_approx)
endif()
endforeach()
target_compile_definitions(rnn_accelerated_sse_or_arm PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=16)
target_compile_definitions(rnn_accelerated_avx PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=32)
target_compile_definitions(rnn_accelerated_sse_or_arm PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=16 RTNEURAL_NAMESPACE=RTNeural_sse_arm)
target_compile_definitions(rnn_accelerated_avx PRIVATE RTNEURAL_DEFAULT_ALIGNMENT=32 RTNEURAL_NAMESPACE=RTNeural_avx)
target_link_libraries(BYOD PRIVATE rnn_accelerated)

# special flags for MSVC
Expand Down
2 changes: 1 addition & 1 deletion src/jai/SharedJaiContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ struct JaiContextWrapper
JaiContextWrapper();
~JaiContextWrapper();

operator jai::Context*() { return internal; }; // NOLINT
operator jai::Context*() { return internal; } // NOLINT

private:
jai::Context* internal = nullptr;
Expand Down
3 changes: 3 additions & 0 deletions src/pch.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ JUCE_BEGIN_IGNORE_WARNINGS_GCC_LIKE ("-Wzero-as-null-pointer-constant",
#include <RTNeural/RTNeural.h>
JUCE_END_IGNORE_WARNINGS_GCC_LIKE

JUCE_BEGIN_IGNORE_WARNINGS_GCC_LIKE ("-Wshadow-field-in-constructor")
#include <chowdsp_wdf/chowdsp_wdf.h>
JUCE_END_IGNORE_WARNINGS_GCC_LIKE

#include <ea_variant/ea_variant.h>
#include <sst/cpputils.h>

Expand Down
13 changes: 5 additions & 8 deletions src/processors/drive/GuitarMLAmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,13 @@ class GuitarMLAmp : public BaseProcessor
double processSampleRate = 96000.0;
std::shared_ptr<FileChooser> customModelChooser;

#if JUCE_INTEL
template <int numIns, int hiddenSize>
using GuitarML_LSTM = EA::Variant<
rnn_sse::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>,
rnn_avx::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>>;
#else
template <int numIns, int hiddenSize>
using GuitarML_LSTM = EA::Variant<
rnn_arm::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>>;
using GuitarML_LSTM = EA::Variant<rnn_sse_arm::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>
#if JUCE_INTEL
,
rnn_avx::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>
#endif
>;

using LSTM40Cond = GuitarML_LSTM<2, 40>;
using LSTM40NoCond = GuitarML_LSTM<1, 40>;
Expand Down
52 changes: 16 additions & 36 deletions src/processors/drive/neural_utils/RNNAccelerated.cpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,5 @@
#include "RNNAccelerated.h"

#if __AVX__
#define RTNeural RTNeural_avx
#define xsimd xsimd_avx
#elif __SSE__
#define RTNeural RTNeural_sse
#define xsimd xsimd_sse
#else
#define RTNeural RTNeural_arm
#define xsimd xsimd_arm
#endif

#if __clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
Expand All @@ -37,6 +26,9 @@ struct ApproxMathsProvider
return math_approx::sigmoid<9> (x);
}
};
using RNNMathsProvider = ApproxMathsProvider;
#else
using RNNMathsProvider = RTNEURAL_NAMESPACE::DefaultMathsProvider;
#endif

#include "model_loaders.h"
Expand All @@ -45,34 +37,22 @@ struct ApproxMathsProvider
#pragma GCC diagnostic pop
#endif

#if (__aarch64__ || __arm__)
namespace rnn_arm
{
#elif __AVX__ || (_MSC_VER && BYOD_COMPILING_WITH_AVX)
#if (__MMX__ || __SSE__ || __amd64__) && BYOD_COMPILING_WITH_AVX // INTEL + AVX
namespace rnn_avx
{
#elif __SSE__ || (_MSC_VER && ! BYOD_COMPILING_WITH_AVX)
namespace rnn_sse
{
#else
#error "Unknown or un-supported platform!"
namespace rnn_sse_arm
#endif

{
#if ! (XSIMD_WITH_NEON && BYOD_COMPILING_WITH_AVX)

// Private implementation struct of RNNAccelerated: it holds the network object
// (`model`) that the prepare() overloads in this file reach via internal->model.
// The recurrent layer type is picked at compile time: LSTMLayerT when the
// RecurrentLayerType template argument equals RecurrentLayerType::LSTMLayer,
// GRULayerT otherwise. The second layer is a DenseT<float, hiddenSize, 1>.
// NOTE(review): this listing carries both the RTNeural:: declarations (with the
// RTNEURAL_USE_MATH_APPROX #if/#else split) and the RTNEURAL_NAMESPACE:: /
// RNNMathsProvider declarations -- this looks like the before/after lines of a
// diff; confirm that only one set is meant to remain.
template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
struct RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::Internal
{
using RecurrentLayerTypeComplete = std::conditional_t<RecurrentLayerType == RecurrentLayerType::LSTMLayer,
#if RTNEURAL_USE_MATH_APPROX
RTNeural::LSTMLayerT<float, inputSize, hiddenSize, (RTNeural::SampleRateCorrectionMode) SRCMode, ApproxMathsProvider>,
RTNeural::GRULayerT<float, inputSize, hiddenSize, (RTNeural::SampleRateCorrectionMode) SRCMode, ApproxMathsProvider>>;
#else
RTNeural::LSTMLayerT<float, inputSize, hiddenSize, (RTNeural::SampleRateCorrectionMode) SRCMode>,
RTNeural::GRULayerT<float, inputSize, hiddenSize, (RTNeural::SampleRateCorrectionMode) SRCMode>>;
#endif
using DenseLayerType = RTNeural::DenseT<float, hiddenSize, 1>;
RTNeural::ModelT<float, inputSize, 1, RecurrentLayerTypeComplete, DenseLayerType> model;
RTNEURAL_NAMESPACE::LSTMLayerT<float, inputSize, hiddenSize, (RTNEURAL_NAMESPACE::SampleRateCorrectionMode) SRCMode, RNNMathsProvider>,
RTNEURAL_NAMESPACE::GRULayerT<float, inputSize, hiddenSize, (RTNEURAL_NAMESPACE::SampleRateCorrectionMode) SRCMode, RNNMathsProvider>>;
using DenseLayerType = RTNEURAL_NAMESPACE::DenseT<float, hiddenSize, 1>;
RTNEURAL_NAMESPACE::ModelT<float, inputSize, 1, RecurrentLayerTypeComplete, DenseLayerType> model;
};

template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
Expand All @@ -98,7 +78,7 @@ void RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::initial
template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
void RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::prepare ([[maybe_unused]] int rnnDelaySamples)
{
if constexpr (SRCMode == (int) RTNeural::SampleRateCorrectionMode::NoInterp)
if constexpr (SRCMode == (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::NoInterp)
{
internal->model.template get<0>().prepare (rnnDelaySamples);
internal->model.reset();
Expand All @@ -108,7 +88,7 @@ void RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::prepare
template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
void RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::prepare ([[maybe_unused]] float rnnDelaySamples)
{
if constexpr (SRCMode == (int) RTNeural::SampleRateCorrectionMode::LinInterp)
if constexpr (SRCMode == (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::LinInterp)
{
internal->model.template get<0>().prepare (rnnDelaySamples);
internal->model.reset();
Expand Down Expand Up @@ -160,9 +140,9 @@ void RNNAccelerated<inputSize, hiddenSize, RecurrentLayerType, SRCMode>::process
}
}

template class RNNAccelerated<1, 28, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::NoInterp>; // MetalFace
template class RNNAccelerated<2, 24, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::NoInterp>; // BassFace
template class RNNAccelerated<1, 40, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>; // GuitarML (no-cond)
template class RNNAccelerated<2, 40, RecurrentLayerType::LSTMLayer, (int) RTNeural::SampleRateCorrectionMode::LinInterp>; // GuitarML (cond)
template class RNNAccelerated<1, 28, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::NoInterp>; // MetalFace
template class RNNAccelerated<2, 24, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::NoInterp>; // BassFace
template class RNNAccelerated<1, 40, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::LinInterp>; // GuitarML (no-cond)
template class RNNAccelerated<2, 40, RecurrentLayerType::LSTMLayer, (int) RTNEURAL_NAMESPACE::SampleRateCorrectionMode::LinInterp>; // GuitarML (cond)
#endif // NEON + AVX
}
40 changes: 3 additions & 37 deletions src/processors/drive/neural_utils/RNNAccelerated.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once

#include <memory>
#include <modules/json/json.hpp>
#include <span>

Expand All @@ -10,8 +9,7 @@ constexpr int LSTMLayer = 1;
constexpr int GRULayer = 2;
} // namespace RecurrentLayerType

#if __aarch64__ || __arm__
namespace rnn_arm
namespace rnn_sse_arm
{
template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
class RNNAccelerated
Expand Down Expand Up @@ -42,41 +40,9 @@ class RNNAccelerated
static constexpr size_t alignment = 16;
alignas (alignment) char internal_data[max_model_size] {};
};
} // namespace rnn_arm
#else // intel
namespace rnn_sse
{
/**
 * Recurrent-network wrapper declared in the rnn_sse namespace.
 *
 * Template parameters: input width, hidden width, a RecurrentLayerType
 * constant (LSTMLayer or GRULayer), and an integer SRCMode. Users of this
 * template elsewhere in the listing pass an
 * RTNeural::SampleRateCorrectionMode value cast to int for SRCMode.
 *
 * The network itself sits behind the Internal struct, which is only
 * forward-declared here.
 */
template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
class RNNAccelerated
{
public:
RNNAccelerated();
~RNNAccelerated();

// Instances can be neither copied nor moved.
RNNAccelerated (const RNNAccelerated&) = delete;
RNNAccelerated& operator= (const RNNAccelerated&) = delete;
RNNAccelerated (RNNAccelerated&&) noexcept = delete;
RNNAccelerated& operator= (RNNAccelerated&&) noexcept = delete;

// Reads the network weights from the given JSON document.
void initialise (const nlohmann::json& weights_json);

// Two overloads keyed on the delay type. In RNNAccelerated.cpp the int overload
// only acts when SRCMode is NoInterp, and the float overload only when SRCMode
// is LinInterp; in the other mode each overload is a no-op.
void prepare (int rnnDelaySamples);
void prepare (float rnnDelaySamples);
void reset();

// Both take the audio buffer as a mutable span; process_conditioned also takes
// a read-only condition span. useResiduals defaults to false.
void process (std::span<float> buffer, bool useResiduals = false) noexcept;
void process_conditioned (std::span<float> buffer, std::span<const float> condition, bool useResiduals = false) noexcept;

private:
struct Internal;
Internal* internal = nullptr;

// Zero-initialised, 16-byte-aligned byte buffer of max_model_size bytes.
// NOTE(review): presumably the in-place storage that `internal` points into --
// confirm against the constructor in RNNAccelerated.cpp.
static constexpr size_t max_model_size = 30000;
static constexpr size_t alignment = 16;
alignas (alignment) char internal_data[max_model_size] {};
};
} // namespace rnn_sse
} // namespace rnn_sse_arm

#if __MMX__ || __SSE__ || __amd64__ // INTEL
namespace rnn_avx
{
template <int inputSize, int hiddenSize, int RecurrentLayerType, int SRCMode>
Expand Down
10 changes: 5 additions & 5 deletions src/processors/drive/neural_utils/ResampledRNNAccelerated.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@ class ResampledRNNAccelerated
}

private:
EA::Variant<rnn_sse_arm::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType, (int) RTNeural::SampleRateCorrectionMode::NoInterp>
#if JUCE_INTEL
EA::Variant<rnn_sse::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType, (int) RTNeural::SampleRateCorrectionMode::NoInterp>,
rnn_avx::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType, (int) RTNeural::SampleRateCorrectionMode::NoInterp>>
model_variant;
#elif JUCE_ARM
EA::Variant<rnn_arm::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType, (int) RTNeural::SampleRateCorrectionMode::NoInterp>> model_variant;
,
rnn_avx::RNNAccelerated<numIns, hiddenSize, RecurrentLayerType, (int) RTNeural::SampleRateCorrectionMode::NoInterp>
#endif
>
model_variant;

using ResamplerType = chowdsp::ResamplingTypes::LanczosResampler<8192, 8>;
chowdsp::ResampledProcess<ResamplerType> resampler;
Expand Down
10 changes: 6 additions & 4 deletions src/processors/drive/neural_utils/model_loaders.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <RTNeural/RTNeural.h>

namespace model_loaders
{
using Vec2d = std::vector<std::vector<float>>;
Expand All @@ -22,8 +24,8 @@ template <typename ModelType>
// Fills a two-layer model from a weights JSON document: layer 0 is loaded with
// torch_helpers::loadLSTM using the "rec." prefix, layer 1 with
// torch_helpers::loadDense using the "lin." prefix.
void loadLSTMModel (ModelType& model, const nlohmann::json& weights_json)
{
// All weights are looked up under the top-level "state_dict" entry.
const auto& state_dict = weights_json.at ("state_dict");
// NOTE(review): each loader call appears twice below, once spelled RTNeural::
// and once RTNEURAL_NAMESPACE:: -- this looks like the before/after lines of a
// diff; confirm that only one pair is meant to remain.
RTNeural::torch_helpers::loadLSTM<float> (state_dict, "rec.", model.template get<0>());
RTNeural::torch_helpers::loadDense<float> (state_dict, "lin.", model.template get<1>());
RTNEURAL_NAMESPACE::torch_helpers::loadLSTM<float> (state_dict, "rec.", model.template get<0>());
RTNEURAL_NAMESPACE::torch_helpers::loadDense<float> (state_dict, "lin.", model.template get<1>());
}

template <typename ModelType>
Expand All @@ -38,7 +40,7 @@ void loadGRUModel (ModelType& model, const nlohmann::json& weights_json)

int layer_idx = 0;
const auto& gru_weights = gru_layer_json["weights"];
RTNeural::json_parser::loadGRU<float> (gru, gru_weights);
RTNeural::modelt_detail::loadLayer<float> (dense, layer_idx, dense_layer_json, "dense", 1, false);
RTNEURAL_NAMESPACE::json_parser::loadGRU<float> (gru, gru_weights);
RTNEURAL_NAMESPACE::modelt_detail::loadLayer<float> (dense, layer_idx, dense_layer_json, "dense", 1, false);
}
} // namespace model_loaders
Loading