Skip to content

Commit

Permalink
Address inspect tool, check module cmakelists, warnings and spell check
Browse files Browse the repository at this point in the history
- missing includes
- prevent max/min being expanded as macros
- minor spell check correction
- remove pragma once in cpp file
- resolve implicit type conversions in the conversion of the rfa type to single and double precision, and in other places
- add dual license
- remove unnecessary command for macos ci
- use HPX_UNROLL instead of vanilla pragma

Signed-off-by: Shreyas Atre <[email protected]>
  • Loading branch information
SAtacker committed Dec 20, 2024
1 parent 520f161 commit 64d38bc
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 52 deletions.
1 change: 0 additions & 1 deletion .github/workflows/macos_debug_fetch_hwloc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ jobs:
run: |
brew install --overwrite python-tk && \
brew install --overwrite boost gperftools ninja autoconf automake && \
autoreconf -f -i \
brew upgrade cmake
- name: Configure
shell: bash
Expand Down
3 changes: 3 additions & 0 deletions libs/core/algorithms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ set(algorithms_headers
hpx/parallel/algorithms/detail/parallel_stable_sort.hpp
hpx/parallel/algorithms/detail/pivot.hpp
hpx/parallel/algorithms/detail/reduce.hpp
hpx/parallel/algorithms/detail/reduce_deterministic.hpp
hpx/parallel/algorithms/detail/replace.hpp
hpx/parallel/algorithms/detail/rfa.hpp
hpx/parallel/algorithms/detail/rotate.hpp
hpx/parallel/algorithms/detail/sample_sort.hpp
hpx/parallel/algorithms/detail/search.hpp
Expand Down Expand Up @@ -72,6 +74,7 @@ set(algorithms_headers
hpx/parallel/algorithms/partition.hpp
hpx/parallel/algorithms/reduce_by_key.hpp
hpx/parallel/algorithms/reduce.hpp
hpx/parallel/algorithms/reduce_deterministic.hpp
hpx/parallel/algorithms/remove_copy.hpp
hpx/parallel/algorithms/remove.hpp
hpx/parallel/algorithms/replace.hpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <hpx/parallel/util/loop.hpp>

#include <cstddef>
#include <cstring>
#include <limits>
#include <type_traits>
#include <utility>
Expand All @@ -32,6 +33,8 @@ namespace hpx::parallel::detail {
sequential_reduce_deterministic_t, ExPolicy&&, InIterB first,
InIterE last, T init, Reduce&& r)
{
/// TODO: Put constraint on Reduce to be a binary plus operator
(void) r;
hpx::parallel::detail::rfa::RFA_bins<T> bins;
bins.initialize_bins();
std::memcpy(rfa::__rfa_bin_host_buffer__, &bins, sizeof(bins));
Expand Down
136 changes: 92 additions & 44 deletions libs/core/algorithms/include/hpx/parallel/algorithms/detail/rfa.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,34 @@
// Copyright (c) 2024 Shreyas Atre
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// ---------------------------------------------------------------------------
// This file has been taken from
// https://github.com/maddyscientist/reproducible_floating_sums commit
// b5a065741d4ea459437ca004b508de9dcb6a3e52. The Boost copyright has been added
// to this file in conformance with the HPX policy and in accordance with the
// dual license terms for the Reproducible Floating-Point Summations:
// https://github.com/maddyscientist/reproducible_floating_sums/blob/feature/cuda/LICENSE.md
// ---------------------------------------------------------------------------
//
/// Copyright 2022 Richard Barnes, Peter Ahrens, James Demmel
/// Permission is hereby granted, free of charge, to any person obtaining a copy
/// of this software and associated documentation files (the "Software"), to deal
/// in the Software without restriction, including without limitation the rights
/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
/// copies of the Software, and to permit persons to whom the Software is
/// furnished to do so, subject to the following conditions:
/// The above copyright notice and this permission notice shall be included in
/// all copies or substantial portions of the Software.
/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/// SOFTWARE.
//Reproducible Floating Point Accumulations via Binned Floating Point
//Adapted to C++ by Richard Barnes from ReproBLAS v2.1.0.
//ReproBLAS by Peter Ahrens, Hong Diep Nguyen, and James Demmel.
Expand Down Expand Up @@ -26,6 +57,10 @@
#include <cmath>
#include <cstdint>
#include <limits>
#include <type_traits>
#include <vector>

#include <hpx/config.hpp>

namespace hpx::parallel::detail::rfa {
template <typename F>
Expand Down Expand Up @@ -179,7 +214,7 @@ namespace hpx::parallel::detail::rfa {
static constexpr int FOLD = FOLD_;

private:
std::array<ftype, 2 * FOLD> data = {0};
std::array<ftype, 2 * FOLD> data = {{0}};

///Floating-point precision bin width
static constexpr auto BIN_WIDTH =
Expand Down Expand Up @@ -351,29 +386,29 @@ namespace hpx::parallel::detail::rfa {

///Get index of floating-point precision
///The index of a non-binned type is the smallest index a binned type would
///need to have to sum it reproducibly. Higher indicies correspond to smaller
///need to have to sum it reproducibly. Higher indices correspond to smaller
///bins.
static inline constexpr int binned_dindex(const ftype x)
{
int exp = EXP(x);
if (exp == 0)
{
if (x == 0.0)
if (x == static_cast<ftype>(0.0))
{
return MAXINDEX;
}
else
{
std::frexp(x, &exp);
return std::max((MAX_EXP - exp) / BIN_WIDTH, MAXINDEX);
return (std::max)((MAX_EXP - exp) / BIN_WIDTH, MAXINDEX);
}
}
return ((MAX_EXP + EXP_BIAS) - exp) / BIN_WIDTH;
}

///Get index of manually specified binned double precision
///The index of a binned type is the bin that it corresponds to. Higher
///indicies correspond to smaller bins.
///indices correspond to smaller bins.
inline int binned_index() const
{
return ((MAX_EXP + MANT_DIG - BIN_WIDTH + 1 + EXP_BIAS) -
Expand Down Expand Up @@ -416,7 +451,7 @@ namespace hpx::parallel::detail::rfa {
int shift = binned_index() - X_index;
if (shift > 0)
{
#pragma unroll
HPX_UNROLL
for (int i = FOLD - 1; i >= 1; i--)
{
if (i < shift)
Expand All @@ -425,7 +460,7 @@ namespace hpx::parallel::detail::rfa {
carry(i * inccarY) = carry((i - shift) * inccarY);
}
const ftype* const bins = binned_bins(X_index);
#pragma unroll
HPX_UNROLL
for (int j = 0; j < FOLD; j++)
{
if (j >= shift)
Expand Down Expand Up @@ -457,16 +492,16 @@ namespace hpx::parallel::detail::rfa {
if (binned_index0())
{
M = primary(0);
ftype qd = x * COMPRESSION;
ftype qd = x * static_cast<ftype>(COMPRESSION);
auto& ql = get_bits(qd);
ql |= 1;
qd += M;
primary(0) = qd;
M -= qd;
M *= EXPANSION * 0.5;
M *= (double) (((double) EXPANSION) * 0.5);
x += M;
x += M;
#pragma unroll
HPX_UNROLL
for (int i = 1; i < FOLD - 1; i++)
{
M = primary(i * incpriY);
Expand All @@ -485,7 +520,7 @@ namespace hpx::parallel::detail::rfa {
{
ftype qd = x;
auto& ql = get_bits(qd);
#pragma unroll
HPX_UNROLL
for (int i = 0; i < FOLD - 1; i++)
{
M = primary(i * incpriY);
Expand Down Expand Up @@ -550,7 +585,7 @@ namespace hpx::parallel::detail::rfa {
int i = 0;

if (ISNANINF(primary(0)))
return primary(0);
return (double) primary(0);
if (ISZERO(primary(0)))
return 0.0;

Expand All @@ -564,29 +599,36 @@ namespace hpx::parallel::detail::rfa {
{
scale_down = std::ldexp(0.5, 1 - (2 * MANT_DIG - BIN_WIDTH));
scale_up = std::ldexp(0.5, 1 + (2 * MANT_DIG - BIN_WIDTH));
scaled = std::max(
std::min(FOLD, (3 * MANT_DIG) / BIN_WIDTH - X_index), 0);
scaled = (std::max)(
(std::min)(FOLD, (3 * MANT_DIG) / BIN_WIDTH - X_index), 0);
if (X_index == 0)
{
Y += carry(0) * ((bins[0] / 6.0) * scale_down * EXPANSION);
Y += carry(inccarX) * ((bins[1] / 6.0) * scale_down);
Y += (primary(0) - bins[0]) * scale_down * EXPANSION;
Y += ((double) carry(0)) *
((((double) bins[0]) / 6.0) * scale_down * EXPANSION);
Y += ((double) carry(inccarX)) *
((((double) bins[1]) / 6.0) * scale_down);
Y += ((double) primary(0) - (double) bins[0]) * scale_down *
EXPANSION;
i = 2;
}
else
{
Y += carry(0) * ((bins[0] / 6.0) * scale_down);
Y += ((double) carry(0)) *
(((double) bins[0] / 6.0) * scale_down);
i = 1;
}
for (; i < scaled; i++)
{
Y += carry(i * inccarX) * ((bins[i] / 6.0) * scale_down);
Y +=
(primary((i - 1) * incpriX) - bins[i - 1]) * scale_down;
Y += ((double) carry(i * inccarX)) *
(((double) bins[i] / 6.0) * scale_down);
Y += ((double) primary((i - 1) * incpriX) -
(double) (bins[i - 1])) *
scale_down;
}
if (i == FOLD)
{
Y += (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]) *
Y += ((double) primary((FOLD - 1) * incpriX) -
(double) (bins[FOLD - 1])) *
scale_down;
return Y * scale_up;
}
Expand All @@ -597,20 +639,23 @@ namespace hpx::parallel::detail::rfa {
Y *= scale_up;
for (; i < FOLD; i++)
{
Y += carry(i * inccarX) * (bins[i] / 6.0);
Y += primary((i - 1) * incpriX) - bins[i - 1];
Y += ((double) carry(i * inccarX)) *
((double) bins[i] / 6.0);
Y += (double) (primary((i - 1) * incpriX) - bins[i - 1]);
}
Y += primary((FOLD - 1) * incpriX) - bins[FOLD - 1];
Y += ((double) primary((FOLD - 1) * incpriX) -
((double) bins[FOLD - 1]));
}
else
{
Y += carry(0) * (bins[0] / 6.0);
Y += ((double) carry(0)) * ((double) bins[0] / 6.0);
for (i = 1; i < FOLD; i++)
{
Y += carry(i * inccarX) * (bins[i] / 6.0);
Y += (primary((i - 1) * incpriX) - bins[i - 1]);
Y += ((double) carry(i * inccarX)) *
((double) bins[i] / 6.0);
Y += (double) (primary((i - 1) * incpriX) - bins[i - 1]);
}
Y += (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]);
Y += (double) (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]);
}
return Y;
}
Expand All @@ -627,7 +672,7 @@ namespace hpx::parallel::detail::rfa {
if (ISNANINF(primary(0)))
return primary(0);
if (ISZERO(primary(0)))
return 0.0;
return 0.0f;

//Note that the following order of summation is in order of decreasing
//exponent. The following code is specific to SBWIDTH=13, FLT_MANT_DIG=24, and
Expand All @@ -636,20 +681,22 @@ namespace hpx::parallel::detail::rfa {
const auto* const bins = binned_bins(X_index);
if (X_index == 0)
{
Y += (double) carry(0) * (double) (bins[0] / 6.0) *
Y += (double) carry(0) * (double) (((double) bins[0]) / 6.0) *
(double) EXPANSION;
Y += (double) carry(inccarX) * (double) (bins[1] / 6.0);
Y += (double) carry(inccarX) *
(double) (((double) bins[1]) / 6.0);
Y += (double) (primary(0) - bins[0]) * (double) EXPANSION;
i = 2;
}
else
{
Y += (double) carry(0) * (double) (bins[0] / 6.0);
Y += (double) carry(0) * (double) (((double) bins[0]) / 6.0);
i = 1;
}
for (; i < FOLD; i++)
{
Y += (double) carry(i * inccarX) * (double) (bins[i] / 6.0);
Y += (double) carry(i * inccarX) *
(double) (((double) bins[i]) / 6.0);
Y += (double) (primary((i - 1) * incpriX) - bins[i - 1]);
}
Y += (double) (primary((FOLD - 1) * incpriX) - bins[FOLD - 1]);
Expand Down Expand Up @@ -695,7 +742,7 @@ namespace hpx::parallel::detail::rfa {
{
const auto* const bins = binned_bins(Y_index);
//shift Y upwards and add X to Y
#pragma unroll
HPX_UNROLL
for (int i = FOLD - 1; i >= 1; i--)
{
if (i < shift)
Expand All @@ -705,7 +752,7 @@ namespace hpx::parallel::detail::rfa {
carry(i * inccarY) =
x.carry(i * inccarX) + carry((i - shift) * inccarY);
}
#pragma unroll
HPX_UNROLL
for (int i = 0; i < FOLD; i++)
{
if (i == shift)
Expand All @@ -718,7 +765,7 @@ namespace hpx::parallel::detail::rfa {
{
const auto* const bins = binned_bins(X_index);
//shift X upwards and add X to Y
#pragma unroll
HPX_UNROLL
for (int i = 0; i < FOLD; i++)
{
if (i < -shift)
Expand All @@ -732,7 +779,7 @@ namespace hpx::parallel::detail::rfa {
{
const auto* const bins = binned_bins(X_index);
// add X to Y
#pragma unroll
HPX_UNROLL
for (int i = 0; i < FOLD; i++)
{
primary(i * incpriY) += x.primary(i * incpriX) - bins[i];
Expand Down Expand Up @@ -771,7 +818,7 @@ namespace hpx::parallel::detail::rfa {
}

///Return the endurance of the binned fp
constexpr int endurance() const
constexpr size_t endurance() const
{
return ENDURANCE;
}
Expand Down Expand Up @@ -867,11 +914,11 @@ namespace hpx::parallel::detail::rfa {
{
if (std::is_same_v<ftype, float>)
{
return binned_conv_single(1, 1);
return static_cast<ftype>(binned_conv_single(1, 1));
}
else
{
return binned_conv_double(1, 1);
return static_cast<ftype>(binned_conv_double(1, 1));
}
}

Expand All @@ -888,7 +935,8 @@ namespace hpx::parallel::detail::rfa {
{
const double X = std::abs(max_abs_val);
const double S = std::abs(binned_sum);
return static_cast<ftype>(max(X, std::ldexp(0.5, MIN_EXP - 1)) *
return static_cast<ftype>(
(std::max)(X, std::ldexp(0.5, MIN_EXP - 1)) *
std::ldexp(0.5, (1 - FOLD) * BIN_WIDTH + 1) * N +
((7.0 * EPSILON) /
(1.0 - 6.0 * std::sqrt(static_cast<double>(EPSILON)) -
Expand Down Expand Up @@ -973,7 +1021,7 @@ namespace hpx::parallel::detail::rfa {
T max_abs_val = input[0];
for (size_t i = 0; i < N; i++)
{
max_abs_val = max(max_abs_val, std::abs(input[i]));
max_abs_val = (std::max)(max_abs_val, std::abs(input[i]));
}
add(input, N, max_abs_val);
}
Expand Down Expand Up @@ -1142,4 +1190,4 @@ namespace hpx::parallel::detail::rfa {
}
};

} // namespace hpx::parallel::detail::rfa
} // namespace hpx::parallel::detail::rfa
Loading

0 comments on commit 64d38bc

Please sign in to comment.