-
Notifications
You must be signed in to change notification settings - Fork 0
/
xsum.h
151 lines (104 loc) · 6.14 KB
/
xsum.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#pragma once
/* INTERFACE TO FUNCTIONS FOR EXACT SUMMATION. */
/* Written by Radford M. Neal, 2015 */
#include <stdlib.h>
#include <stdint.h>
/* CONSTANTS DEFINING THE FLOATING POINT FORMAT. */
typedef double xsum_flt; /* C floating point type sums are done for */
typedef int64_t xsum_int; /* Signed integer type for a fp value */
typedef uint64_t xsum_uint; /* Unsigned integer type for a fp value */
typedef int_fast16_t xsum_expint; /* Integer type for holding an exponent */
#define XSUM_MANTISSA_BITS 52 /* Bits in fp mantissa, excludes implict 1 */
#define XSUM_EXP_BITS 11 /* Bits in fp exponent */
#define XSUM_MANTISSA_MASK \
(((xsum_int)1 << XSUM_MANTISSA_BITS) - 1) /* Mask for mantissa bits */
#define XSUM_EXP_MASK \
((1 << XSUM_EXP_BITS) - 1) /* Mask for exponent */
#define XSUM_EXP_BIAS \
((1 << (XSUM_EXP_BITS-1)) - 1) /* Bias added to signed exponent */
#define XSUM_SIGN_BIT \
(XSUM_MANTISSA_BITS + XSUM_EXP_BITS) /* Position of sign bit */
#define XSUM_SIGN_MASK \
((xsum_uint)1 << XSUM_SIGN_BIT) /* Mask for sign bit */
/* CONSTANTS DEFINING THE SMALL ACCUMULATOR FORMAT. */
#define XSUM_SCHUNK_BITS 64 /* Bits in chunk of the small accumulator */
typedef int64_t xsum_schunk; /* Integer type of small accumulator chunk */
#define XSUM_LOW_EXP_BITS 5 /* # of low bits of exponent, in one chunk */
#define XSUM_LOW_EXP_MASK \
((1 << XSUM_LOW_EXP_BITS) - 1) /* Mask for low-order exponent bits */
#define XSUM_HIGH_EXP_BITS \
(XSUM_EXP_BITS - XSUM_LOW_EXP_BITS) /* # of high exponent bits for index */
#define XSUM_HIGH_EXP_MASK \
((1 << HIGH_EXP_BITS) - 1) /* Mask for high-order exponent bits */
#define XSUM_SCHUNKS \
((1 << XSUM_HIGH_EXP_BITS) + 3) /* # of chunks in small accumulator */
#define XSUM_LOW_MANTISSA_BITS \
(1 << XSUM_LOW_EXP_BITS) /* Bits in low part of mantissa */
#define XSUM_HIGH_MANTISSA_BITS \
(XSUM_MANTISSA_BITS - XSUM_LOW_MANTISSA_BITS) /* Bits in high part */
#define XSUM_LOW_MANTISSA_MASK \
(((xsum_int)1 << XSUM_LOW_MANTISSA_BITS) - 1) /* Mask for low bits */
#define XSUM_SMALL_CARRY_BITS \
((XSUM_SCHUNK_BITS-1) - XSUM_MANTISSA_BITS) /* Bits sums can carry into */
#define XSUM_SMALL_CARRY_TERMS \
((1 << XSUM_SMALL_CARRY_BITS) - 1) /* # terms can add before need prop. */
typedef struct
{ xsum_schunk chunk[XSUM_SCHUNKS]; /* Chunks making up small accumulator */
xsum_int Inf; /* If non-zero, +Inf, -Inf, or NaN */
xsum_int NaN; /* If non-zero, a NaN value with payload */
int adds_until_propagate; /* Number of remaining adds before carry */
} xsum_small_accumulator; /* propagation must be done again */
/* CONSTANTS DEFINING THE LARGE ACCUMULATOR FORMAT. */
#define XSUM_LCHUNK_BITS 64 /* Bits in chunk of the large accumulator */
typedef uint64_t xsum_lchunk; /* Integer type of large accumulator chunk,
must be EXACTLY 64 bits in size */
#define XSUM_LCOUNT_BITS (64-XSUM_MANTISSA_BITS) /* # of bits in count */
typedef int_least16_t xsum_lcount; /* Signed int type of counts for large acc.*/
#define XSUM_LCHUNKS \
(1 << (XSUM_EXP_BITS+1)) /* # of chunks in large accumulator */
typedef uint_fast64_t xsum_used; /* Unsigned type for holding used flags */
typedef struct
{ xsum_lchunk chunk[XSUM_LCHUNKS]; /* Chunks making up large accumulator */
xsum_lcount count[XSUM_LCHUNKS]; /* Counts of # adds remaining for chunks,
or -1 if not used yet or special. */
xsum_used chunks_used[XSUM_LCHUNKS/64]; /* Bits indicate chunks in use */
xsum_used used_used; /* Bits indicate chunk_used entries not 0 */
xsum_small_accumulator sacc; /* The small accumulator to condense into */
} xsum_large_accumulator;
/* TYPE FOR LENGTHS OF ARRAYS. Must be a signed integer type. */
typedef int xsum_length;
/* FUNCTIONS FOR EXACT SUMMATION. */
void xsum_small_init (xsum_small_accumulator *__restrict);
void xsum_small_add1 (xsum_small_accumulator *__restrict, xsum_flt);
void xsum_small_addv (xsum_small_accumulator *__restrict,
const xsum_flt *__restrict, xsum_length);
void xsum_small_add_sqnorm (xsum_small_accumulator *__restrict,
const xsum_flt *__restrict, xsum_length);
void xsum_small_add_dot (xsum_small_accumulator *__restrict,
const xsum_flt *, const xsum_flt *, xsum_length);
xsum_flt xsum_small_round (xsum_small_accumulator *__restrict);
void xsum_small_display (xsum_small_accumulator *__restrict);
int xsum_small_chunks_used (xsum_small_accumulator *__restrict);
void xsum_large_init (xsum_large_accumulator *__restrict);
void xsum_large_addv (xsum_large_accumulator *__restrict,
const xsum_flt *__restrict, xsum_length);
void xsum_large_add_sqnorm (xsum_large_accumulator *__restrict,
const xsum_flt *__restrict, xsum_length);
void xsum_large_add_dot (xsum_large_accumulator *__restrict,
const xsum_flt *, const xsum_flt *, xsum_length);
xsum_flt xsum_large_round (xsum_large_accumulator *__restrict);
void xsum_large_display (xsum_large_accumulator *__restrict);
int xsum_large_chunks_used (xsum_large_accumulator *__restrict);
/* FUNCTIONS FOR DOUBLE AND OTHER INEXACT SUMMATION. */
xsum_flt xsum_sum_double (const xsum_flt *__restrict, xsum_length);
xsum_flt xsum_sum_double_not_ordered (const xsum_flt *__restrict, xsum_length);
xsum_flt xsum_sum_float128 (const xsum_flt *__restrict, xsum_length);
xsum_flt xsum_sum_kahan (const xsum_flt *__restrict, xsum_length);
xsum_flt xsum_sqnorm_double (const xsum_flt *__restrict, xsum_length);
xsum_flt xsum_sqnorm_double_not_ordered (const xsum_flt *__restrict, xsum_length);
xsum_flt xsum_dot_double (const xsum_flt *, const xsum_flt *, xsum_length);
xsum_flt xsum_dot_double_not_ordered (const xsum_flt *, const xsum_flt *,
xsum_length);
/* DEBUG FLAG. Set to non-zero for debug ouptut. Ignored unless xsum.c
is compiled with -DDEBUG. */
extern int xsum_debug;