From a0c82a6a47c785ee11bb5cc3599f5f9d1e475475 Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Tue, 26 Nov 2024 03:01:45 +0100 Subject: [PATCH] Updated base32 API. --- lib/std/encoding/base32.c3 | 417 +++++++++++++++------------- releasenotes.md | 1 + test/unit/stdlib/encoding/base32.c3 | 66 ++--- 3 files changed, 261 insertions(+), 223 deletions(-) diff --git a/lib/std/encoding/base32.c3 b/lib/std/encoding/base32.c3 index c4fd6bff0..35d9fc1f8 100644 --- a/lib/std/encoding/base32.c3 +++ b/lib/std/encoding/base32.c3 @@ -3,113 +3,166 @@ module std::encoding::base32; // This module implements base32 encoding according to RFC 4648 // (https://www.rfc-editor.org/rfc/rfc4648) -distinct Alphabet = inline char[32]; - -// Standard base32 Alphabet -const Alphabet STD_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"; - -// Extended Hex Alphabet -const Alphabet HEX_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUV"; - -fn String! encode_buffer(char[] code, char[] buffer) +struct Base32Alphabet { - @check_coder(std_encoder); - return (String)buffer[:std_encoder.encode(code, buffer)!]; + char[32] encoding; + char[256] reverse; } -fn char[]! decode_buffer(char[] code, char[] buffer) -{ - @check_coder(std_decoder); - return buffer[:std_decoder.decode(code, buffer)!]; -} +const char NO_PAD = 0; +const char DEFAULT_PAD = '='; -fn String! encode(char[] code, Allocator allocator) +fn String! encode_buffer(char[] code, char[] buffer, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) { - @check_coder(std_encoder); - char[] data = allocator::alloc_array(allocator, char, std_encoder.encode_len(code.len)); - return (String)data[:std_encoder.encode(code, data)!]; + return (String)encode_data(alphabet, code, buffer, padding); } -fn char[]! decode(char[] code, Allocator allocator) +fn char[]! 
decode_buffer(char[] code, char[] buffer, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) { - @check_coder(std_decoder); - char[] data = allocator::alloc_array(allocator, char, std_decoder.decode_len(code.len)); - return data[:std_decoder.decode(code, data)!]; + return decode_data(alphabet, code, buffer, padding)!; } -fn String! encode_new(char[] code) @inline => encode(code, allocator::heap()); -fn String! encode_temp(char[] code) @inline => encode(code, allocator::temp()); -fn char[]! decode_new(char[] code) @inline => decode(code, allocator::heap()); -fn char[]! decode_temp(char[] code) @inline => decode(code, allocator::temp()); - -const uint MASK @private = 0b11111; -const char INVALID @private = 0xff; - -const int STD_PADDING = '='; -const int NO_PADDING = -1; - -fault Base32Error +fn String! encode(char[] code, Allocator allocator, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) { - DUPLICATE_IN_ALPHABET, - PADDING_IN_ALPHABET, - INVALID_CHARACTER_IN_ALPHABET, - DESTINATION_TOO_SMALL, - INVALID_PADDING, - CORRUPT_INPUT + char[] data = allocator::alloc_array(allocator, char, encode_len(code.len, padding > 0)); + return (String)encode_buffer(code, data, padding, alphabet); } -struct Base32Encoder +fn char[]! decode(char[] code, Allocator allocator, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) { - Alphabet alphabet; - int padding; + char[] data = allocator::alloc_array(allocator, char, decode_len(code.len, padding > 0)); + return decode_buffer(code, data, padding, alphabet); } +fn String! encode_new(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline => encode(code, allocator::heap(), padding, alphabet); +fn String! encode_temp(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline => encode(code, allocator::temp(), padding, alphabet); +fn char[]! 
decode_new(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline => decode(code, allocator::heap(), padding, alphabet); +fn char[]! decode_temp(char[] code, char padding = DEFAULT_PAD, Base32Alphabet* alphabet = &STANDARD) @inline => decode(code, allocator::temp(), padding, alphabet); + <* - @param encoder "The 32-character alphabet for encoding." - @param padding "Set to a negative value to disable padding." - @require padding < 256 + Calculate the length in bytes of the decoded data. + @param n "Length in bytes of input." + @param use_padding "Whether padding characters are used or not" + @return "Length in bytes of the decoded data." *> -fn void! Base32Encoder.init(&self, Alphabet encoder = STD_ALPHABET, int padding = STD_PADDING) +fn usz decode_len(usz n, bool use_padding = true) { - encoder.validate(padding)!; - *self = { .alphabet = encoder, .padding = padding }; + if (use_padding) return (n / 8) * 5; + // no padding + usz trailing = n % 8; + return n / 8 * 5 + (trailing * 5 ) / 8; } <* Calculate the length in bytes of the encoded data. @param n "Length in bytes on input." + @param use_padding "Whether padding characters are used or not" @return "Length in bytes of the encoded data." *> -fn usz Base32Encoder.encode_len(&self, usz n) +fn usz encode_len(usz n, bool use_padding = true) { // A character is encoded into 8 x 5-bit blocks. - if (self.padding >= 0) - { - // with padding - return (n + 4) / 5 * 8; - } - else + if (use_padding) return (n + 4) / 5 * 8; + + // no padding + usz trailing = n % 5; + return n / 5 * 8 + (trailing * 8 + 4) / 5; +} + +<* + Decode the content of src into dst, which must be properly sized. + @param src "The input to be decoded." + @param dst "The decoded input." + @require dst.len >= decode_len(src.len, padding > 0) "Destination buffer too small" + @return "The resulting dst buffer" + @return! DecodingFailure +*> +fn char[]! 
decode_data(Base32Alphabet* alphabet, char[] src, char[] dst, char padding) @private +{ + if (src.len == 0) return dst[:0]; + char* dst_ptr = dst; + usz dn = decode_len(src.len, padding > 0); + usz n; + char[8] buf; + while (src.len > 0 && dst.len > 0) { - // no padding - usz trailing = n % 5; - return n / 5 * 8 + (trailing * 8 + 4) / 5; + usz i @noinit; + // load 8 bytes into buffer + for (i = 0; i < 8; i++) + { + if (src.len == 0) + { + if (padding > 0) return DecodingFailure.INVALID_PADDING?; + break; + } + if (src[0] == padding) break; + buf[i] = alphabet.reverse[src[0]]; + if (buf[i] == INVALID) return DecodingFailure.INVALID_CHARACTER?; + src = src[1..]; + } + + // extract 5-bytes from the buffer which contains 8 x 5 bit chunks + switch (i) + { + case 8: + // |66677777| dst[4] + // | 77777| buf[7] + // |666 | buf[6] << 5 + dst[4] = buf[7] | buf[6] << 5; + n++; + nextcase 7; + case 7: + // |45555566| dst[3] + // | 66| buf[6] >> 3 + // | 55555 | buf[5] << 2 + // |4 | buf[4] << 7 + dst[3] = buf[6] >> 3 | buf[5] << 2 | buf[4] << 7; + n++; + nextcase 5; + case 5: + // |33334444| dst[2] + // | 4444| buf[4] >> 1 + // |3333 | buf[3] << 4 + dst[2] = buf[4] >> 1 | buf[3] << 4; + n++; + nextcase 4; + case 4: + // |11222223| dst[1] + // | 3| buf[3] >> 4 + // | 22222 | buf[2] << 1 + // |11 | buf[1] << 6 + dst[1] = buf[3] >> 4 | buf[2] << 1 | buf[1] << 6; + n++; + nextcase 2; + case 2: + // |00000111| dst[0] + // | 111| buf[1] >> 2 + // |00000 | buf[0] << 3 + dst[0] = buf[1] >> 2 | buf[0] << 3; + n++; + default: + return DecodingFailure.INVALID_CHARACTER?; + } + if (dst.len < 5) break; + dst = dst[5..]; } + return dst_ptr[:n]; } <* Encode the content of src into dst, which must be properly sized. @param [in] src "The input to be encoded." @param [inout] dst "The encoded input." + @require dst.len >= encode_len(src.len, padding > 0) "Destination buffer too small" @return "The encoded size." - @return! Base32Error.DESTINATION_TOO_SMALL *> -fn usz! 
Base32Encoder.encode(&self, char[] src, char[] dst) +fn char[] encode_data(Base32Alphabet* alphabet, char[] src, char[] dst, char padding = DEFAULT_PAD) { - if (src.len == 0) return 0; + if (src.len == 0) return dst[:0]; + char* dst_ptr = dst; usz n = (src.len / 5) * 5; - usz dn = self.encode_len(src.len); - if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?; + usz dn = encode_len(src.len, padding > 0); uint msb, lsb; for (usz i = 0; i < n; i += 5) @@ -121,20 +174,20 @@ fn usz! Base32Encoder.encode(&self, char[] src, char[] dst) // now slice them into 5-bit chunks and translate to the // alphabet. - dst[0] = self.alphabet[(msb >> 27) & MASK]; - dst[1] = self.alphabet[(msb >> 22) & MASK]; - dst[2] = self.alphabet[(msb >> 17) & MASK]; - dst[3] = self.alphabet[(msb >> 12) & MASK]; - dst[4] = self.alphabet[(msb >> 7) & MASK]; - dst[5] = self.alphabet[(msb >> 2) & MASK]; - dst[6] = self.alphabet[(lsb >> 5) & MASK]; - dst[7] = self.alphabet[lsb & MASK]; + dst[0] = alphabet.encoding[(msb >> 27) & MASK]; + dst[1] = alphabet.encoding[(msb >> 22) & MASK]; + dst[2] = alphabet.encoding[(msb >> 17) & MASK]; + dst[3] = alphabet.encoding[(msb >> 12) & MASK]; + dst[4] = alphabet.encoding[(msb >> 7) & MASK]; + dst[5] = alphabet.encoding[(msb >> 2) & MASK]; + dst[6] = alphabet.encoding[(lsb >> 5) & MASK]; + dst[7] = alphabet.encoding[lsb & MASK]; dst = dst[8..]; } usz trailing = src.len - n; - if (trailing == 0) return dn; + if (trailing == 0) return dst_ptr[:dn]; msb = 0; switch (trailing) @@ -142,42 +195,97 @@ fn usz!
Base32Encoder.encode(&self, char[] src, char[] dst) case 4: msb |= (uint)src[n+3]; lsb = msb << 8; - dst[6] = self.alphabet[(lsb >> 5) & MASK]; - dst[5] = self.alphabet[(msb >> 2) & MASK]; + dst[6] = alphabet.encoding[(lsb >> 5) & MASK]; + dst[5] = alphabet.encoding[(msb >> 2) & MASK]; nextcase 3; case 3: msb |= (uint)src[n+2] << 8; - dst[4] = self.alphabet[(msb >> 7) & MASK]; + dst[4] = alphabet.encoding[(msb >> 7) & MASK]; nextcase 2; case 2: msb |= (uint)src[n+1] << 16; - dst[3] = self.alphabet[(msb >> 12) & MASK]; - dst[2] = self.alphabet[(msb >> 17) & MASK]; + dst[3] = alphabet.encoding[(msb >> 12) & MASK]; + dst[2] = alphabet.encoding[(msb >> 17) & MASK]; nextcase 1; case 1: msb |= (uint)src[n] << 24; - dst[1] = self.alphabet[(msb >> 22) & MASK]; - dst[0] = self.alphabet[(msb >> 27) & MASK]; + dst[1] = alphabet.encoding[(msb >> 22) & MASK]; + dst[0] = alphabet.encoding[(msb >> 27) & MASK]; } // add the padding - if (self.padding >= 0) + if (padding > 0) { - char pad = (char)self.padding; for (usz i = (trailing * 8 / 5) + 1; i < 8; i++) { - dst[i] = pad; + dst[i] = padding; } } + return dst_ptr[:dn]; +} + + +const uint MASK @private = 0b11111; +const char INVALID @private = 0xff; - return dn; +const int STD_PADDING = '='; +const int NO_PADDING = -1; + +fault Base32Error +{ + DUPLICATE_IN_ALPHABET, + PADDING_IN_ALPHABET, + INVALID_CHARACTER_IN_ALPHABET, + DESTINATION_TOO_SMALL, + INVALID_PADDING, + CORRUPT_INPUT } -struct Base32Decoder +struct Base32Encoder @deprecated { - Alphabet alphabet; + Base32Alphabet alphabet; + int padding; +} + +<* + @param encoder "The 32-character alphabet for encoding." + @param padding "Set to a negative value to disable padding." + @require padding < 256 +*> +fn void! Base32Encoder.init(&self, Alphabet encoder = STD_ALPHABET, int padding = STD_PADDING) +{ + encoder.validate(padding)!; + *self = { .alphabet = { .encoding = (char[32])encoder }, .padding = padding }; +} + +<* + Calculate the length in bytes of the encoded data. 
+ @param n "Length in bytes on input." + @return "Length in bytes of the encoded data." +*> +fn usz Base32Encoder.encode_len(&self, usz n) +{ + return encode_len(n, self.padding >= 0); +} + +<* + Encode the content of src into dst, which must be properly sized. + @param [in] src "The input to be encoded." + @param [inout] dst "The encoded input." + @return "The encoded size." + @return! Base32Error.DESTINATION_TOO_SMALL +*> +fn usz! Base32Encoder.encode(&self, char[] src, char[] dst) +{ + usz dn = self.encode_len(src.len); + if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?; + return encode_data(&self.alphabet, src, dst, self.padding < 0 ? 0 : (char)self.padding).len; +} + +struct Base32Decoder @deprecated +{ + Base32Alphabet alphabet; int padding; - char[256] reverse; } <* @@ -188,12 +296,12 @@ struct Base32Decoder fn void! Base32Decoder.init(&self, Alphabet decoder = STD_ALPHABET, int padding = STD_PADDING) { decoder.validate(padding)!; - *self = { .alphabet = decoder, .padding = padding }; + *self = { .alphabet = { .encoding = (char[32])decoder }, .padding = padding }; - self.reverse[..] = INVALID; + self.alphabet.reverse[..] = INVALID; foreach (char i, c : decoder) { - self.reverse[c] = i; + self.alphabet.reverse[c] = i; } } @@ -204,17 +312,7 @@ fn void! Base32Decoder.init(&self, Alphabet decoder = STD_ALPHABET, int padding *> fn usz Base32Decoder.decode_len(&self, usz n) { - if (self.padding >= 0) - { - // with padding - return (n / 8) * 5; - } - else - { - // no padding - usz trailing = n % 8; - return n / 8 * 5 + (trailing * 5 ) / 8; - } + return decode_len(n, self.padding >= 0); } <* @@ -229,83 +327,7 @@ fn usz! 
Base32Decoder.decode(&self, char[] src, char[] dst) if (src.len == 0) return 0; usz dn = self.decode_len(src.len); if (dst.len < dn) return Base32Error.DESTINATION_TOO_SMALL?; - - usz j, n; - char[8] buf; - while (src.len > 0 && dst.len > 0) - { - - // load 8 bytes into buffer - for (j = 0; j < 8; j++) - { - if (src.len == 0) - { - if (self.padding >= 0) - { - return Base32Error.CORRUPT_INPUT?; - } - break; - } - if (src[0] == (char)self.padding) - { - break; - } - buf[j] = self.reverse[src[0]]; - if (buf[j] == INVALID) - { - return Base32Error.CORRUPT_INPUT?; - } - src = src[1..]; - } - - // extract 5-bytes from the buffer which contains 8 x 5 bit chunks - switch (j) - { - case 8: - // |66677777| dst[4] - // | 77777| buf[7] - // |666 | buf[6] << 5 - dst[4] = buf[7] | buf[6] << 5; - n++; - nextcase 7; - case 7: - // |45555566| dst[3] - // | 66| buf[6] >> 3 - // | 55555 | buf[5] << 2 - // |4 | buf[4] << 7 - dst[3] = buf[6] >> 3 | buf[5] << 2 | buf[4] << 7; - n++; - nextcase 5; - case 5: - // |33334444| dst[2] - // | 4444| buf[4] >> 1 - // |3333 | buf[3] << 4 - dst[2] = buf[4] >> 1 | buf[3] << 4; - n++; - nextcase 4; - case 4: - // |11222223| dst[1] - // | 3| buf[3] >> 4 - // | 22222 | buf[2] << 1 - // |11 | buf[1] << 6 - dst[1] = buf[3] >> 4 | buf[2] << 1 | buf[1] << 6; - n++; - nextcase 2; - case 2: - // |00000111| dst[0] - // | 111| buf[1] >> 2 - // |00000 | buf[0] << 3 - dst[0] = buf[1] >> 2 | buf[0] << 3; - n++; - default: - return Base32Error.CORRUPT_INPUT?; - } - - if (dst.len < 5) break; - dst = dst[5..]; - } - - return n; + return decode_data(&self.alphabet, src, dst, self.padding < 0 ? 0 : (char)self.padding).len; } @@ -340,11 +362,32 @@ fn void! 
Alphabet.validate(&self, int padding) } } -tlocal Base32Encoder std_encoder @local; -tlocal Base32Decoder std_decoder @local; +distinct Alphabet = char[32]; +// Standard base32 Alphabet +const Alphabet STD_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"; +// Extended Hex Alphabet +const Alphabet HEX_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUV"; -macro @check_coder(#coder) @local -{ - if (#coder.alphabet == STD_ALPHABET) return; - #coder.init(STD_ALPHABET, '=')!!; -} +const Base32Alphabet STANDARD = { + .encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", + .reverse = x`ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffff1a1b1c1d1e1fffffffffffffffff + ff000102030405060708090a0b0c0d0e0f10111213141516171819ffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff` +}; + +const Base32Alphabet HEX = { + .encoding = "0123456789ABCDEFGHIJKLMNOPQRSTUV", + .reverse = x`ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffff00010203040506070809ffffffffffff + ff0a0b0c0d0e0f101112131415161718191a1b1c1d1e1fffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff` +}; diff --git a/releasenotes.md b/releasenotes.md index ff9afedfe..9db3e0435 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -27,6 +27,7 @@ ### Stdlib changes - Add `io::MultiReader`, `io::MultiWriter`, and `io::TeeReader` structs. 
+- Updated Base32 API. ## 0.6.4 Change list diff --git a/test/unit/stdlib/encoding/base32.c3 b/test/unit/stdlib/encoding/base32.c3 index 6f9a6ffe6..88d131622 100644 --- a/test/unit/stdlib/encoding/base32.c3 +++ b/test/unit/stdlib/encoding/base32.c3 @@ -33,70 +33,64 @@ macro encode_tests(tests, alphabet, padding) { foreach (t : tests) { - Base32Encoder b; - b.init(alphabet, padding)!!; + char[64] buf; + usz n = base32::encode_len(t.dec.len, padding > 0); + base32::encode_buffer(t.dec, buf[:n], padding, alphabet)!!; - char[64] buf; - usz n = b.encode_len(t.dec.len); - b.encode(t.dec, buf[:n])!!; - - char[] want = t.enc; - usz! pad_idx = array::index_of(want, '='); - if (try pad_idx && padding < 0) - { - want = want[:pad_idx]; - } + char[] want = t.enc; + usz! pad_idx = array::index_of(want, '='); + if (try pad_idx && !padding) + { + want = want[:pad_idx]; + } - assert(buf[:n] == want, "got: %s, want: %s", - (String)buf[:n], (String)want); + assert(buf[:n] == want, "got: %s, want: %s", + (String)buf[:n], (String)want); } } fn void encode() { - encode_tests(std_tests, base32::STD_ALPHABET, '='); - encode_tests(hex_tests, base32::HEX_ALPHABET, '='); + encode_tests(std_tests, &base32::STANDARD, '='); + encode_tests(hex_tests, &base32::HEX, '='); } fn void encode_nopadding() { - encode_tests(std_tests, base32::STD_ALPHABET, -1); - encode_tests(hex_tests, base32::HEX_ALPHABET, -1); + encode_tests(std_tests, &base32::STANDARD, base32::NO_PAD); + encode_tests(hex_tests, &base32::HEX, base32::NO_PAD); } macro decode_tests(tests, alphabet, padding) { foreach (t : tests) { - Base32Decoder b; - b.init(alphabet, padding)!!; - - char[] input = t.enc[..]; - usz! pad_idx = array::index_of(input, '='); - if (try pad_idx && padding < 0) - { - input = input[:pad_idx]; - } + char[] input = t.enc[..]; + usz! 
pad_idx = array::index_of(input, '='); + if (try pad_idx && !padding) + { + input = input[:pad_idx]; + } - char[64] buf; - usz n = b.decode_len(input.len); - n = b.decode(input, buf[:n])!!; + char[64] buf; + usz n = base32::decode_len(input.len, padding > 0); + char[] buf2 = base32::decode_buffer(input, buf[:n], padding, alphabet)!!; - assert(buf[:n] == t.dec, "got: %s, want: %s", - (String)buf[:n], (String)t.dec); + assert(buf2 == t.dec, "got: %s, want: %s", + (String)buf2, (String)t.dec); } } fn void decode() { - decode_tests(std_tests, base32::STD_ALPHABET, '='); - decode_tests(hex_tests, base32::HEX_ALPHABET, '='); + decode_tests(std_tests, &base32::STANDARD, '='); + decode_tests(hex_tests, &base32::HEX, '='); } fn void decode_nopadding() { - decode_tests(std_tests, base32::STD_ALPHABET, -1); - decode_tests(hex_tests, base32::HEX_ALPHABET, -1); + decode_tests(std_tests, &base32::STANDARD, base32::NO_PAD); + decode_tests(hex_tests, &base32::HEX, base32::NO_PAD); } fn void! base32_api()