diff --git a/hikyuu/utilities/base64.cpp b/hikyuu/utilities/base64.cpp index 09dcb1f..0e01712 100644 --- a/hikyuu/utilities/base64.cpp +++ b/hikyuu/utilities/base64.cpp @@ -1,108 +1,291 @@ /* - * Copyright (c) hikyuu.org - * - * Created on: 2020-6-2 - * Author: fasiondog - */ + base64.cpp and base64.h + + base64 encoding and decoding with C++. + More information at + https://renenyffenegger.ch/notes/development/Base64/Encoding-and-decoding-base-64-with-cpp + + Version: 2.rc.08 (release candidate) + + Copyright (C) 2004-2017, 2020, 2021 René Nyffenegger + + This source code is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this source code must not be misrepresented; you must not + claim that you wrote the original source code. If you use this source code + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original source code. + + 3. This notice may not be removed or altered from any source distribution. + + René Nyffenegger rene.nyffenegger@adp-gmbh.ch + +*/ #include "base64.h" #include "Log.h" namespace hku { +// +// Depending on the url parameter in base64_chars, one of +// two sets of base64 characters needs to be chosen. +// They differ in their last two characters. +// +static const char* base64_chars[2] = { + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789" + "+/", -static const std::string base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; + "0123456789" + "-_"}; -std::string base64_encode(unsigned char const* bytes_to_encode, size_t in_len) { // NOSONAR - HKU_CHECK(bytes_to_encode, "Input null ptr!"); - std::string ret; - HKU_IF_RETURN(in_len == 0, ret); +static unsigned int pos_of_char(const unsigned char chr) { + // + // Return the position of chr within base64_encode() + // - int i = 0; - unsigned char char_array_3[3]; - unsigned char char_array_4[4]; - - while (in_len--) { - char_array_3[i++] = *(bytes_to_encode++); - if (i == 3) { - char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; - char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); - char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); - char_array_4[3] = char_array_3[2] & 0x3f; - - for (i = 0; (i < 4); i++) - ret += base64_chars[char_array_4[i]]; - i = 0; - } + if (chr >= 'A' && chr <= 'Z') + return chr - 'A'; + else if (chr >= 'a' && chr <= 'z') + return chr - 'a' + ('Z' - 'A') + 1; + else if (chr >= '0' && chr <= '9') + return chr - '0' + ('Z' - 'A') + ('z' - 'a') + 2; + else if (chr == '+' || chr == '-') + return 62; // Be liberal with input and accept both url ('-') and non-url ('+') base 64 + // characters ( + else if (chr == '/' || chr == '_') + return 63; // Ditto for '/' and '_' + else + // + // 2020-10-23: Throw std::exception rather than const char* + //(Pablo Martin-Gomez, https://github.com/Bouska) + // + throw std::runtime_error("Input is not valid base64-encoded data."); +} + +static std::string insert_linebreaks(std::string str, size_t distance) { + // + // Provided by https://github.com/JomaCorpFX, adapted by me. + // + if (!str.length()) { + return ""; } - if (i) { - for (int j = i; j < 3; j++) - char_array_3[j] = '\0'; + size_t pos = distance; - char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; - char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); - char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); - char_array_4[3] = char_array_3[2] & 0x3f; + while (pos < str.size()) { + str.insert(pos, "\n"); + pos += distance + 1; + } - for (int j = 0; (j < i + 1); j++) - ret += base64_chars[char_array_4[j]]; + return str; +} - while ((i++ < 3)) - ret += '='; - } +template +static std::string encode_with_line_breaks(String s) { + return insert_linebreaks(base64_encode(s, false), line_length); +} - return ret; +template +static std::string encode_pem(String s) { + return encode_with_line_breaks(s); +} + +template +static std::string encode_mime(String s) { + return encode_with_line_breaks(s); } -static inline bool is_base64(unsigned char c) { - return (isalnum(c) || (c == '+') || (c == '/')); +template +static std::string encode(String s, bool url) { + return base64_encode(reinterpret_cast(s.data()), s.length(), url); } -std::string base64_decode(unsigned char const* encoded_string, size_t in_len) { - HKU_CHECK(encoded_string, "Input null ptr!"); +std::string base64_encode(unsigned char const* bytes_to_encode, size_t in_len, bool url) { + HKU_ASSERT(bytes_to_encode); + std::string ret; HKU_IF_RETURN(in_len == 0, ret); - int i = 0; - int in_ = 0; - unsigned char char_array_4[4], char_array_3[3]; + size_t len_encoded = (in_len + 2) / 3 * 4; + + unsigned char trailing_char = url ? '.' : '='; + + // + // Choose set of base64 characters. They differ + // for the last two positions, depending on the url + // parameter. + // A bool (as is the parameter url) is guaranteed + // to evaluate to either 0 or 1 in C++ therefore, + // the correct character set is chosen by subscripting + // base64_chars with url. + // + const char* base64_chars_ = base64_chars[url]; + + ret.reserve(len_encoded); - while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) { - char_array_4[i++] = encoded_string[in_]; - in_++; - if (i == 4) { - for (i = 0; i < 4; i++) - char_array_4[i] = (unsigned char)base64_chars.find(char_array_4[i]); + unsigned int pos = 0; - char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); - char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); - char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + while (pos < in_len) { + ret.push_back(base64_chars_[(bytes_to_encode[pos + 0] & 0xfc) >> 2]); - for (i = 0; (i < 3); i++) - ret += char_array_3[i]; - i = 0; + if (pos + 1 < in_len) { + ret.push_back(base64_chars_[((bytes_to_encode[pos + 0] & 0x03) << 4) + + ((bytes_to_encode[pos + 1] & 0xf0) >> 4)]); + + if (pos + 2 < in_len) { + ret.push_back(base64_chars_[((bytes_to_encode[pos + 1] & 0x0f) << 2) + + ((bytes_to_encode[pos + 2] & 0xc0) >> 6)]); + ret.push_back(base64_chars_[bytes_to_encode[pos + 2] & 0x3f]); + } else { + ret.push_back(base64_chars_[(bytes_to_encode[pos + 1] & 0x0f) << 2]); + ret.push_back(trailing_char); + } + } else { + ret.push_back(base64_chars_[(bytes_to_encode[pos + 0] & 0x03) << 4]); + ret.push_back(trailing_char); + ret.push_back(trailing_char); } + + pos += 3; } - if (i) { - for (int j = i; j < 4; j++) - char_array_4[j] = 0; + return ret; +} - for (int j = 0; j < 4; j++) - char_array_4[j] = (unsigned char)base64_chars.find(char_array_4[j]); +template +static std::string decode(String encoded_string, bool remove_linebreaks) { + // + // decode(…) is templated so that it can be used with String = const std::string& + // or std::string_view (requires at least C++17) + // - char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); - char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); - char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + if (encoded_string.empty()) + return std::string(); - for (int j = 0; (j < i - 1); j++) - ret += char_array_3[j]; + if (remove_linebreaks) { + std::string copy(encoded_string); + + copy.erase(std::remove(copy.begin(), copy.end(), '\n'), copy.end()); + + return base64_decode(copy, false); + } + + size_t length_of_string = encoded_string.length(); + size_t pos = 0; + + // + // The approximate length (bytes) of the decoded string might be one or + // two bytes smaller, depending on the amount of trailing equal signs + // in the encoded string. This approximation is needed to reserve + // enough space in the string to be returned. + // + size_t approx_length_of_decoded_string = length_of_string / 4 * 3; + std::string ret; + ret.reserve(approx_length_of_decoded_string); + + while (pos < length_of_string) { + // + // Iterate over encoded input string in chunks. The size of all + // chunks except the last one is 4 bytes. + // + // The last chunk might be padded with equal signs or dots + // in order to make it 4 bytes in size as well, but this + // is not required as per RFC 2045. + // + // All chunks except the last one produce three output bytes. + // + // The last chunk produces at least one and up to three bytes. + // + + size_t pos_of_char_1 = pos_of_char(encoded_string[pos + 1]); + + // + // Emit the first output byte that is produced in each chunk: + // + ret.push_back(static_cast( + ((pos_of_char(encoded_string[pos + 0])) << 2) + ((pos_of_char_1 & 0x30) >> 4))); + + if ((pos + 2 < length_of_string) && // Check for data that is not padded with equal signs + // (which is allowed by RFC 2045) + encoded_string[pos + 2] != '=' && + encoded_string[pos + 2] != + '.' // accept URL-safe base 64 strings, too, so check for '.' also. + ) { + // + // Emit a chunk's second byte (which might not be produced in the last chunk). + // + unsigned int pos_of_char_2 = pos_of_char(encoded_string[pos + 2]); + ret.push_back(static_cast(((pos_of_char_1 & 0x0f) << 4) + + ((pos_of_char_2 & 0x3c) >> 2))); + + if ((pos + 3 < length_of_string) && encoded_string[pos + 3] != '=' && + encoded_string[pos + 3] != '.') { + // + // Emit a chunk's third byte (which might not be produced in the last chunk). + // + ret.push_back(static_cast( + ((pos_of_char_2 & 0x03) << 6) + pos_of_char(encoded_string[pos + 3]))); + } + } + + pos += 4; } return ret; } +std::string base64_decode(std::string const& s, bool remove_linebreaks) { + return decode(s, remove_linebreaks); +} + +std::string base64_encode(std::string const& s, bool url) { + return encode(s, url); +} + +std::string base64_encode_pem(std::string const& s) { + return encode_pem(s); +} + +std::string base64_encode_mime(std::string const& s) { + return encode_mime(s); +} + +#if __cplusplus >= 201703L +// +// Interface with std::string_view rather than const std::string& +// Requires C++17 +// Provided by Yannic Bonenberger (https://github.com/Yannic) +// + +std::string base64_encode(std::string_view s, bool url) { + return encode(s, url); +} + +std::string base64_encode_pem(std::string_view s) { + return encode_pem(s); +} + +std::string base64_encode_mime(std::string_view s) { + return encode_mime(s); +} + +std::string base64_decode(std::string_view s, bool remove_linebreaks) { + return decode(s, remove_linebreaks); +} + +#endif // __cplusplus >= 201703L + } // namespace hku \ No newline at end of file diff --git a/hikyuu/utilities/base64.h b/hikyuu/utilities/base64.h index 0d5d28f..1de82bd 100644 --- a/hikyuu/utilities/base64.h +++ b/hikyuu/utilities/base64.h @@ -1,9 +1,7 @@ -/* - * Copyright (c) hikyuu.org - * - * Created on: 2020-6-2 - * Author: fasiondog - */ +// +// base64 encoding and decoding with C++. +// Version: 2.rc.08 (release candidate) +// #pragma once #ifndef HKU_UTILS_BASE64_H @@ -18,40 +16,36 @@ namespace hku { -/** - * 将二进制 bytes 数组编码成 base64 字符串 - * @param bytes_to_encode 内存起始地址 - * @param in_len 待计算的字节长度 - */ -std::string HKU_UTILS_API base64_encode(unsigned char const* bytes_to_encode, size_t in_len); - -/** - * 字符串编码为 base64 - * @param src 输入字符串 - * @note 通过 func(unsigned char *, unsigned int) 函数实现,而不是直接只提供 string_view - * 版本的原因是:c++17 string_view 处理 nullptr 时,程序会直接挂掉,无异常 - */ -inline std::string base64_encode(string_view src) { - return base64_encode((unsigned char const*)src.data(), src.size()); -} - /** * 将 base64 字符串解码 * @param encoded_string base64 编码的字符串 - * @param in_len 字符串长度 + * @param remove_linebreaks 是否移除url中的分隔符 * @return string 实际解码后的二进制内容保存在返回的字符串对象中 * @note 如果传入的base64编码字符串中含有非法字符,不会告警,仅处理到能处理的字符 */ -std::string HKU_UTILS_API base64_decode(unsigned char const* encoded_string, size_t in_len); +std::string HKU_UTILS_API base64_decode(std::string const& encoded_string, + bool remove_linebreaks = false); /** - * 将 base64 字符串解码 - * @param encoded_string base64 编码的字符串 - * @return string 实际解码后的二进制内容保存在返回的字符串对象中 + * 将二进制 bytes 数组编码成 base64 字符串 + * @param bytes_to_encode 内存起始地址 + * @param in_len 待计算的字节长度 + * @param url 是否为在 url 中使用 */ -inline std::string base64_decode(string_view encoded_string) { - return base64_decode((unsigned char const*)encoded_string.data(), encoded_string.size()); -} +std::string HKU_UTILS_API base64_encode(unsigned char const* bytes_to_encode, size_t in_len, + bool url = false); + +std::string HKU_UTILS_API base64_encode(const std::string& s, bool url = false); +std::string HKU_UTILS_API base64_encode_pem(const std::string& s); +std::string HKU_UTILS_API base64_encode_mime(const std::string& s); + +#if __cplusplus >= 201703L +std::string HKU_UTILS_API base64_encode(string_view s, bool url = false); +std::string HKU_UTILS_API base64_encode_pem(string_view s); +std::string HKU_UTILS_API base64_encode_mime(string_view s); + +std::string HKU_UTILS_API base64_decode(string_view s, bool remove_linebreaks = false); +#endif // __cplusplus >= 201703L } // namespace hku diff --git a/release.md b/release.md index 66c409a..0c95ce4 100644 --- a/release.md +++ b/release.md @@ -1,11 +1,15 @@ # 版本发布说明 -## 1.0.5 - +## 1.0.6 - + + +## 1.0.5 - 2024年9月20日 1. fixed MySQLStatement::sub_getColumnAsBlob 未正确获取 blob 长度 2. fixed HttpClient 未正确处理含有多个值的 HttpParams 3. 优化 TimerManager, 可以指定使用外部任务组 4. Datetime 新增支持 "20240822 11:30:06.230" 的字符串方式构造 +5. 调整 base64 编解码接口 ## 1.0.4 - 2024年8月6日 diff --git a/test/utilities/test_base64.cpp b/test/utilities/test_base64.cpp index edf9956..c198cd7 100644 --- a/test/utilities/test_base64.cpp +++ b/test/utilities/test_base64.cpp @@ -28,20 +28,12 @@ TEST_CASE("test_base64_encode") { } TEST_CASE("test_base64_decode") { - // C++17 string_view 无法正确处理 null_ptr - // auto x = std::string_view(nullptr); - // HKU_INFO("{}", x); - - CHECK_THROWS(base64_decode(nullptr, 10)); - // 传入空字符串 - CHECK_EQ(base64_decode(""), std::string()); - - CHECK_EQ(base64_decode("+"), std::string()); + CHECK_EQ(base64_decode(std::string("")), std::string()); - auto x = base64_decode("ABCDEFGHIJKLMNOPQRSTUVWXYZ 泉州"); - // HKU_INFO("{}", x); - HKU_INFO("{}", base64_encode("ABCDEFGHIJKLMNOPQRSTUVWXYZ 泉州")); + // 传入非法 base64 字符串 + CHECK_THROWS(base64_decode(std::string("+"))); + CHECK_THROWS(base64_decode(std::string("ABCDEFGHIJKLMNOPQRSTUVWXYZ 泉州"))); // 正常解码 std::string src("ABCDEFGHIJKLMNOPQRSTUVWXYZ 泉州 0123456789+/ 泉州 abcdefghijklmnopqrstuvwxyz");