Branch data Line data Source code
# 1 : : // Copyright (c) 2009-2010 Satoshi Nakamoto
# 2 : : // Copyright (c) 2009-2021 The Bitcoin Core developers
# 3 : : // Distributed under the MIT software license, see the accompanying
# 4 : : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
# 5 : :
# 6 : : /**
# 7 : : * Utilities for converting data from/to strings.
# 8 : : */
# 9 : : #ifndef BITCOIN_UTIL_STRENCODINGS_H
# 10 : : #define BITCOIN_UTIL_STRENCODINGS_H
# 11 : :
# 12 : : #include <attributes.h>
# 13 : : #include <span.h>
# 14 : : #include <util/string.h>
# 15 : :
# 16 : : #include <charconv>
# 17 : : #include <cstdint>
# 18 : : #include <iterator>
# 19 : : #include <limits>
# 20 : : #include <optional>
# 21 : : #include <string>
# 22 : : #include <vector>
# 23 : :
# 24 : : /** Sets of safe characters selectable through the `rule` argument of SanitizeString() */
# 25 : : enum SafeChars
# 26 : : {
# 27 : : SAFE_CHARS_DEFAULT, //!< The full set of allowed chars; default value of SanitizeString()'s `rule`
# 28 : : SAFE_CHARS_UA_COMMENT, //!< BIP-0014 subset
# 29 : : SAFE_CHARS_FILENAME, //!< Chars allowed in filenames
# 30 : : SAFE_CHARS_URI, //!< Chars allowed in URIs (RFC 3986)
# 31 : : };
# 32 : :
# 33 : : /**
# 34 : : * Unit multipliers used by ParseByteUnits().
# 35 : : * Lowercase enumerators (k, m, g, t) are powers of 1000.
# 36 : : * Uppercase enumerators (K, M, G, T) are powers of 1024.
# 37 : : */
# 38 : : enum class ByteUnit : uint64_t {
# 39 : : NOOP = 1ULL, //!< multiplier of one
# 40 : : k = 1000ULL,
# 41 : : K = 1024ULL,
# 42 : : m = 1'000'000ULL,
# 43 : : M = 1ULL << 20, //!< 1024^2
# 44 : : g = 1'000'000'000ULL,
# 45 : : G = 1ULL << 30, //!< 1024^3
# 46 : : t = 1'000'000'000'000ULL,
# 47 : : T = 1ULL << 40, //!< 1024^4
# 48 : : };
# 49 : :
# 50 : : /**
# 51 : : * Remove unsafe chars. Safe chars chosen to allow simple messages/URLs/email
# 52 : : * addresses, but avoid anything even possibly remotely dangerous like & or >
# 53 : : * @param[in] str The string to sanitize
# 54 : : * @param[in] rule The set of safe chars to choose (default: least restrictive)
# 55 : : * @return A new string without unsafe chars
# 56 : : */
# 57 : : std::string SanitizeString(const std::string& str, int rule = SAFE_CHARS_DEFAULT);
# 58 : : std::vector<unsigned char> ParseHex(const char* psz);
# 59 : : std::vector<unsigned char> ParseHex(const std::string& str);
# 60 : : signed char HexDigit(char c);
# 61 : : /* Returns true if every character in str is a hex digit and str contains an
# 62 : : * even number of hex digits. */
# 63 : : bool IsHex(const std::string& str);
# 64 : : /**
# 65 : : * Return true if the string is a hex number, optionally prefixed with "0x"
# 66 : : */
# 67 : : bool IsHexNumber(const std::string& str);
# 68 : : std::vector<unsigned char> DecodeBase64(const char* p, bool* pf_invalid = nullptr);
# 69 : : std::string DecodeBase64(const std::string& str, bool* pf_invalid = nullptr);
# 70 : : std::string EncodeBase64(Span<const unsigned char> input);
# 71 : 143 : inline std::string EncodeBase64(Span<const std::byte> input) { return EncodeBase64(MakeUCharSpan(input)); }
# 72 : 1964 : inline std::string EncodeBase64(const std::string& str) { return EncodeBase64(MakeUCharSpan(str)); }
# 73 : : std::vector<unsigned char> DecodeBase32(const char* p, bool* pf_invalid = nullptr);
# 74 : : std::string DecodeBase32(const std::string& str, bool* pf_invalid = nullptr);
# 75 : :
# 76 : : /**
# 77 : : * Base32 encode.
# 78 : : * If `pad` is true, then the output will be padded with '=' so that its length
# 79 : : * is a multiple of 8.
# 80 : : */
# 81 : : std::string EncodeBase32(Span<const unsigned char> input, bool pad = true);
# 82 : :
# 83 : : /**
# 84 : : * Base32 encode.
# 85 : : * If `pad` is true, then the output will be padded with '=' so that its length
# 86 : : * is a multiple of 8.
# 87 : : */
# 88 : : std::string EncodeBase32(const std::string& str, bool pad = true);
# 89 : :
# 90 : : void SplitHostPort(std::string in, uint16_t& portOut, std::string& hostOut);
# 91 : :
# 92 : : // LocaleIndependentAtoi is provided for backwards compatibility reasons.
# 93 : : //
# 94 : : // New code should use ToIntegral or the ParseInt* functions
# 95 : : // which provide parse error feedback.
# 96 : : //
# 97 : : // The goal of LocaleIndependentAtoi is to replicate the defined behaviour of
# 98 : : // std::atoi as it behaves under the "C" locale, and remove some undefined
# 99 : : // behavior. If the parsed value is bigger than the integer type's maximum
# 100 : : // value, or smaller than the integer type's minimum value, std::atoi has
# 101 : : // undefined behavior, while this function returns the maximum or minimum
# 102 : : // values, respectively.
# 103 : : template <typename T>
# 104 : : T LocaleIndependentAtoi(const std::string& str)
# 105 : 235677 : {
# 106 : 235677 : static_assert(std::is_integral<T>::value);
# 107 : 235677 : T result;
# 108 : : // Emulate atoi(...) handling of white space and leading +/-.
# 109 : 235677 : std::string s = TrimString(str);
# 110 [ + - ][ + - ]: 235677 : if (!s.empty() && s[0] == '+') {
# [ + - ][ + - ]
# [ + + ][ + + ]
# [ + - ][ + - ]
# [ - + ][ - + ]
# [ - + ][ + + ]
# [ - + ][ + + ]
# [ - + ][ - + ]
# 111 [ # # ][ + - ]: 32 : if (s.length() >= 2 && s[1] == '-') {
# [ # # ][ # # ]
# [ # # ][ # # ]
# [ # # ][ + - ]
# [ # # ][ # # ]
# [ # # ][ # # ]
# [ # # ][ + + ]
# [ # # ][ + + ]
# 112 : 6 : return 0;
# 113 : 6 : }
# 114 : 26 : s = s.substr(1);
# 115 : 26 : }
# 116 : 235671 : auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result);
# 117 [ + + ][ + + ]: 235671 : if (error_condition == std::errc::result_out_of_range) {
# [ + + ][ + + ]
# [ + + ][ + + ]
# [ + + ][ + + ]
# 118 [ + - ][ + - ]: 40 : if (s.length() >= 1 && s[0] == '-') {
# [ + - ][ + - ]
# [ + - ][ + - ]
# [ + - ][ + - ]
# [ + + ][ - + ]
# [ + + ][ + + ]
# [ + + ][ - + ]
# [ - + ][ - + ]
# 119 : : // Saturate underflow, per strtoll's behavior.
# 120 : 16 : return std::numeric_limits<T>::min();
# 121 : 24 : } else {
# 122 : : // Saturate overflow, per strtoll's behavior.
# 123 : 24 : return std::numeric_limits<T>::max();
# 124 : 24 : }
# 125 [ + + ][ - + ]: 235631 : } else if (error_condition != std::errc{}) {
# [ + + ][ - + ]
# [ + + ][ + + ]
# [ + + ][ + + ]
# 126 : 140 : return 0;
# 127 : 140 : }
# 128 : 235491 : return result;
# 129 : 235671 : }
# 130 : :
/**
 * Checks whether a character is a decimal digit, without consulting the locale.
 * @param[in] c character to classify
 * @return true when c lies in the range '0' through '9'; false otherwise.
 */
constexpr bool IsDigit(char c)
{
    return '0' <= c && c <= '9';
}
# 140 : :
/**
 * Checks whether a character is whitespace. The characters treated as
 * whitespace are space, form-feed ('\f'), newline ('\n'), carriage return
 * ('\r'), horizontal tab ('\t') and vertical tab ('\v').
 *
 * The check does not depend on the locale. Under the C locale it gives the
 * same answer as std::isspace.
 *
 * @param[in] c character to classify
 * @return true when c is one of the whitespace characters above; false otherwise
 */
constexpr inline bool IsSpace(char c) noexcept
{
    switch (c) {
    case ' ':
    case '\f':
    case '\n':
    case '\r':
    case '\t':
    case '\v':
        return true;
    default:
        return false;
    }
}
# 155 : :
/**
 * Strictly parse a string as an integer of type T.
 *
 * The whole string must match the regex `-?[0-9]+`; leading whitespace, a
 * leading '+', and any trailing character make the parse fail. A minus sign is
 * only accepted for signed integer types.
 *
 * @param[in] str text to parse
 * @returns the parsed value, or std::nullopt when the string is not consumed
 *          completely or the value is outside the range representable by T.
 */
template <typename T>
std::optional<T> ToIntegral(const std::string& str)
{
    static_assert(std::is_integral<T>::value);

    const char* const begin = str.data();
    const char* const end = begin + str.size();

    T value;
    const std::from_chars_result parsed = std::from_chars(begin, end, value);

    // Success requires both a clean conversion and that no input is left over.
    const bool fully_parsed = parsed.ec == std::errc{} && parsed.ptr == end;
    if (!fully_parsed) {
        return std::nullopt;
    }
    return value;
}
# 175 : :
# 176 : : /**
# 177 : : * Convert string to signed 32-bit integer with strict parse error feedback.
# 178 : : * @returns true if the entire string could be parsed as valid integer,
# 179 : : * false if not the entire string could be parsed or when overflow or underflow occurred.
# 180 : : */
# 181 : : [[nodiscard]] bool ParseInt32(const std::string& str, int32_t *out);
# 182 : :
# 183 : : /**
# 184 : : * Convert string to signed 64-bit integer with strict parse error feedback.
# 185 : : * @returns true if the entire string could be parsed as valid integer,
# 186 : : * false if not the entire string could be parsed or when overflow or underflow occurred.
# 187 : : */
# 188 : : [[nodiscard]] bool ParseInt64(const std::string& str, int64_t *out);
# 189 : :
# 190 : : /**
# 191 : : * Convert decimal string to unsigned 8-bit integer with strict parse error feedback.
# 192 : : * @returns true if the entire string could be parsed as valid integer,
# 193 : : * false if not the entire string could be parsed or when overflow or underflow occurred.
# 194 : : */
# 195 : : [[nodiscard]] bool ParseUInt8(const std::string& str, uint8_t *out);
# 196 : :
# 197 : : /**
# 198 : : * Convert decimal string to unsigned 16-bit integer with strict parse error feedback.
# 199 : : * @returns true if the entire string could be parsed as valid integer,
# 200 : : * false if the entire string could not be parsed or if overflow or underflow occurred.
# 201 : : */
# 202 : : [[nodiscard]] bool ParseUInt16(const std::string& str, uint16_t* out);
# 203 : :
# 204 : : /**
# 205 : : * Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
# 206 : : * @returns true if the entire string could be parsed as valid integer,
# 207 : : * false if not the entire string could be parsed or when overflow or underflow occurred.
# 208 : : */
# 209 : : [[nodiscard]] bool ParseUInt32(const std::string& str, uint32_t *out);
# 210 : :
# 211 : : /**
# 212 : : * Convert decimal string to unsigned 64-bit integer with strict parse error feedback.
# 213 : : * @returns true if the entire string could be parsed as valid integer,
# 214 : : * false if not the entire string could be parsed or when overflow or underflow occurred.
# 215 : : */
# 216 : : [[nodiscard]] bool ParseUInt64(const std::string& str, uint64_t *out);
# 217 : :
# 218 : : /**
# 219 : : * Convert a span of bytes to a lower-case hexadecimal string.
# 220 : : */
# 221 : : std::string HexStr(const Span<const uint8_t> s);
# 222 : 7 : inline std::string HexStr(const Span<const char> s) { return HexStr(MakeUCharSpan(s)); }
# 223 : 6913 : inline std::string HexStr(const Span<const std::byte> s) { return HexStr(MakeUCharSpan(s)); }
# 224 : :
# 225 : : /**
# 226 : : * Format a paragraph of text to a fixed width, adding spaces for
# 227 : : * indentation to any added line.
# 228 : : */
# 229 : : std::string FormatParagraph(const std::string& in, size_t width = 79, size_t indent = 0);
# 230 : :
/**
 * Timing-attack-resistant equality comparison.
 *
 * Apart from the early exit for an empty `b`, the loop always visits every
 * element of `a` and never stops at the first mismatch, so the running time is
 * proportional to the length of the first argument.
 *
 * @param[in] a first sequence; must provide size() and operator[]
 * @param[in] b second sequence of the same type
 * @return true if both sequences have the same length and equal elements.
 */
template <typename T>
bool TimingResistantEqual(const T& a, const T& b)
{
    if (b.size() == 0) {
        return a.size() == 0;
    }

    // A length mismatch seeds the accumulator with a non-zero value.
    size_t diff = a.size() ^ b.size();
    for (size_t pos = 0; pos < a.size(); ++pos) {
        // Index b modulo its length so the access stays in range even when
        // a is longer than b.
        const size_t wrapped = pos % b.size();
        diff |= size_t(a[pos] ^ b[wrapped]);
    }
    return diff == 0;
}
# 245 : :
# 246 : : /** Parse number as fixed point according to JSON number syntax.
# 247 : : * See https://json.org/number.gif
# 248 : : * @returns true on success, false on error.
# 249 : : * @note The result must be in the range (-10^18,10^18), otherwise an overflow error will trigger.
# 250 : : */
# 251 : : [[nodiscard]] bool ParseFixedPoint(const std::string &val, int decimals, int64_t *amount_out);
# 252 : :
/**
 * Convert from one power-of-2 number base to another.
 *
 * Reads values that are `frombits` wide from [it, end), concatenates their
 * bits most-significant first, and calls `outfn` once for every complete group
 * of `tobits` bits.
 *
 * @tparam frombits width in bits of each input value (must be positive)
 * @tparam tobits   width in bits of each output value (must be positive)
 * @tparam pad      if true, leftover bits are emitted as one final value,
 *                  zero-padded on the right; if false, leftover bits must be
 *                  fewer than `frombits` and all zero, otherwise the
 *                  conversion fails.
 * @param[in] outfn callable invoked with each output value (as size_t)
 * @param[in] it    iterator to the first input value
 * @param[in] end   iterator one past the last input value
 * @return true on success; false only when `pad` is false and the leftover
 *         bits are invalid. Values already passed to `outfn` are not undone.
 *
 * @note The masks are computed in size_t rather than int, so any combination
 *       with frombits + tobits - 1 smaller than the bit width of size_t works.
 */
template<int frombits, int tobits, bool pad, typename O, typename I>
bool ConvertBits(const O& outfn, I it, I end) {
    static_assert(frombits > 0 && tobits > 0, "bit widths must be positive");
    static_assert(frombits + tobits - 1 < std::numeric_limits<size_t>::digits,
                  "accumulator does not fit in size_t");

    // Unsigned copies of the widths avoid signed/unsigned comparisons below.
    constexpr size_t from_width = frombits;
    constexpr size_t to_width = tobits;
    // Mask selecting one output value.
    constexpr size_t maxv = (size_t{1} << to_width) - 1;
    // Mask keeping only the bits of the accumulator that can still be needed.
    constexpr size_t max_acc = (size_t{1} << (from_width + to_width - 1)) - 1;

    size_t acc = 0;  // bit accumulator, newest input in the low bits
    size_t bits = 0; // number of not-yet-emitted bits held in acc
    while (it != end) {
        acc = ((acc << from_width) | *it) & max_acc;
        bits += from_width;
        while (bits >= to_width) {
            bits -= to_width;
            outfn((acc >> bits) & maxv);
        }
        ++it;
    }

    if constexpr (pad) {
        // Flush the remainder, left-aligned within one output value.
        if (bits) outfn((acc << (to_width - bits)) & maxv);
    } else if (bits >= from_width || ((acc << (to_width - bits)) & maxv)) {
        // Without padding, a whole leftover input value or any non-zero
        // leftover bit means the input was not a valid encoding.
        return false;
    }
    return true;
}
# 276 : :
/**
 * Lowercase a single character without consulting the locale.
 * Only the uppercase letters 'A' through 'Z' of the standard 7-bit ASCII
 * range are converted; this restriction is intentional.
 *
 * @param[in] c the character to convert to lowercase.
 * @return the lowercase equivalent of c, or c itself when it is not an
 *         uppercase letter.
 */
constexpr char ToLower(char c)
{
    if (c < 'A' || c > 'Z') {
        return c;
    }
    return static_cast<char>((c - 'A') + 'a');
}
# 291 : :
# 292 : : /**
# 293 : : * Returns the lowercase equivalent of the given string.
# 294 : : * This function is locale independent. It only converts uppercase
# 295 : : * characters in the standard 7-bit ASCII range.
# 296 : : * This is a feature, not a limitation.
# 297 : : *
# 298 : : * @param[in] str the string to convert to lowercase.
# 299 : : * @returns lowercased equivalent of str
# 300 : : */
# 301 : : std::string ToLower(const std::string& str);
# 302 : :
/**
 * Uppercase a single character without consulting the locale.
 * Only the lowercase letters 'a' through 'z' of the standard 7-bit ASCII
 * range are converted; this restriction is intentional.
 *
 * @param[in] c the character to convert to uppercase.
 * @return the uppercase equivalent of c, or c itself when it is not a
 *         lowercase letter.
 */
constexpr char ToUpper(char c)
{
    if (c < 'a' || c > 'z') {
        return c;
    }
    return static_cast<char>((c - 'a') + 'A');
}
# 317 : :
# 318 : : /**
# 319 : : * Returns the uppercase equivalent of the given string.
# 320 : : * This function is locale independent. It only converts lowercase
# 321 : : * characters in the standard 7-bit ASCII range.
# 322 : : * This is a feature, not a limitation.
# 323 : : *
# 324 : : * @param[in] str the string to convert to uppercase.
# 325 : : * @returns UPPERCASED EQUIVALENT OF str
# 326 : : */
# 327 : : std::string ToUpper(const std::string& str);
# 328 : :
# 329 : : /**
# 330 : : * Capitalizes the first character of the given string.
# 331 : : * This function is locale independent. It only converts lowercase
# 332 : : * characters in the standard 7-bit ASCII range.
# 333 : : * This is a feature, not a limitation.
# 334 : : *
# 335 : : * @param[in] str the string to capitalize.
# 336 : : * @returns string with the first letter capitalized.
# 337 : : */
# 338 : : std::string Capitalize(std::string str);
# 339 : :
# 340 : : /**
# 341 : : * Parse a string with suffix unit [k|K|m|M|g|G|t|T].
# 342 : : * Must be a whole integer, fractions not allowed (0.5t), no whitespace or +-
# 343 : : * Lowercase units are 1000 base. Uppercase units are 1024 base.
# 344 : : * Examples: 2m,27M,19g,41T
# 345 : : *
# 346 : : * @param[in] str the string to convert into bytes
# 347 : : * @param[in] default_multiplier if no unit is found in str use this unit
# 348 : : * @returns the number of bytes parsed from str, or nullopt if ToIntegral
# 349 : : * fails, str is empty, str has trailing whitespace, or the result overflows
# 350 : : */
# 351 : : std::optional<uint64_t> ParseByteUnits(const std::string& str, ByteUnit default_multiplier);
# 352 : :
# 353 : : #endif // BITCOIN_UTIL_STRENCODINGS_H
|