Branch data Line data Source code
# 1 : : // Copyright (c) 2017, 2021 Pieter Wuille
# 2 : : // Copyright (c) 2021 The Bitcoin Core developers
# 3 : : // Distributed under the MIT software license, see the accompanying
# 4 : : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
# 5 : :
# 6 : : #include <bech32.h>
# 7 : : #include <util/vector.h>
# 8 : :
# 9 : : #include <array>
# 10 : : #include <assert.h>
# 11 : : #include <numeric>
# 12 : : #include <optional>
# 13 : :
# 14 : : namespace bech32
# 15 : : {
# 16 : :
# 17 : : namespace
# 18 : : {
# 19 : :
# 20 : : typedef std::vector<uint8_t> data;
# 21 : :
# 22 : : /** The Bech32 and Bech32m character set for encoding. */
# 23 : : const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
# 24 : :
# 25 : : /** The Bech32 and Bech32m character set for decoding. */
# 26 : : const int8_t CHARSET_REV[128] = {
# 27 : : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
# 28 : : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
# 29 : : -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
# 30 : : 15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1,
# 31 : : -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
# 32 : : 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1,
# 33 : : -1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
# 34 : : 1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1
# 35 : : };
# 36 : :
# 37 : : /** We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32)
# 38 : : * The defining polynomial of the extension is x^2 + 9x + 23.
# 39 : : * Let (e) be a root of this defining polynomial. Then (e) is a primitive element of GF(1024),
# 40 : : * that is, a generator of the field. Every non-zero element of the field can then be represented
# 41 : : * as (e)^k for some power k.
# 42 : : * The array GF1024_EXP contains all these powers of (e) - GF1024_EXP[k] = (e)^k in GF(1024).
# 43 : : * Conversely, GF1024_LOG contains the discrete logarithms of these powers, so
# 44 : : * GF1024_LOG[GF1024_EXP[k]] == k.
# 45 : : * The following function generates the two tables GF1024_EXP and GF1024_LOG as constexprs. */
# 46 : : constexpr std::pair<std::array<int16_t, 1023>, std::array<int16_t, 1024>> GenerateGFTables()
# 47 : 0 : {
# 48 : 0 : // Build table for GF(32).
# 49 : 0 : // We use these tables to perform arithmetic in GF(32) below, when constructing the
# 50 : 0 : // tables for GF(1024).
# 51 : 0 : std::array<int8_t, 31> GF32_EXP{};
# 52 : 0 : std::array<int8_t, 32> GF32_LOG{};
# 53 : 0 :
# 54 : 0 : // fmod encodes the defining polynomial of GF(32) over GF(2), x^5 + x^3 + 1.
# 55 : 0 : // Because coefficients in GF(2) are binary digits, the coefficients are packed as 101001.
# 56 : 0 : const int fmod = 41;
# 57 : 0 :
# 58 : 0 : // Elements of GF(32) are encoded as vectors of length 5 over GF(2), that is,
# 59 : 0 : // 5 binary digits. Each element (b_4, b_3, b_2, b_1, b_0) encodes a polynomial
# 60 : 0 : // b_4*x^4 + b_3*x^3 + b_2*x^2 + b_1*x^1 + b_0 (modulo fmod).
# 61 : 0 : // For example, 00001 = 1 is the multiplicative identity.
# 62 : 0 : GF32_EXP[0] = 1;
# 63 : 0 : GF32_LOG[0] = -1;
# 64 : 0 : GF32_LOG[1] = 0;
# 65 : 0 : int v = 1;
# 66 : 0 : for (int i = 1; i < 31; ++i) {
# 67 : 0 : // Multiplication by x is the same as shifting left by 1, as
# 68 : 0 : // every coefficient of the polynomial is moved up one place.
# 69 : 0 : v = v << 1;
# 70 : 0 : // If the polynomial now has an x^5 term, we subtract fmod from it
# 71 : 0 : // to remain working modulo fmod. Subtraction is the same as XOR in characteristic
# 72 : 0 : // 2 fields.
# 73 : 0 : if (v & 32) v ^= fmod;
# 74 : 0 : GF32_EXP[i] = v;
# 75 : 0 : GF32_LOG[v] = i;
# 76 : 0 : }
# 77 : 0 :
# 78 : 0 : // Build table for GF(1024)
# 79 : 0 : std::array<int16_t, 1023> GF1024_EXP{};
# 80 : 0 : std::array<int16_t, 1024> GF1024_LOG{};
# 81 : 0 :
# 82 : 0 : GF1024_EXP[0] = 1;
# 83 : 0 : GF1024_LOG[0] = -1;
# 84 : 0 : GF1024_LOG[1] = 0;
# 85 : 0 :
# 86 : 0 : // Each element v of GF(1024) is encoded as a 10 bit integer in the following way:
# 87 : 0 : // v = v1 || v0 where v0, v1 are 5-bit integers (elements of GF(32)).
# 88 : 0 : // The element (e) is encoded as 1 || 0, to represent 1*(e) + 0. Every other element
# 89 : 0 : // a*(e) + b is represented as a || b (a and b are both GF(32) elements). Given (v),
# 90 : 0 : // we compute (e)*(v) by multiplying in the following way:
# 91 : 0 : //
# 92 : 0 : // v0' = 23*v1
# 93 : 0 : // v1' = 9*v1 + v0
# 94 : 0 : // e*v = v1' || v0'
# 95 : 0 : //
# 96 : 0 : // Where 23, 9 are GF(32) elements encoded as described above. Multiplication in GF(32)
# 97 : 0 : // is done using the log/exp tables:
# 98 : 0 : // e^x * e^y = e^(x + y) so a * b = EXP[ LOG[a] + LOG [b] ]
# 99 : 0 : // for non-zero a and b.
# 100 : 0 :
# 101 : 0 : v = 1;
# 102 : 0 : for (int i = 1; i < 1023; ++i) {
# 103 : 0 : int v0 = v & 31;
# 104 : 0 : int v1 = v >> 5;
# 105 : 0 :
# 106 : 0 : int v0n = v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(23)) % 31) : 0;
# 107 : 0 : int v1n = (v1 ? GF32_EXP.at((GF32_LOG.at(v1) + GF32_LOG.at(9)) % 31) : 0) ^ v0;
# 108 : 0 :
# 109 : 0 : v = v1n << 5 | v0n;
# 110 : 0 : GF1024_EXP[i] = v;
# 111 : 0 : GF1024_LOG[v] = i;
# 112 : 0 : }
# 113 : 0 :
# 114 : 0 : return std::make_pair(GF1024_EXP, GF1024_LOG);
# 115 : 0 : }
# 116 : :
# 117 : : constexpr auto tables = GenerateGFTables();
# 118 : : constexpr const std::array<int16_t, 1023>& GF1024_EXP = tables.first;
# 119 : : constexpr const std::array<int16_t, 1024>& GF1024_LOG = tables.second;
# 120 : :
# 121 : : /* Determine the final constant to use for the specified encoding. */
# 122 : 99209 : uint32_t EncodingConstant(Encoding encoding) {
# 123 : 99209 : assert(encoding == Encoding::BECH32 || encoding == Encoding::BECH32M);
# 124 [ + + ]: 99209 : return encoding == Encoding::BECH32 ? 1 : 0x2bc830a3;
# 125 : 99209 : }
# 126 : :
# 127 : : /** This function will compute what 6 5-bit values to XOR into the last 6 input values, in order to
# 128 : : * make the checksum 0. These 6 values are packed together in a single 30-bit integer. The higher
# 129 : : * bits correspond to earlier values. */
# 130 : : uint32_t PolyMod(const data& v)
# 131 : 98785 : {
# 132 : : // The input is interpreted as a list of coefficients of a polynomial over F = GF(32), with an
# 133 : : // implicit 1 in front. If the input is [v0,v1,v2,v3,v4], that polynomial is v(x) =
# 134 : : // 1*x^5 + v0*x^4 + v1*x^3 + v2*x^2 + v3*x + v4. The implicit 1 guarantees that
# 135 : : // [v0,v1,v2,...] has a distinct checksum from [0,v0,v1,v2,...].
# 136 : :
# 137 : : // The output is a 30-bit integer whose 5-bit groups are the coefficients of the remainder of
# 138 : : // v(x) mod g(x), where g(x) is the Bech32 generator,
# 139 : : // x^6 + {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18}. g(x) is chosen in such a way
# 140 : : // that the resulting code is a BCH code, guaranteeing detection of up to 3 errors within a
# 141 : : // window of 1023 characters. Among the various possible BCH codes, one was selected to in
# 142 : : // fact guarantee detection of up to 4 errors within a window of 89 characters.
# 143 : :
# 144 : : // Note that the coefficients are elements of GF(32), here represented as decimal numbers
# 145 : : // between {}. In this finite field, addition is just XOR of the corresponding numbers. For
# 146 : : // example, {27} + {13} = {27 ^ 13} = {22}. Multiplication is more complicated, and requires
# 147 : : // treating the bits of values themselves as coefficients of a polynomial over a smaller field,
# 148 : : // GF(2), and multiplying those polynomials mod a^5 + a^3 + 1. For example, {5} * {26} =
# 149 : : // (a^2 + 1) * (a^4 + a^3 + a) = (a^4 + a^3 + a) * a^2 + (a^4 + a^3 + a) = a^6 + a^5 + a^4 + a
# 150 : : // = a^3 + 1 (mod a^5 + a^3 + 1) = {9}.
# 151 : :
# 152 : : // During the course of the loop below, `c` contains the bitpacked coefficients of the
# 153 : : // polynomial constructed from just the values of v that were processed so far, mod g(x). In
# 154 : : // the above example, `c` initially corresponds to 1 mod g(x), and after processing 2 inputs of
# 155 : : // v, it corresponds to x^2 + v0*x + v1 mod g(x). As 1 mod g(x) = 1, that is the starting value
# 156 : : // for `c`.
# 157 : :
# 158 : : // The following Sage code constructs the generator used:
# 159 : : //
# 160 : : // B = GF(2) # Binary field
# 161 : : // BP.<b> = B[] # Polynomials over the binary field
# 162 : : // F_mod = b**5 + b**3 + 1
# 163 : : // F.<f> = GF(32, modulus=F_mod, repr='int') # GF(32) definition
# 164 : : // FP.<x> = F[] # Polynomials over GF(32)
# 165 : : // E_mod = x**2 + F.fetch_int(9)*x + F.fetch_int(23)
# 166 : : // E.<e> = F.extension(E_mod) # GF(1024) extension field definition
# 167 : : // for p in divisors(E.order() - 1): # Verify e has order 1023.
# 168 : : // assert((e**p == 1) == (p % 1023 == 0))
# 169 : : // G = lcm([(e**i).minpoly() for i in range(997,1000)])
# 170 : : // print(G) # Print out the generator
# 171 : : //
# 172 : : // It demonstrates that g(x) is the least common multiple of the minimal polynomials
# 173 : : // of 3 consecutive powers (997,998,999) of a primitive element (e) of GF(1024).
# 174 : : // That guarantees it is, in fact, the generator of a primitive BCH code with cycle
# 175 : : // length 1023 and distance 4. See https://en.wikipedia.org/wiki/BCH_code for more details.
# 176 : :
# 177 : 98785 : uint32_t c = 1;
# 178 [ + + ]: 4958132 : for (const auto v_i : v) {
# 179 : : // We want to update `c` to correspond to a polynomial with one extra term. If the initial
# 180 : : // value of `c` consists of the coefficients of c(x) = f(x) mod g(x), we modify it to
# 181 : : // correspond to c'(x) = (f(x) * x + v_i) mod g(x), where v_i is the next input to
# 182 : : // process. Simplifying:
# 183 : : // c'(x) = (f(x) * x + v_i) mod g(x)
# 184 : : // ((f(x) mod g(x)) * x + v_i) mod g(x)
# 185 : : // (c(x) * x + v_i) mod g(x)
# 186 : : // If c(x) = c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5, we want to compute
# 187 : : // c'(x) = (c0*x^5 + c1*x^4 + c2*x^3 + c3*x^2 + c4*x + c5) * x + v_i mod g(x)
# 188 : : // = c0*x^6 + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i mod g(x)
# 189 : : // = c0*(x^6 mod g(x)) + c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i
# 190 : : // If we call (x^6 mod g(x)) = k(x), this can be written as
# 191 : : // c'(x) = (c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i) + c0*k(x)
# 192 : :
# 193 : : // First, determine the value of c0:
# 194 : 4958132 : uint8_t c0 = c >> 25;
# 195 : :
# 196 : : // Then compute c1*x^5 + c2*x^4 + c3*x^3 + c4*x^2 + c5*x + v_i:
# 197 : 4958132 : c = ((c & 0x1ffffff) << 5) ^ v_i;
# 198 : :
# 199 : : // Finally, for each set bit n in c0, conditionally add {2^n}k(x). These constants can be
# 200 : : // computed using the following Sage code (continuing the code above):
# 201 : : //
# 202 : : // for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(g(x) mod x^6), packed in hex integers.
# 203 : : // v = 0
# 204 : : // for coef in reversed((F.fetch_int(i)*(G % x**6)).coefficients(sparse=True)):
# 205 : : // v = v*32 + coef.integer_representation()
# 206 : : // print("0x%x" % v)
# 207 : : //
# 208 [ + + ]: 4958132 : if (c0 & 1) c ^= 0x3b6a57b2; // k(x) = {29}x^5 + {22}x^4 + {20}x^3 + {21}x^2 + {29}x + {18}
# 209 [ + + ]: 4958132 : if (c0 & 2) c ^= 0x26508e6d; // {2}k(x) = {19}x^5 + {5}x^4 + x^3 + {3}x^2 + {19}x + {13}
# 210 [ + + ]: 4958132 : if (c0 & 4) c ^= 0x1ea119fa; // {4}k(x) = {15}x^5 + {10}x^4 + {2}x^3 + {6}x^2 + {15}x + {26}
# 211 [ + + ]: 4958132 : if (c0 & 8) c ^= 0x3d4233dd; // {8}k(x) = {30}x^5 + {20}x^4 + {4}x^3 + {12}x^2 + {30}x + {29}
# 212 [ + + ]: 4958132 : if (c0 & 16) c ^= 0x2a1462b3; // {16}k(x) = {21}x^5 + x^4 + {8}x^3 + {24}x^2 + {21}x + {19}
# 213 : :
# 214 : 4958132 : }
# 215 : 98785 : return c;
# 216 : 98785 : }
# 217 : :
# 218 : : /** Syndrome computes the values s_j = R(e^j) for j in [997, 998, 999]. As described above, the
# 219 : : * generator polynomial G is the LCM of the minimal polynomials of (e)^997, (e)^998, and (e)^999.
# 220 : : *
# 221 : : * Consider a codeword with errors, of the form R(x) = C(x) + E(x). The residue is the bit-packed
# 222 : : * result of computing R(x) mod G(X), where G is the generator of the code. Because C(x) is a valid
# 223 : : * codeword, it is a multiple of G(X), so the residue is in fact just E(x) mod G(x). Note that all
# 224 : : * of the (e)^j are roots of G(x) by definition, so R((e)^j) = E((e)^j).
# 225 : : *
# 226 : : * Let R(x) = r1*x^5 + r2*x^4 + r3*x^3 + r4*x^2 + r5*x + r6
# 227 : : *
# 228 : : * To compute R((e)^j), we are really computing:
# 229 : : * r1*(e)^(j*5) + r2*(e)^(j*4) + r3*(e)^(j*3) + r4*(e)^(j*2) + r5*(e)^j + r6
# 230 : : *
# 231 : : * Now note that all of the (e)^(j*i) for i in [5..0] are constants and can be precomputed.
# 232 : : * But even more than that, we can consider each coefficient as a bit-string.
# 233 : : * For example, take r5 = (b_5, b_4, b_3, b_2, b_1) written out as 5 bits. Then:
# 234 : : * r5*(e)^j = b_1*(e)^j + b_2*(2*(e)^j) + b_3*(4*(e)^j) + b_4*(8*(e)^j) + b_5*(16*(e)^j)
# 235 : : * where all the (2^i*(e)^j) are constants and can be precomputed.
# 236 : : *
# 237 : : * Then we just add each of these corresponding constants to our final value based on the
# 238 : : * bit values b_i. This is exactly what is done in the Syndrome function below.
# 239 : : */
# 240 : 0 : constexpr std::array<uint32_t, 25> GenerateSyndromeConstants() {
# 241 : 0 : std::array<uint32_t, 25> SYNDROME_CONSTS{};
# 242 : 0 : for (int k = 1; k < 6; ++k) {
# 243 : 0 : for (int shift = 0; shift < 5; ++shift) {
# 244 : 0 : int16_t b = GF1024_LOG.at(1 << shift);
# 245 : 0 : int16_t c0 = GF1024_EXP.at((997*k + b) % 1023);
# 246 : 0 : int16_t c1 = GF1024_EXP.at((998*k + b) % 1023);
# 247 : 0 : int16_t c2 = GF1024_EXP.at((999*k + b) % 1023);
# 248 : 0 : uint32_t c = c2 << 20 | c1 << 10 | c0;
# 249 : 0 : int ind = 5*(k-1) + shift;
# 250 : 0 : SYNDROME_CONSTS[ind] = c;
# 251 : 0 : }
# 252 : 0 : }
# 253 : 0 : return SYNDROME_CONSTS;
# 254 : 0 : }
# 255 : : constexpr std::array<uint32_t, 25> SYNDROME_CONSTS = GenerateSyndromeConstants();
# 256 : :
# 257 : : /**
# 258 : : * Syndrome returns the three values s_997, s_998, and s_999 described above,
# 259 : : * packed into a 30-bit integer, where each group of 10 bits encodes one value.
# 260 : : */
# 261 : 62 : uint32_t Syndrome(const uint32_t residue) {
# 262 : : // low is the first 5 bits, corresponding to the r6 in the residue
# 263 : : // (the constant term of the polynomial).
# 264 : 62 : uint32_t low = residue & 0x1f;
# 265 : :
# 266 : : // We begin by setting s_j = low = r6 for all three values of j, because these are unconditional.
# 267 : 62 : uint32_t result = low ^ (low << 10) ^ (low << 20);
# 268 : :
# 269 : : // Then for each following bit, we add the corresponding precomputed constant if the bit is 1.
# 270 : : // For example, 0x31edd3c4 is 1100011110 1101110100 1111000100 when unpacked in groups of 10
# 271 : : // bits, corresponding exactly to a^999 || a^998 || a^997 (matching the corresponding values in
# 272 : : // GF1024_EXP above). In this way, we compute all three values of s_j for j in (997, 998, 999)
# 273 : : // simultaneously. Recall that XOR corresponds to addition in a characteristic 2 field.
# 274 [ + + ]: 1612 : for (int i = 0; i < 25; ++i) {
# 275 [ + + ]: 1550 : result ^= ((residue >> (5+i)) & 1 ? SYNDROME_CONSTS.at(i) : 0);
# 276 : 1550 : }
# 277 : 62 : return result;
# 278 : 62 : }
# 279 : :
# 280 : : /** Convert to lower case. */
# 281 : : inline unsigned char LowerCase(unsigned char c)
# 282 : 103744 : {
# 283 [ + + ][ + + ]: 103744 : return (c >= 'A' && c <= 'Z') ? (c - 'A') + 'a' : c;
# 284 : 103744 : }
# 285 : :
# 286 : : /** Return indices of invalid characters in a Bech32 string. */
# 287 : : bool CheckCharacters(const std::string& str, std::vector<int>& errors)
# 288 : 26089 : {
# 289 : 26089 : bool lower = false, upper = false;
# 290 [ + + ]: 1183323 : for (size_t i = 0; i < str.size(); ++i) {
# 291 : 1157234 : unsigned char c{(unsigned char)(str[i])};
# 292 [ + + ][ + + ]: 1157234 : if (c >= 'a' && c <= 'z') {
# 293 [ + + ]: 848712 : if (upper) {
# 294 : 4 : errors.push_back(i);
# 295 : 848708 : } else {
# 296 : 848708 : lower = true;
# 297 : 848708 : }
# 298 [ + + ][ + + ]: 848712 : } else if (c >= 'A' && c <= 'Z') {
# 299 [ + + ]: 3189 : if (lower) {
# 300 : 112 : errors.push_back(i);
# 301 : 3077 : } else {
# 302 : 3077 : upper = true;
# 303 : 3077 : }
# 304 [ + + ][ + + ]: 305333 : } else if (c < 33 || c > 126) {
# 305 : 28 : errors.push_back(i);
# 306 : 28 : }
# 307 : 1157234 : }
# 308 : 26089 : return errors.empty();
# 309 : 26089 : }
# 310 : :
# 311 : : /** Expand a HRP for use in checksum computation. */
# 312 : : data ExpandHRP(const std::string& hrp)
# 313 : 98785 : {
# 314 : 98785 : data ret;
# 315 : 98785 : ret.reserve(hrp.size() + 90);
# 316 : 98785 : ret.resize(hrp.size() * 2 + 1);
# 317 [ + + ]: 493889 : for (size_t i = 0; i < hrp.size(); ++i) {
# 318 : 395104 : unsigned char c = hrp[i];
# 319 : 395104 : ret[i] = c >> 5;
# 320 : 395104 : ret[i + hrp.size() + 1] = c & 0x1f;
# 321 : 395104 : }
# 322 : 98785 : ret[hrp.size()] = 0;
# 323 : 98785 : return ret;
# 324 : 98785 : }
# 325 : :
# 326 : : /** Verify a checksum. */
# 327 : : Encoding VerifyChecksum(const std::string& hrp, const data& values)
# 328 : 25911 : {
# 329 : : // PolyMod computes what value to xor into the final values to make the checksum 0. However,
# 330 : : // if we required that the checksum was 0, it would be the case that appending a 0 to a valid
# 331 : : // list of values would result in a new valid list. For that reason, Bech32 requires the
# 332 : : // resulting checksum to be 1 instead. In Bech32m, this constant was amended. See
# 333 : : // https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details.
# 334 : 25911 : const uint32_t check = PolyMod(Cat(ExpandHRP(hrp), values));
# 335 [ + + ]: 25911 : if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32;
# 336 [ + + ]: 424 : if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M;
# 337 : 29 : return Encoding::INVALID;
# 338 : 424 : }
# 339 : :
# 340 : : /** Create a checksum. */
# 341 : : data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values)
# 342 : 72792 : {
# 343 : 72792 : data enc = Cat(ExpandHRP(hrp), values);
# 344 : 72792 : enc.resize(enc.size() + 6); // Append 6 zeroes
# 345 : 72792 : uint32_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 6 zeroes.
# 346 : 72792 : data ret(6);
# 347 [ + + ]: 509544 : for (size_t i = 0; i < 6; ++i) {
# 348 : : // Convert the 5-bit groups in mod to checksum values.
# 349 : 436752 : ret[i] = (mod >> (5 * (5 - i))) & 31;
# 350 : 436752 : }
# 351 : 72792 : return ret;
# 352 : 72792 : }
# 353 : :
# 354 : : } // namespace
# 355 : :
# 356 : : /** Encode a Bech32 or Bech32m string. */
# 357 : 72792 : std::string Encode(Encoding encoding, const std::string& hrp, const data& values) {
# 358 : : // First ensure that the HRP is all lowercase. BIP-173 and BIP350 require an encoder
# 359 : : // to return a lowercase Bech32/Bech32m string, but if given an uppercase HRP, the
# 360 : : // result will always be invalid.
# 361 [ + + ]: 72792 : for (const char& c : hrp) assert(c < 'A' || c > 'Z');
# 362 : 72792 : data checksum = CreateChecksum(encoding, hrp, values);
# 363 : 72792 : data combined = Cat(values, checksum);
# 364 : 72792 : std::string ret = hrp + '1';
# 365 : 72792 : ret.reserve(ret.size() + combined.size());
# 366 [ + + ]: 3042372 : for (const auto c : combined) {
# 367 : 3042372 : ret += CHARSET[c];
# 368 : 3042372 : }
# 369 : 72792 : return ret;
# 370 : 72792 : }
# 371 : :
# 372 : : /** Decode a Bech32 or Bech32m string. */
# 373 : 25978 : DecodeResult Decode(const std::string& str) {
# 374 : 25978 : std::vector<int> errors;
# 375 [ + + ]: 25978 : if (!CheckCharacters(str, errors)) return {};
# 376 : 25948 : size_t pos = str.rfind('1');
# 377 [ + + ][ + + ]: 25948 : if (str.size() > 90 || pos == str.npos || pos == 0 || pos + 7 > str.size()) {
# [ + + ][ + + ]
# 378 : 26 : return {};
# 379 : 26 : }
# 380 : 25922 : data values(str.size() - 1 - pos);
# 381 [ + + ]: 1049626 : for (size_t i = 0; i < str.size() - 1 - pos; ++i) {
# 382 : 1023715 : unsigned char c = str[i + pos + 1];
# 383 : 1023715 : int8_t rev = CHARSET_REV[c];
# 384 : :
# 385 [ + + ]: 1023715 : if (rev == -1) {
# 386 : 11 : return {};
# 387 : 11 : }
# 388 : 1023704 : values[i] = rev;
# 389 : 1023704 : }
# 390 : 25911 : std::string hrp;
# 391 [ + + ]: 129479 : for (size_t i = 0; i < pos; ++i) {
# 392 : 103568 : hrp += LowerCase(str[i]);
# 393 : 103568 : }
# 394 : 25911 : Encoding result = VerifyChecksum(hrp, values);
# 395 [ + + ]: 25911 : if (result == Encoding::INVALID) return {};
# 396 : 25882 : return {result, std::move(hrp), data(values.begin(), values.end() - 6)};
# 397 : 25911 : }
# 398 : :
# 399 : : /** Find index of an incorrect character in a Bech32 string. */
# 400 : 116 : std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
# 401 : 116 : std::vector<int> error_locations{};
# 402 : :
# 403 [ + + ]: 116 : if (str.size() > 90) {
# 404 : 5 : error_locations.resize(str.size() - 90);
# 405 : 5 : std::iota(error_locations.begin(), error_locations.end(), 90);
# 406 : 5 : return std::make_pair("Bech32 string too long", std::move(error_locations));
# 407 : 5 : }
# 408 : :
# 409 [ + + ]: 111 : if (!CheckCharacters(str, error_locations)){
# 410 : 30 : return std::make_pair("Invalid character or mixed case", std::move(error_locations));
# 411 : 30 : }
# 412 : :
# 413 : 81 : size_t pos = str.rfind('1');
# 414 [ + + ]: 81 : if (pos == str.npos) {
# 415 : 5 : return std::make_pair("Missing separator", std::vector<int>{});
# 416 : 5 : }
# 417 [ + + ][ + + ]: 76 : if (pos == 0 || pos + 7 > str.size()) {
# 418 : 16 : error_locations.push_back(pos);
# 419 : 16 : return std::make_pair("Invalid separator position", std::move(error_locations));
# 420 : 16 : }
# 421 : :
# 422 : 60 : std::string hrp;
# 423 [ + + ]: 236 : for (size_t i = 0; i < pos; ++i) {
# 424 : 176 : hrp += LowerCase(str[i]);
# 425 : 176 : }
# 426 : :
# 427 : 60 : size_t length = str.size() - 1 - pos; // length of data part
# 428 : 60 : data values(length);
# 429 [ + + ]: 1914 : for (size_t i = pos + 1; i < str.size(); ++i) {
# 430 : 1865 : unsigned char c = str[i];
# 431 : 1865 : int8_t rev = CHARSET_REV[c];
# 432 [ + + ]: 1865 : if (rev == -1) {
# 433 : 11 : error_locations.push_back(i);
# 434 : 11 : return std::make_pair("Invalid Base 32 character", std::move(error_locations));
# 435 : 11 : }
# 436 : 1854 : values[i - pos - 1] = rev;
# 437 : 1854 : }
# 438 : :
# 439 : : // We attempt error detection with both bech32 and bech32m, and choose the one with the fewest errors
# 440 : : // We can't simply use the segwit version, because that may be one of the errors
# 441 : 49 : std::optional<Encoding> error_encoding;
# 442 [ + + ]: 82 : for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) {
# 443 : 82 : std::vector<int> possible_errors;
# 444 : : // Recall that (ExpandHRP(hrp) ++ values) is interpreted as a list of coefficients of a polynomial
# 445 : : // over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x).
# 446 : 82 : uint32_t residue = PolyMod(Cat(ExpandHRP(hrp), values)) ^ EncodingConstant(encoding);
# 447 : :
# 448 : : // All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding
# 449 : : // constant) should be 0 - hence 0 indicates there are no errors present.
# 450 [ + + ]: 82 : if (residue != 0) {
# 451 : : // If errors are present, our polynomial must be of the form C(x) + E(x) where C is the valid
# 452 : : // codeword (a multiple of G(x)), and E encodes the errors.
# 453 : 62 : uint32_t syn = Syndrome(residue);
# 454 : :
# 455 : : // Unpack the three 10-bit syndrome values
# 456 : 62 : int s0 = syn & 0x3FF;
# 457 : 62 : int s1 = (syn >> 10) & 0x3FF;
# 458 : 62 : int s2 = syn >> 20;
# 459 : :
# 460 : : // Get the discrete logs of these values in GF1024 for more efficient computation
# 461 : 62 : int l_s0 = GF1024_LOG.at(s0);
# 462 : 62 : int l_s1 = GF1024_LOG.at(s1);
# 463 : 62 : int l_s2 = GF1024_LOG.at(s2);
# 464 : :
# 465 : : // First, suppose there is only a single error. Then E(x) = e1*x^p1 for some position p1
# 466 : : // Then s0 = E((e)^997) = e1*(e)^(997*p1) and s1 = E((e)^998) = e1*(e)^(998*p1)
# 467 : : // Therefore s1/s0 = (e)^p1, and by the same logic, s2/s1 = (e)^p1 too.
# 468 : : // Hence, s1^2 == s0*s2, which is exactly the condition we check first:
# 469 [ + - ][ + - ]: 62 : if (l_s0 != -1 && l_s1 != -1 && l_s2 != -1 && (2 * l_s1 - l_s2 - l_s0 + 2046) % 1023 == 0) {
# [ + - ][ + + ]
# 470 : : // Compute the error position p1 as l_s1 - l_s0 = p1 (mod 1023)
# 471 : 23 : size_t p1 = (l_s1 - l_s0 + 1023) % 1023; // the +1023 ensures it is positive
# 472 : : // Now because s0 = e1*(e)^(997*p1), we get e1 = s0/((e)^(997*p1)). Remember that (e)^1023 = 1,
# 473 : : // so 1/((e)^997) = (e)^(1023-997).
# 474 : 23 : int l_e1 = l_s0 + (1023 - 997) * p1;
# 475 : : // Finally, some sanity checks on the result:
# 476 : : // - The error position should be within the length of the data
# 477 : : // - e1 should be in GF(32), which implies that e1 = (e)^(33k) for some k (the 31 non-zero elements
# 478 : : // of GF(32) form an index 33 subgroup of the 1023 non-zero elements of GF(1024)).
# 479 [ + + ][ + - ]: 23 : if (p1 < length && !(l_e1 % 33)) {
# 480 : : // Polynomials run from highest power to lowest, so the index p1 is from the right.
# 481 : : // We don't return e1 because it is dangerous to suggest corrections to the user,
# 482 : : // the user should check the address themselves.
# 483 : 19 : possible_errors.push_back(str.size() - p1 - 1);
# 484 : 19 : }
# 485 : : // Otherwise, suppose there are two errors. Then E(x) = e1*x^p1 + e2*x^p2.
# 486 : 39 : } else {
# 487 : : // For all possible first error positions p1
# 488 [ + + ]: 1371 : for (size_t p1 = 0; p1 < length; ++p1) {
# 489 : : // We have guessed p1, and want to solve for p2. Recall that E(x) = e1*x^p1 + e2*x^p2, so
# 490 : : // s0 = E((e)^997) = e1*(e)^(997^p1) + e2*(e)^(997*p2), and similar for s1 and s2.
# 491 : : //
# 492 : : // Consider s2 + s1*(e)^p1
# 493 : : // = 2e1*(e)^(999^p1) + e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1
# 494 : : // = e2*(e)^(999*p2) + e2*(e)^(998*p2)*(e)^p1
# 495 : : // (Because we are working in characteristic 2.)
# 496 : : // = e2*(e)^(998*p2) ((e)^p2 + (e)^p1)
# 497 : : //
# 498 [ - + ]: 1338 : int s2_s1p1 = s2 ^ (s1 == 0 ? 0 : GF1024_EXP.at((l_s1 + p1) % 1023));
# 499 [ - + ]: 1338 : if (s2_s1p1 == 0) continue;
# 500 : 1338 : int l_s2_s1p1 = GF1024_LOG.at(s2_s1p1);
# 501 : :
# 502 : : // Similarly, s1 + s0*(e)^p1
# 503 : : // = e2*(e)^(997*p2) ((e)^p2 + (e)^p1)
# 504 [ - + ]: 1338 : int s1_s0p1 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p1) % 1023));
# 505 [ - + ]: 1338 : if (s1_s0p1 == 0) continue;
# 506 : 1338 : int l_s1_s0p1 = GF1024_LOG.at(s1_s0p1);
# 507 : :
# 508 : : // So, putting these together, we can compute the second error position as
# 509 : : // (e)^p2 = (s2 + s1^p1)/(s1 + s0^p1)
# 510 : : // p2 = log((e)^p2)
# 511 : 1338 : size_t p2 = (l_s2_s1p1 - l_s1_s0p1 + 1023) % 1023;
# 512 : :
# 513 : : // Sanity checks that p2 is a valid position and not the same as p1
# 514 [ + + ][ + + ]: 1338 : if (p2 >= length || p1 == p2) continue;
# 515 : :
# 516 : : // Now we want to compute the error values e1 and e2.
# 517 : : // Similar to above, we compute s1 + s0*(e)^p2
# 518 : : // = e1*(e)^(997*p1) ((e)^p1 + (e)^p2)
# 519 [ - + ]: 74 : int s1_s0p2 = s1 ^ (s0 == 0 ? 0 : GF1024_EXP.at((l_s0 + p2) % 1023));
# 520 [ - + ]: 74 : if (s1_s0p2 == 0) continue;
# 521 : 74 : int l_s1_s0p2 = GF1024_LOG.at(s1_s0p2);
# 522 : :
# 523 : : // And compute (the log of) 1/((e)^p1 + (e)^p2))
# 524 : 74 : int inv_p1_p2 = 1023 - GF1024_LOG.at(GF1024_EXP.at(p1) ^ GF1024_EXP.at(p2));
# 525 : :
# 526 : : // Then (s1 + s0*(e)^p1) * (1/((e)^p1 + (e)^p2)))
# 527 : : // = e2*(e)^(997*p2)
# 528 : : // Then recover e2 by dividing by (e)^(997*p2)
# 529 : 74 : int l_e2 = l_s1_s0p1 + inv_p1_p2 + (1023 - 997) * p2;
# 530 : : // Check that e2 is in GF(32)
# 531 [ + + ]: 74 : if (l_e2 % 33) continue;
# 532 : :
# 533 : : // In the same way, (s1 + s0*(e)^p2) * (1/((e)^p1 + (e)^p2)))
# 534 : : // = e1*(e)^(997*p1)
# 535 : : // So recover e1 by dividing by (e)^(997*p1)
# 536 : 19 : int l_e1 = l_s1_s0p2 + inv_p1_p2 + (1023 - 997) * p1;
# 537 : : // Check that e1 is in GF(32)
# 538 [ + + ]: 19 : if (l_e1 % 33) continue;
# 539 : :
# 540 : : // Again, we do not return e1 or e2 for safety.
# 541 : : // Order the error positions from the left of the string and return them
# 542 [ - + ]: 6 : if (p1 > p2) {
# 543 : 0 : possible_errors.push_back(str.size() - p1 - 1);
# 544 : 0 : possible_errors.push_back(str.size() - p2 - 1);
# 545 : 6 : } else {
# 546 : 6 : possible_errors.push_back(str.size() - p2 - 1);
# 547 : 6 : possible_errors.push_back(str.size() - p1 - 1);
# 548 : 6 : }
# 549 : 6 : break;
# 550 : 19 : }
# 551 : 39 : }
# 552 : 62 : } else {
# 553 : : // No errors
# 554 : 20 : return std::make_pair("", std::vector<int>{});
# 555 : 20 : }
# 556 : :
# 557 [ + + ][ - + ]: 62 : if (error_locations.empty() || (!possible_errors.empty() && possible_errors.size() < error_locations.size())) {
# [ # # ]
# 558 : 41 : error_locations = std::move(possible_errors);
# 559 [ + + ]: 41 : if (!error_locations.empty()) error_encoding = encoding;
# 560 : 41 : }
# 561 : 62 : }
# 562 [ + + ]: 29 : std::string error_message = error_encoding == Encoding::BECH32M ? "Invalid Bech32m checksum"
# 563 [ + + ]: 29 : : error_encoding == Encoding::BECH32 ? "Invalid Bech32 checksum"
# 564 : 25 : : "Invalid checksum";
# 565 : :
# 566 : 29 : return std::make_pair(error_message, std::move(error_locations));
# 567 : 49 : }
# 568 : :
# 569 : : } // namespace bech32
|