4
\$\begingroup\$

This is obsoleted by a significantly better version.

The following code is an implementation of hexadecimal conversions as range adaptors in C++23. It can be used at compile time, but should of course also work at run time.

I'll take any feedback, but I am especially interested in feedback from people who have experience with std::ranges, functional programming and lazy evaluation. I have not checked any performance aspects yet, partly because I have yet to use it for run-time calculations. The decoding is, however, based on a lookup table, so I have given performance a little attention.

The same code, in principle, is available at base16. It should be possible to include that repository in any CMake project, and when built as a standalone project, it tests the implementation at compile time.

Also on godbolt.

#include <algorithm>
#include <array>
#include <bit>
#include <cstddef>
#include <cstdint>
#include <ranges>
#include <string_view>

// @brief Adaptor that converts its std::byte input range into a hexadecimal character view.
//
// Usage example:
//
// \code{.cpp}
// std::array{std::byte{1}, std::byte{2}, std::byte{10}} | encode16 // will generate view on "01020A"sv
// \endcode
inline constexpr auto encode16 = [] {
    using namespace std::literals;
    using namespace std::views;

    return transform([](std::byte byte) {
               return iota(0, 2) | reverse | transform([byte](int i) -> uint8_t {
                      return (static_cast<uint8_t>(byte) >> (4 * i)) & 0xF;
                  });
           }) |
           join | transform([](uint8_t nible) -> char { return "0123456789ABCDEF"sv.at(nible); });
}();

// @brief Adaptor that converts its char input range into a std::byte range view.
//
// Characters are consumed two at a time; the first character of each pair supplies the high nibble and the second
// the low nibble. Both upper and lower case are accepted for the hexadecimal digits a to f.
//
// @warning Does not validate its input! Any invalid character will be transformed into a zero nibble. Do not use on
// non validated input if validation matters!
//
// @note If the input holds an odd number of characters, the trailing character alone forms the last byte, as its
// low nibble (e.g. "ABC" decodes to 0xAB, 0x0C).
//
// Usage example:
//
// \code{.cpp}
// "01020A"sv | decode16 // will generate view on std::array{std::byte{1}, std::byte{2}, std::byte{10}}
// \endcode
inline constexpr auto decode16 = [] {
    using namespace std::views;

    return transform([](char hex) -> uint8_t {
               // One entry per possible unsigned character value; everything that is not a hexadecimal digit
               // keeps the value-initialised 0.
               static constexpr auto lookup = [] {
                   std::array<uint8_t, 256> table{};
                   uint8_t nibble{};
                   for (const char digit : std::string_view{"0123456789ABCDEF"}) {
                       table.at(static_cast<unsigned char>(digit)) = nibble++;
                   }
                   nibble = 0xA;
                   for (const char digit : std::string_view{"abcdef"}) {
                       table.at(static_cast<unsigned char>(digit)) = nibble++;
                   }
                   return table;
               }();
               // Index through unsigned char: where plain char is signed, characters above 0x7F are negative and
               // would otherwise convert to a huge index, making at() throw instead of yielding a zero nibble.
               return lookup.at(static_cast<unsigned char>(hex));
           }) |
           chunk(2) | transform([](auto &&pair) -> std::byte {
               // A chunk holds one or two nibbles. Shifting the accumulator left by 4 before OR-ing in the next
               // nibble places the first nibble of a full pair in the high half of the byte.
               return static_cast<std::byte>(std::ranges::fold_left(
                   pair, 0, [](uint8_t n1, uint8_t n2) -> uint8_t { return (n1 << 4) | n2; }));
           });
}();

auto main() -> int
{
    using namespace std::literals;
    using namespace std::views;
    using std::ranges::equal;

    constexpr auto to_bytes = transform([](uint8_t num) -> std::byte { return static_cast<std::byte>(num); });

    static_assert(equal(std::array{1, 192, 255, 238} | to_bytes | encode16, "01C0FFEE"sv));
    static_assert(equal("01C0ffee"sv | decode16, std::array{1, 192, 255, 238} | to_bytes));

    static_assert(equal(std::array{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF} | to_bytes | encode16,
                "0123456789ABCDEF"sv));
    static_assert(equal("0123456789aBCdEf"sv | decode16,
                std::array{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF} | to_bytes));

    static_assert(equal(std::array{1, 192, 255, 238} | to_bytes | encode16 | decode16,
                std::array{1, 192, 255, 238} | to_bytes));
    static_assert(equal("01C0ffee"sv | decode16 | encode16, "01C0FFEE"sv));

    static_assert(equal(std::array{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF} | to_bytes | encode16 | decode16,
                std::array{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF} | to_bytes));
    static_assert(equal("0123456789aBCdEf"sv | decode16 | encode16, "0123456789ABCDEF"sv));

    return 0;
}
\$\endgroup\$
1
  • \$\begingroup\$ Sorry for the rollback - it popped up as an answer invalidation, but as it's closed anyway, probably not necessary. \$\endgroup\$ Commented Sep 22, 2024 at 15:43

1 Answer 1

2
\$\begingroup\$

Just found an improvement: hard-coding the lookup calculation is just silly.

I updated the godbolt version

#include <algorithm>
#include <cstdint>
#include <ranges>
#include <string_view>

namespace detail {
// The sixteen hexadecimal digits in value order: the character at index n represents the nibble n.
inline constexpr std::string_view base16_chars = "0123456789ABCDEF";
} // namespace detail

// @brief Adaptor that converts its std::byte input range into a hexadecimal character view.
//
// Usage example:
//
// \code{.cpp}
// std::array{std::byte{1}, std::byte{2}, std::byte{10}} | encode16 // will generate view on "01020A"sv
// \endcode
inline constexpr auto encode16 = [] {
    using namespace std::views;

    return transform([](std::byte byte) {
               return iota(0, 2) | reverse | transform([byte](int i) -> uint8_t {
                      return (static_cast<uint8_t>(byte) >> (4 * i)) & 0xF;
                  });
           }) |
           join | transform([](uint8_t nible) -> char { return detail::base16_chars.at(nible); });
}();

// @brief Adaptor that converts its char input range into a std::byte range view.
//
// Characters are consumed two at a time; the first character of each pair supplies the high nibble and the second
// the low nibble.
//
// @warning Does not validate its input! Any invalid character will be transformed into a zero nibble. Do not use on
// non validated input if validation matters!
//
// @note For convenience, this conversion accepts both upper and lower case for hexadecimal characters a to f.
//
// @note If the input holds an odd number of characters, the trailing character alone forms the last byte, as its
// low nibble (e.g. "ABC" decodes to 0xAB, 0x0C).
//
// Usage example:
//
// \code{.cpp}
// "01020A"sv | decode16 // will generate view on std::array{std::byte{1}, std::byte{2}, std::byte{10}}
// \endcode
inline constexpr auto decode16 = [] {
    using namespace std::literals;
    using namespace std::views;

    return transform([](char hex) -> uint8_t {
               // One entry per possible unsigned character value; everything that is not a hexadecimal digit
               // keeps the value-initialised 0.
               static constexpr auto lookup = [] {
                   std::array<uint8_t, 256> table{};
                   uint8_t nibble{};
                   for (const char digit : detail::base16_chars) {
                       table.at(static_cast<unsigned char>(digit)) = nibble++;
                   }
                   // Lower-case aliases for the digits a to f.
                   nibble = 0xA;
                   for (const char digit : "abcdef"sv) {
                       table.at(static_cast<unsigned char>(digit)) = nibble++;
                   }
                   return table;
               }();
               // Index through unsigned char: where plain char is signed, characters above 0x7F are negative and
               // would otherwise convert to a huge index, making at() throw instead of yielding a zero nibble.
               return lookup.at(static_cast<unsigned char>(hex));
           }) |
           chunk(2) | transform([](auto &&pair) -> std::byte {
               // A chunk holds one or two nibbles. Shifting the accumulator left by 4 before OR-ing in the next
               // nibble places the first nibble of a full pair in the high half of the byte.
               return static_cast<std::byte>(std::ranges::fold_left(
                   pair, 0, [](uint8_t n1, uint8_t n2) -> uint8_t { return (n1 << 4) | n2; }));
           });
}();

auto main() -> int
{
    using namespace std::literals;
    using namespace std::views;
    using std::ranges::equal;

    constexpr auto to_bytes = transform([](uint8_t num) -> std::byte { return static_cast<std::byte>(num); });

    static_assert(equal(std::array{1, 192, 255, 238} | to_bytes | encode16, "01C0FFEE"sv));
    static_assert(equal("01C0ffee"sv | decode16, std::array{1, 192, 255, 238} | to_bytes));

    static_assert(equal(std::array{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF} | to_bytes | encode16,
                "0123456789ABCDEF"sv));
    static_assert(equal("0123456789aBCdEf"sv | decode16,
                std::array{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF} | to_bytes));

    static_assert(equal(std::array{1, 192, 255, 238} | to_bytes | encode16 | decode16,
                std::array{1, 192, 255, 238} | to_bytes));
    static_assert(equal("01C0ffee"sv | decode16 | encode16, "01C0FFEE"sv));

    static_assert(equal(std::array{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF} | to_bytes | encode16 | decode16,
                std::array{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF} | to_bytes));
    static_assert(equal("0123456789aBCdEf"sv | decode16 | encode16, "0123456789ABCDEF"sv));

    return 0;
}
\$\endgroup\$

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.