Skip to content

Commit

Permalink
Update lots of stuff to Unicode 15.1
Browse files Browse the repository at this point in the history
Update all ctype functions to support Unicode 15.1.
  • Loading branch information
RauliL committed Feb 8, 2024
1 parent 5713ec0 commit 04f295a
Show file tree
Hide file tree
Showing 16 changed files with 2,265 additions and 1,095 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

![Build](https://github.com/peelonet/peelo-unicode/workflows/Build/badge.svg)

Collection of simple to use [Unicode] utilities for C++17.
Collection of simple to use [Unicode] utilities for C++17. Supports Unicode
15.1.

[Doxygen generated API documentation.][API]

Expand Down
17 changes: 16 additions & 1 deletion include/peelo/unicode/ctype/_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@
#pragma once

#include <array>
#include <unordered_map>
#include <utility>

namespace peelo::unicode::ctype::utils
{
using range = std::pair<char32_t, char32_t>;

template<std::size_t Size>
inline bool table_lookup(const std::array<range, Size>& table, char32_t c)
inline bool
table_lookup(const std::array<range, Size>& table, char32_t c)
{
const auto size = table.size();

Expand All @@ -50,4 +52,17 @@ namespace peelo::unicode::ctype::utils

return false;
}

/**
 * Translates a code point through a one-to-one mapping table.
 *
 * @param map Table keyed by source code point, holding the replacement code
 *            point as the mapped value.
 * @param c   Code point to translate.
 * @return    The value stored for `c` when the table has an entry for it,
 *            otherwise `c` itself, unchanged.
 */
inline char32_t
case_lookup(const std::unordered_map<char32_t, char32_t>& map, char32_t c)
{
  const auto entry = map.find(c);
  const bool has_mapping = entry != map.end();

  // Code points without an entry pass through untouched.
  return has_mapping ? entry->second : c;
}
}
155 changes: 3 additions & 152 deletions include/peelo/unicode/ctype/isalnum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
*/
#pragma once

#include <peelo/unicode/ctype/_utils.hpp>
#include <peelo/unicode/ctype/isalpha.hpp>
#include <peelo/unicode/ctype/isdigit.hpp>

namespace peelo::unicode::ctype
{
Expand All @@ -36,156 +37,6 @@ namespace peelo::unicode::ctype
// Alphanumeric test for a single code point `c`.
//
// NOTE(review): this span is rendered from a commit diff view, so it carries
// both the previous implementation (the static range table and its lookup)
// and the replacement (the `isdigit(c) || isalpha(c)` return) back to back.
// Read literally, only the table lookup can ever execute.
inline bool
isalnum(char32_t c)
{
// Function-local lookup table, declared with 436 entries of utils::range
// (a pair of char32_t values). The entries appear in ascending code point
// order, and single code points are written with the same value twice, so
// each pair presumably denotes an inclusive [first, last] range — confirm
// against utils::table_lookup, whose body is not fully visible here.
static const std::array<utils::range, 436> alnum_table =
{{
{ 0x0030, 0x0039 }, { 0x0041, 0x005a }, { 0x0061, 0x007a },
{ 0x00aa, 0x00aa }, { 0x00b5, 0x00b5 }, { 0x00ba, 0x00ba },
{ 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x0241 },
{ 0x0250, 0x02c1 }, { 0x02c6, 0x02d1 }, { 0x02e0, 0x02e4 },
{ 0x02ee, 0x02ee }, { 0x0300, 0x036f }, { 0x037a, 0x037a },
{ 0x0386, 0x0386 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
{ 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03f5 },
{ 0x03f7, 0x0481 }, { 0x0483, 0x0486 }, { 0x0488, 0x04ce },
{ 0x04d0, 0x04f9 }, { 0x0500, 0x050f }, { 0x0531, 0x0556 },
{ 0x0559, 0x0559 }, { 0x0561, 0x0587 }, { 0x0591, 0x05b9 },
{ 0x05bb, 0x05bd }, { 0x05bf, 0x05bf }, { 0x05c1, 0x05c2 },
{ 0x05c4, 0x05c5 }, { 0x05c7, 0x05c7 }, { 0x05d0, 0x05ea },
{ 0x05f0, 0x05f2 }, { 0x0610, 0x0615 }, { 0x0621, 0x063a },
{ 0x0640, 0x065e }, { 0x0660, 0x0669 }, { 0x066e, 0x06d3 },
{ 0x06d5, 0x06dc }, { 0x06de, 0x06e8 }, { 0x06ea, 0x06fc },
{ 0x06ff, 0x06ff }, { 0x0710, 0x074a }, { 0x074d, 0x076d },
{ 0x0780, 0x07b1 }, { 0x0901, 0x0939 }, { 0x093c, 0x094d },
{ 0x0950, 0x0954 }, { 0x0958, 0x0963 }, { 0x0966, 0x096f },
{ 0x097d, 0x097d }, { 0x0981, 0x0983 }, { 0x0985, 0x098c },
{ 0x098f, 0x0990 }, { 0x0993, 0x09a8 }, { 0x09aa, 0x09b0 },
{ 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 }, { 0x09bc, 0x09c4 },
{ 0x09c7, 0x09c8 }, { 0x09cb, 0x09ce }, { 0x09d7, 0x09d7 },
{ 0x09dc, 0x09dd }, { 0x09df, 0x09e3 }, { 0x09e6, 0x09f1 },
{ 0x0a01, 0x0a03 }, { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 },
{ 0x0a13, 0x0a28 }, { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 },
{ 0x0a35, 0x0a36 }, { 0x0a38, 0x0a39 }, { 0x0a3c, 0x0a3c },
{ 0x0a3e, 0x0a42 }, { 0x0a47, 0x0a48 }, { 0x0a4b, 0x0a4d },
{ 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e }, { 0x0a66, 0x0a74 },
{ 0x0a81, 0x0a83 }, { 0x0a85, 0x0a8d }, { 0x0a8f, 0x0a91 },
{ 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
{ 0x0ab5, 0x0ab9 }, { 0x0abc, 0x0ac5 }, { 0x0ac7, 0x0ac9 },
{ 0x0acb, 0x0acd }, { 0x0ad0, 0x0ad0 }, { 0x0ae0, 0x0ae3 },
{ 0x0ae6, 0x0aef }, { 0x0b01, 0x0b03 }, { 0x0b05, 0x0b0c },
{ 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 }, { 0x0b2a, 0x0b30 },
{ 0x0b32, 0x0b33 }, { 0x0b35, 0x0b39 }, { 0x0b3c, 0x0b43 },
{ 0x0b47, 0x0b48 }, { 0x0b4b, 0x0b4d }, { 0x0b56, 0x0b57 },
{ 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 }, { 0x0b66, 0x0b6f },
{ 0x0b71, 0x0b71 }, { 0x0b82, 0x0b83 }, { 0x0b85, 0x0b8a },
{ 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 }, { 0x0b99, 0x0b9a },
{ 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f }, { 0x0ba3, 0x0ba4 },
{ 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb9 }, { 0x0bbe, 0x0bc2 },
{ 0x0bc6, 0x0bc8 }, { 0x0bca, 0x0bcd }, { 0x0bd7, 0x0bd7 },
{ 0x0be6, 0x0bef }, { 0x0c01, 0x0c03 }, { 0x0c05, 0x0c0c },
{ 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 }, { 0x0c2a, 0x0c33 },
{ 0x0c35, 0x0c39 }, { 0x0c3e, 0x0c44 }, { 0x0c46, 0x0c48 },
{ 0x0c4a, 0x0c4d }, { 0x0c55, 0x0c56 }, { 0x0c60, 0x0c61 },
{ 0x0c66, 0x0c6f }, { 0x0c82, 0x0c83 }, { 0x0c85, 0x0c8c },
{ 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 }, { 0x0caa, 0x0cb3 },
{ 0x0cb5, 0x0cb9 }, { 0x0cbc, 0x0cc4 }, { 0x0cc6, 0x0cc8 },
{ 0x0cca, 0x0ccd }, { 0x0cd5, 0x0cd6 }, { 0x0cde, 0x0cde },
{ 0x0ce0, 0x0ce1 }, { 0x0ce6, 0x0cef }, { 0x0d02, 0x0d03 },
{ 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
{ 0x0d2a, 0x0d39 }, { 0x0d3e, 0x0d43 }, { 0x0d46, 0x0d48 },
{ 0x0d4a, 0x0d4d }, { 0x0d57, 0x0d57 }, { 0x0d60, 0x0d61 },
{ 0x0d66, 0x0d6f }, { 0x0d82, 0x0d83 }, { 0x0d85, 0x0d96 },
{ 0x0d9a, 0x0db1 }, { 0x0db3, 0x0dbb }, { 0x0dbd, 0x0dbd },
{ 0x0dc0, 0x0dc6 }, { 0x0dca, 0x0dca }, { 0x0dcf, 0x0dd4 },
{ 0x0dd6, 0x0dd6 }, { 0x0dd8, 0x0ddf }, { 0x0df2, 0x0df3 },
{ 0x0e01, 0x0e3a }, { 0x0e40, 0x0e4e }, { 0x0e50, 0x0e59 },
{ 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e88 },
{ 0x0e8a, 0x0e8a }, { 0x0e8d, 0x0e8d }, { 0x0e94, 0x0e97 },
{ 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 }, { 0x0ea5, 0x0ea5 },
{ 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eab }, { 0x0ead, 0x0eb9 },
{ 0x0ebb, 0x0ebd }, { 0x0ec0, 0x0ec4 }, { 0x0ec6, 0x0ec6 },
{ 0x0ec8, 0x0ecd }, { 0x0ed0, 0x0ed9 }, { 0x0edc, 0x0edd },
{ 0x0f00, 0x0f00 }, { 0x0f18, 0x0f19 }, { 0x0f20, 0x0f29 },
{ 0x0f35, 0x0f35 }, { 0x0f37, 0x0f37 }, { 0x0f39, 0x0f39 },
{ 0x0f3e, 0x0f47 }, { 0x0f49, 0x0f6a }, { 0x0f71, 0x0f84 },
{ 0x0f86, 0x0f8b }, { 0x0f90, 0x0f97 }, { 0x0f99, 0x0fbc },
{ 0x0fc6, 0x0fc6 }, { 0x1000, 0x1021 }, { 0x1023, 0x1027 },
{ 0x1029, 0x102a }, { 0x102c, 0x1032 }, { 0x1036, 0x1039 },
{ 0x1040, 0x1049 }, { 0x1050, 0x1059 }, { 0x10a0, 0x10c5 },
{ 0x10d0, 0x10fa }, { 0x10fc, 0x10fc }, { 0x1100, 0x1159 },
{ 0x115f, 0x11a2 }, { 0x11a8, 0x11f9 }, { 0x1200, 0x1248 },
{ 0x124a, 0x124d }, { 0x1250, 0x1256 }, { 0x1258, 0x1258 },
{ 0x125a, 0x125d }, { 0x1260, 0x1288 }, { 0x128a, 0x128d },
{ 0x1290, 0x12b0 }, { 0x12b2, 0x12b5 }, { 0x12b8, 0x12be },
{ 0x12c0, 0x12c0 }, { 0x12c2, 0x12c5 }, { 0x12c8, 0x12d6 },
{ 0x12d8, 0x1310 }, { 0x1312, 0x1315 }, { 0x1318, 0x135a },
{ 0x135f, 0x135f }, { 0x1380, 0x138f }, { 0x13a0, 0x13f4 },
{ 0x1401, 0x166c }, { 0x166f, 0x1676 }, { 0x1681, 0x169a },
{ 0x16a0, 0x16ea }, { 0x1700, 0x170c }, { 0x170e, 0x1714 },
{ 0x1720, 0x1734 }, { 0x1740, 0x1753 }, { 0x1760, 0x176c },
{ 0x176e, 0x1770 }, { 0x1772, 0x1773 }, { 0x1780, 0x17b3 },
{ 0x17b6, 0x17d3 }, { 0x17d7, 0x17d7 }, { 0x17dc, 0x17dd },
{ 0x17e0, 0x17e9 }, { 0x180b, 0x180d }, { 0x1810, 0x1819 },
{ 0x1820, 0x1877 }, { 0x1880, 0x18a9 }, { 0x1900, 0x191c },
{ 0x1920, 0x192b }, { 0x1930, 0x193b }, { 0x1946, 0x196d },
{ 0x1970, 0x1974 }, { 0x1980, 0x19a9 }, { 0x19b0, 0x19c9 },
{ 0x19d0, 0x19d9 }, { 0x1a00, 0x1a1b }, { 0x1d00, 0x1dc3 },
{ 0x1e00, 0x1e9b }, { 0x1ea0, 0x1ef9 }, { 0x1f00, 0x1f15 },
{ 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d },
{ 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 }, { 0x1f5b, 0x1f5b },
{ 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 },
{ 0x1fb6, 0x1fbc }, { 0x1fbe, 0x1fbe }, { 0x1fc2, 0x1fc4 },
{ 0x1fc6, 0x1fcc }, { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb },
{ 0x1fe0, 0x1fec }, { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },
{ 0x2071, 0x2071 }, { 0x207f, 0x207f }, { 0x2090, 0x2094 },
{ 0x20d0, 0x20eb }, { 0x2102, 0x2102 }, { 0x2107, 0x2107 },
{ 0x210a, 0x2113 }, { 0x2115, 0x2115 }, { 0x2119, 0x211d },
{ 0x2124, 0x2124 }, { 0x2126, 0x2126 }, { 0x2128, 0x2128 },
{ 0x212a, 0x212d }, { 0x212f, 0x2131 }, { 0x2133, 0x2139 },
{ 0x213c, 0x213f }, { 0x2145, 0x2149 }, { 0x2c00, 0x2c2e },
{ 0x2c30, 0x2c5e }, { 0x2c80, 0x2ce4 }, { 0x2d00, 0x2d25 },
{ 0x2d30, 0x2d65 }, { 0x2d6f, 0x2d6f }, { 0x2d80, 0x2d96 },
{ 0x2da0, 0x2da6 }, { 0x2da8, 0x2dae }, { 0x2db0, 0x2db6 },
{ 0x2db8, 0x2dbe }, { 0x2dc0, 0x2dc6 }, { 0x2dc8, 0x2dce },
{ 0x2dd0, 0x2dd6 }, { 0x2dd8, 0x2dde }, { 0x3005, 0x3006 },
{ 0x302a, 0x302f }, { 0x3031, 0x3035 }, { 0x303b, 0x303c },
{ 0x3041, 0x3096 }, { 0x3099, 0x309a }, { 0x309d, 0x309f },
{ 0x30a1, 0x30fa }, { 0x30fc, 0x30ff }, { 0x3105, 0x312c },
{ 0x3131, 0x318e }, { 0x31a0, 0x31b7 }, { 0x31f0, 0x31ff },
{ 0x3400, 0x4db5 }, { 0x4e00, 0x9fbb }, { 0xa000, 0xa48c },
{ 0xa800, 0xa827 }, { 0xac00, 0xd7a3 }, { 0xf900, 0xfa2d },
{ 0xfa30, 0xfa6a }, { 0xfa70, 0xfad9 }, { 0xfb00, 0xfb06 },
{ 0xfb13, 0xfb17 }, { 0xfb1d, 0xfb28 }, { 0xfb2a, 0xfb36 },
{ 0xfb38, 0xfb3c }, { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 },
{ 0xfb43, 0xfb44 }, { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3d },
{ 0xfd50, 0xfd8f }, { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb },
{ 0xfe00, 0xfe0f }, { 0xfe20, 0xfe23 }, { 0xfe70, 0xfe74 },
{ 0xfe76, 0xfefc }, { 0xff10, 0xff19 }, { 0xff21, 0xff3a },
{ 0xff41, 0xff5a }, { 0xff66, 0xffbe }, { 0xffc2, 0xffc7 },
{ 0xffca, 0xffcf }, { 0xffd2, 0xffd7 }, { 0xffda, 0xffdc },
{ 0x10000, 0x1000b }, { 0x1000d, 0x10026 }, { 0x10028, 0x1003a },
{ 0x1003c, 0x1003d }, { 0x1003f, 0x1004d }, { 0x10050, 0x1005d },
{ 0x10080, 0x100fa }, { 0x10300, 0x1031e }, { 0x10330, 0x10349 },
{ 0x10380, 0x1039d }, { 0x103a0, 0x103c3 }, { 0x103c8, 0x103cf },
{ 0x10400, 0x1049d }, { 0x104a0, 0x104a9 }, { 0x10800, 0x10805 },
{ 0x10808, 0x10808 }, { 0x1080a, 0x10835 }, { 0x10837, 0x10838 },
{ 0x1083c, 0x1083c }, { 0x1083f, 0x1083f }, { 0x10a00, 0x10a03 },
{ 0x10a05, 0x10a06 }, { 0x10a0c, 0x10a13 }, { 0x10a15, 0x10a17 },
{ 0x10a19, 0x10a33 }, { 0x10a38, 0x10a3a }, { 0x10a3f, 0x10a3f },
{ 0x1d165, 0x1d169 }, { 0x1d16d, 0x1d172 }, { 0x1d17b, 0x1d182 },
{ 0x1d185, 0x1d18b }, { 0x1d1aa, 0x1d1ad }, { 0x1d242, 0x1d244 },
{ 0x1d400, 0x1d454 }, { 0x1d456, 0x1d49c }, { 0x1d49e, 0x1d49f },
{ 0x1d4a2, 0x1d4a2 }, { 0x1d4a5, 0x1d4a6 }, { 0x1d4a9, 0x1d4ac },
{ 0x1d4ae, 0x1d4b9 }, { 0x1d4bb, 0x1d4bb }, { 0x1d4bd, 0x1d4c3 },
{ 0x1d4c5, 0x1d505 }, { 0x1d507, 0x1d50a }, { 0x1d50d, 0x1d514 },
{ 0x1d516, 0x1d51c }, { 0x1d51e, 0x1d539 }, { 0x1d53b, 0x1d53e },
{ 0x1d540, 0x1d544 }, { 0x1d546, 0x1d546 }, { 0x1d54a, 0x1d550 },
{ 0x1d552, 0x1d6a5 }, { 0x1d6a8, 0x1d6c0 }, { 0x1d6c2, 0x1d6da },
{ 0x1d6dc, 0x1d6fa }, { 0x1d6fc, 0x1d714 }, { 0x1d716, 0x1d734 },
{ 0x1d736, 0x1d74e }, { 0x1d750, 0x1d76e }, { 0x1d770, 0x1d788 },
{ 0x1d78a, 0x1d7a8 }, { 0x1d7aa, 0x1d7c2 }, { 0x1d7c4, 0x1d7c9 },
{ 0x1d7ce, 0x1d7ff }, { 0x20000, 0x2a6d6 }, { 0x2f800, 0x2fa1d },
{ 0xe0100, 0xe01ef }
}};

// Previous implementation: hand the membership test to the shared
// range-table helper.
return utils::table_lookup(alnum_table, c);
// Replacement implementation: combine the dedicated digit and letter
// predicates pulled in by the isalpha.hpp / isdigit.hpp includes.
// NOTE(review): unreachable as the text stands, because the return above
// always exits first; in the committed file only one of the two returns
// should remain.
return isdigit(c) || isalpha(c);
}
}
Loading

0 comments on commit 04f295a

Please sign in to comment.