Skip to content

Commit

Permalink
Update lots of stuff to Unicode 15.1
Browse files Browse the repository at this point in the history
Update all ctype functions to support Unicode 15.1.
  • Loading branch information
RauliL committed Feb 8, 2024
1 parent 5713ec0 commit e7e1d99
Show file tree
Hide file tree
Showing 15 changed files with 2,263 additions and 1,094 deletions.
17 changes: 16 additions & 1 deletion include/peelo/unicode/ctype/_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@
#pragma once

#include <array>
#include <unordered_map>
#include <utility>

namespace peelo::unicode::ctype::utils
{
// A span of code points stored as a (first, last) pair; the lookup tables
// built from it are arrays of these pairs.
// NOTE(review): both bounds appear to be inclusive (single code points are
// written as { x, x }) - confirm against table_lookup().
using range = std::pair<char32_t, char32_t>;

template<std::size_t Size>
inline bool table_lookup(const std::array<range, Size>& table, char32_t c)
inline bool
table_lookup(const std::array<range, Size>& table, char32_t c)
{
const auto size = table.size();

Expand All @@ -50,4 +52,17 @@ namespace peelo::unicode::ctype::utils

return false;
}

/**
 * Translates a code point through a case mapping.
 *
 * @param map Mapping from source code points to their converted form.
 * @param c   Code point to translate.
 * @return    The value stored for `c` in `map`, or `c` itself when the
 *            mapping has no entry for it.
 */
inline char32_t
case_lookup(const std::unordered_map<char32_t, char32_t>& map, char32_t c)
{
  const auto entry = map.find(c);

  // Code points without an entry pass through unchanged.
  return entry == map.end() ? c : entry->second;
}
}
155 changes: 3 additions & 152 deletions include/peelo/unicode/ctype/isalnum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
*/
#pragma once

#include <peelo/unicode/ctype/_utils.hpp>
#include <peelo/unicode/ctype/isalpha.hpp>
#include <peelo/unicode/ctype/isdigit.hpp>

namespace peelo::unicode::ctype
{
Expand All @@ -36,156 +37,6 @@ namespace peelo::unicode::ctype
inline bool
isalnum(char32_t c)
{
  // A code point counts as alphanumeric when isdigit() or isalpha() accepts
  // it. Delegating to those two predicates replaces the separate hard-coded
  // range table this function used to carry: that table was left over from
  // an older Unicode version and, because it was consulted first, it made
  // this return statement unreachable.
  return isdigit(c) || isalpha(c);
}
}
Loading

0 comments on commit e7e1d99

Please sign in to comment.