-
-
Notifications
You must be signed in to change notification settings - Fork 606
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
32 changed files
with
5,901 additions
and
457 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
Expansion of identifier tables to allow new characters to match C23 have been added along with CLI configurability | ||
|
||
You can currently choose between ``c99``, ``c11``, ``UAX31`` (C23's) and ``all`` (the least restrictive set) for both D and ImportC. | ||
|
||
This can be done with ``-identifiers=<table>`` and for ImportC ``-identifiers-importc=<table>``. | ||
|
||
The default table for D is currently set to ``all``, while ImportC is set to ``c11``. | ||
Previously both D and ImportC used the ``c99`` tables. | ||
|
||
D's table will be swapped over at a later date to [UAX31](https://unicode.org/reports/tr31/), this should be done in 2.117. | ||
If you find yourself at this time using ``c99`` specific characters and not willing to change them, you may switch back to ``all``. | ||
Although it should be unlikely that you will need to. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
ImportC has improved Unicode support | ||
|
||
Universal Character Names are now supported, allowing you to use the ``\uXXXX`` and ``\UXXXXXXXX`` syntax where ``X`` is a hex digit as part of an identifier. | ||
|
||
DigitalMars sppn does not support anything newer than C99. | ||
It is known to be limited and using any Unicode character not in those ranges will result in an error. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,267 @@ | ||
/** | ||
* Character tables related to identifiers. | ||
* | ||
* Supports UAX31, C99, C11 and least restrictive (All). | ||
* | ||
* Copyright: Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved | ||
* Authors: $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole) | ||
* License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) | ||
* Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/common/charactertables.d, common/charactertables.d) | ||
* Documentation: https://dlang.org/phobos/dmd_common_charactertables.html | ||
* Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/common/charactertables.d | ||
*/ | ||
module dmd.common.charactertables; | ||
|
||
@safe nothrow @nogc pure: | ||
|
||
extern(C++): | ||
|
||
/// | ||
enum IdentifierTable { | ||
UAX31, /// | ||
C99, /// | ||
C11, /// | ||
LR, /// Least Restrictive aka All | ||
} | ||
|
||
/// | ||
struct IdentifierCharLookup | ||
{ | ||
@safe nothrow @nogc pure: | ||
|
||
/// | ||
extern(C++) bool function(dchar) isStart; | ||
/// | ||
extern(C++) bool function(dchar) isContinue; | ||
|
||
/// Lookup the table given the table name | ||
static IdentifierCharLookup forTable(IdentifierTable table) | ||
{ | ||
import dmd.common.identifiertables; | ||
|
||
// Awful solution to require these lambdas. | ||
// However without them the extern(C++) ABI issues crop up for isInRange, | ||
// and then it can't access the tables. | ||
final switch(table) { | ||
case IdentifierTable.UAX31: | ||
return IdentifierCharLookup( | ||
(c) => isInRange!UAX31_Start(c), | ||
(c) => isInRange!UAX31_Continue(c)); | ||
case IdentifierTable.C99: | ||
return IdentifierCharLookup( | ||
(c) => isInRange!FixedTable_C99_Start(c), | ||
(c) => isInRange!FixedTable_C99_Continue(c)); | ||
case IdentifierTable.C11: | ||
return IdentifierCharLookup( | ||
(c) => isInRange!FixedTable_C11_Start(c), | ||
(c) => isInRange!FixedTable_C11_Continue(c)); | ||
case IdentifierTable.LR: | ||
return IdentifierCharLookup( | ||
(c) => isInRange!LeastRestrictive_Start(c), | ||
(c) => isInRange!LeastRestrictive_Continue(c)); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
Convenience function for use in places where we just don't care, | ||
what the identifier ranges are, or if it is start/continue. | ||
Returns: is character a member of least restrictive of all. | ||
*/ | ||
bool isAnyIdentifierCharacter(dchar c) | ||
{ | ||
import dmd.common.identifiertables; | ||
return isInRange!LeastRestrictive_OfAll(c); | ||
} | ||
|
||
/// | ||
unittest | ||
{ | ||
assert(isAnyContinue('ğ')); | ||
} | ||
|
||
/** | ||
Convenience function for use in places where we just don't care, | ||
what the identifier ranges are. | ||
Returns: is character a member of restrictive Start | ||
*/ | ||
bool isAnyStart(dchar c) | ||
{ | ||
import dmd.common.identifiertables; | ||
return isInRange!LeastRestrictive_Start(c); | ||
} | ||
|
||
/// | ||
unittest | ||
{ | ||
assert(isAnyStart('ğ')); | ||
} | ||
|
||
/** | ||
Convenience function for use in places where we just don't care, | ||
what the identifier ranges are. | ||
Returns: is character a member of least restrictive Continue | ||
*/ | ||
bool isAnyContinue(dchar c) | ||
{ | ||
import dmd.common.identifiertables; | ||
return isInRange!LeastRestrictive_Continue(c); | ||
} | ||
|
||
/// | ||
unittest | ||
{ | ||
assert(isAnyContinue('ğ')); | ||
} | ||
|
||
/// UTF line separator | ||
enum LS = 0x2028; | ||
/// UTF paragraph separator | ||
enum PS = 0x2029; | ||
|
||
private | ||
{ | ||
enum CMoctal = 0x1; | ||
enum CMhex = 0x2; | ||
enum CMidchar = 0x4; | ||
enum CMzerosecond = 0x8; | ||
enum CMdigitsecond = 0x10; | ||
enum CMsinglechar = 0x20; | ||
} | ||
|
||
/// | ||
bool isoctal(const char c) | ||
{ | ||
return (cmtable[c] & CMoctal) != 0; | ||
} | ||
|
||
/// | ||
bool ishex(const char c) | ||
{ | ||
return (cmtable[c] & CMhex) != 0; | ||
} | ||
|
||
/// | ||
bool isidchar(const char c) | ||
{ | ||
return (cmtable[c] & CMidchar) != 0; | ||
} | ||
|
||
/// | ||
bool isZeroSecond(const char c) | ||
{ | ||
return (cmtable[c] & CMzerosecond) != 0; | ||
} | ||
|
||
/// | ||
bool isDigitSecond(const char c) | ||
{ | ||
return (cmtable[c] & CMdigitsecond) != 0; | ||
} | ||
|
||
/// | ||
bool issinglechar(const char c) | ||
{ | ||
return (cmtable[c] & CMsinglechar) != 0; | ||
} | ||
|
||
/// | ||
bool c_isxdigit(const int c) | ||
{ | ||
return (( c >= '0' && c <= '9') || | ||
( c >= 'a' && c <= 'f') || | ||
( c >= 'A' && c <= 'F')); | ||
} | ||
|
||
/// | ||
bool c_isalnum(const int c) | ||
{ | ||
return (( c >= '0' && c <= '9') || | ||
( c >= 'a' && c <= 'z') || | ||
( c >= 'A' && c <= 'Z')); | ||
} | ||
|
||
extern(D) private: | ||
|
||
// originally from dmd.root.utf | ||
bool isInRange(alias Ranges)(dchar c) | ||
{ | ||
size_t high = Ranges.length - 1; | ||
// Shortcut search if c is out of range | ||
size_t low = (c < Ranges[0][0] || Ranges[high][1] < c) ? high + 1 : 0; | ||
// Binary search | ||
while (low <= high) | ||
{ | ||
const size_t mid = low + ((high - low) >> 1); | ||
if (c < Ranges[mid][0]) | ||
high = mid - 1; | ||
else if (Ranges[mid][1] < c) | ||
low = mid + 1; | ||
else | ||
{ | ||
assert(Ranges[mid][0] <= c && c <= Ranges[mid][1]); | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
/******************************************** | ||
* Do our own char maps | ||
*/ | ||
// originally from dmd.lexer (was private) | ||
static immutable cmtable = () | ||
{ | ||
ubyte[256] table; | ||
foreach (const c; 0 .. table.length) | ||
{ | ||
if ('0' <= c && c <= '7') | ||
table[c] |= CMoctal; | ||
if (c_isxdigit(c)) | ||
table[c] |= CMhex; | ||
if (c_isalnum(c) || c == '_') | ||
table[c] |= CMidchar; | ||
|
||
switch (c) | ||
{ | ||
case 'x': case 'X': | ||
case 'b': case 'B': | ||
table[c] |= CMzerosecond; | ||
break; | ||
|
||
case '0': .. case '9': | ||
case 'e': case 'E': | ||
case 'f': case 'F': | ||
case 'l': case 'L': | ||
case 'p': case 'P': | ||
case 'u': case 'U': | ||
case 'i': | ||
case '.': | ||
case '_': | ||
table[c] |= CMzerosecond | CMdigitsecond; | ||
break; | ||
|
||
default: | ||
break; | ||
} | ||
|
||
switch (c) | ||
{ | ||
case '\\': | ||
case '\n': | ||
case '\r': | ||
case 0: | ||
case 0x1A: | ||
case '\'': | ||
break; | ||
default: | ||
if (!(c & 0x80)) | ||
table[c] |= CMsinglechar; | ||
break; | ||
} | ||
} | ||
return table; | ||
}(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/** | ||
* Character tables related to identifiers. | ||
* | ||
* Supports UAX31, C99, C11 and least restrictive (All). | ||
* | ||
* Copyright: Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved | ||
* Authors: $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole) | ||
* License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) | ||
* Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/common/charactertables.d, common/charactertables.d) | ||
*/ | ||
|
||
#pragma once | ||
|
||
struct IdentifierCharLookup final | ||
{ | ||
bool(*isStart)(char32_t); | ||
bool(*isContinue)(char32_t); | ||
|
||
// constructor not provided here. | ||
}; |
Oops, something went wrong.