Skip to content

Commit

Permalink
Added a new popGrapheme function to std.uni
Browse files Browse the repository at this point in the history
  • Loading branch information
dukc committed Sep 24, 2024
1 parent 2168bec commit d02b439
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 17 deletions.
37 changes: 37 additions & 0 deletions changelog/pop-grapheme.dd
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
Added popGrapheme function to std.uni.

The new function is a cross between the existing $(REF graphemeStride, std, uni)
and $(REF decodeGrapheme, std, uni) functions. The new function both supports
all four attributes like `graphemeStride` does as long as you don't rely on
autodecoding (side node: `@nogc` support for `graphemeStride` added in this
release), and works with any non-array ranges just like `decodeGrapheme` does.

Example:

-------
import std.uni;

// Two Union Jacks of the Great Britain in each
string s = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
wstring ws = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
dstring ds = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";

// String pop length in code units, not points.
assert(s.popGrapheme() == 8);
assert(ws.popGrapheme() == 4);
assert(ds.popGrapheme() == 2);

assert(s == "\U0001F1EC\U0001F1E7");
assert(ws == "\U0001F1EC\U0001F1E7");
assert(ds == "\U0001F1EC\U0001F1E7");

import std.algorithm.comparison : equal;
import std.algorithm.iteration : filter;

// Also works for non-random access ranges as long as the
// character type is 32-bit.
auto testPiece = "\r\nhello!"d.filter!(x => !x.isAlpha);
// Windows-style line ending is two code point in a single grapheme.
assert(testPiece.popGrapheme() == 2);
assert(testPiece.equal("!"d));
-------
113 changes: 96 additions & 17 deletions std/uni/package.d
Original file line number Diff line number Diff line change
Expand Up @@ -708,8 +708,8 @@ import std.meta : AliasSeq;
import std.range.primitives : back, ElementEncodingType, ElementType, empty,
front, hasLength, hasSlicing, isForwardRange, isInputRange,
isRandomAccessRange, popFront, put, save;
import std.traits : isConvertibleToString, isIntegral, isSomeChar,
isSomeString, Unqual, isDynamicArray;
import std.traits : isAutodecodableString, isConvertibleToString, isIntegral,
isSomeChar, isSomeString, Unqual, isDynamicArray;
// debug = std_uni;

import std.internal.unicode_tables; // generated file
Expand Down Expand Up @@ -7148,17 +7148,25 @@ private immutable TransformRes
TransformRes.goOn
];

template genericDecodeGrapheme(bool getValue)
{
static if (getValue)
enum GraphemeRet { none, step, value }

template genericDecodeGrapheme(GraphemeRet retType)
{ alias Ret = GraphemeRet;

static if (retType == Ret.value)
alias Value = Grapheme;
else
else static if (retType == Ret.step)
alias Value = size_t;
else static if (retType == Ret.none)
alias Value = void;

Value genericDecodeGrapheme(Input)(ref Input range)
{
static if (getValue)
Grapheme grapheme;
static if (retType == Ret.value)
Grapheme result;
else static if (retType == Ret.step)
size_t result = 0;

auto state = GraphemeState.Start;
dchar ch;

Expand All @@ -7173,17 +7181,21 @@ template genericDecodeGrapheme(bool getValue)
with(TransformRes)
{
case goOn:
static if (getValue)
grapheme ~= ch;
static if (retType == Ret.value)
result ~= ch;
else static if (retType == Ret.step)
result++;
range.popFront();
continue;

case redo:
goto rerun;

case retInclude:
static if (getValue)
grapheme ~= ch;
static if (retType == Ret.value)
result ~= ch;
else static if (retType == Ret.step)
result++;
range.popFront();
break outer;

Expand All @@ -7192,8 +7204,8 @@ template genericDecodeGrapheme(bool getValue)
}
}

static if (getValue)
return grapheme;
static if (retType != Ret.none)
return result;
}
}

Expand All @@ -7217,7 +7229,7 @@ if (is(C : dchar))
{
auto src = input[index..$];
auto n = src.length;
genericDecodeGrapheme!(false)(src);
genericDecodeGrapheme!(GraphemeRet.none)(src);
return n - src.length;
}

Expand Down Expand Up @@ -7279,7 +7291,7 @@ if (is(C : dchar))
Grapheme decodeGrapheme(Input)(ref Input inp)
if (isInputRange!Input && is(immutable ElementType!Input == immutable dchar))
{
return genericDecodeGrapheme!true(inp);
return genericDecodeGrapheme!(GraphemeRet.value)(inp);
}

@safe unittest
Expand All @@ -7304,6 +7316,73 @@ if (isInputRange!Input && is(immutable ElementType!Input == immutable dchar))
assert(equal(decodeGrapheme(s)[], "\U0001F1EC\U0001F1E7"));
}

/++
Reads one full grapheme cluster from an
$(REF_ALTTEXT input range, isInputRange, std,range,primitives) of dchar `inp`,
but doesn't return it. Instead returns the number of code units read.
This differs from number of code points read only if `input` is an
autodecodable string.
Note:
This function modifies `inp` and thus `inp`
must be an L-value.
+/
size_t popGrapheme(Input)(ref Input inp)
if (isInputRange!Input && is(immutable ElementType!Input == immutable dchar))
{
static if (isAutodecodableString!Input || isRandomAccessRange!Input)
{
// Why count each step in the decoder when you can just
// measure the grapheme in one go?
auto n = inp.length;
genericDecodeGrapheme!(GraphemeRet.none)(inp);
return n - inp.length;
}
else return genericDecodeGrapheme!(GraphemeRet.step)(inp);
}

///
@safe pure unittest
{
// Two Union Jacks of the Great Britain in each
string s = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
wstring ws = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";
dstring ds = "\U0001F1EC\U0001F1E7\U0001F1EC\U0001F1E7";

// String pop length in code units, not points.
assert(s.popGrapheme() == 8);
assert(ws.popGrapheme() == 4);
assert(ds.popGrapheme() == 2);

assert(s == "\U0001F1EC\U0001F1E7");
assert(ws == "\U0001F1EC\U0001F1E7");
assert(ds == "\U0001F1EC\U0001F1E7");

import std.algorithm.comparison : equal;
import std.algorithm.iteration : filter;

// Also works for non-random access ranges as long as the
// character type is 32-bit.
auto testPiece = "\r\nhello!"d.filter!(x => !x.isAlpha);
// Windows-style line ending is two code points in a single grapheme.
assert(testPiece.popGrapheme() == 2);
assert(testPiece.equal("!"d));
}

// Attribute compliance test. Should be nothrow `@nogc` when
// no autodecoding needed.
@safe pure nothrow @nogc unittest
{
import std.algorithm.iteration : filter;

auto str = "abcdef"d;
assert(str.popGrapheme() == 1);

// also test with non-random access
auto filtered = "abcdef"d.filter!(x => x%2);
assert(filtered.popGrapheme() == 1);
}

/++
$(P Iterate a string by $(LREF Grapheme).)
Expand Down Expand Up @@ -7722,7 +7801,7 @@ public:
@property bool valid()() /*const*/
{
auto r = this[];
genericDecodeGrapheme!false(r);
genericDecodeGrapheme!(GraphemeRet.none)(r);
return r.length == 0;
}

Expand Down

0 comments on commit d02b439

Please sign in to comment.