Skip to content

Commit

Permalink
Improve: add named captures to Aliases (Mudlet#7175)
Browse files Browse the repository at this point in the history
#### Brief overview of PR changes/additions
This has been done by carefully comparing the alias matching code against
the equivalent code in the PCRE trigger item. Some variables have been
renamed so that the two match up better.

#### Motivation for adding to Mudlet
This has been a long-standing, though unlisted, wish-list item, and now
someone has actually raised an issue for it! 😀

Fortunately, much of the needed code was already in place; only the bit
that injects the named group results into the Lua sub-system was
missing (the call to
`TLuaInterpreter::setCaptureNameGroups(nameGroups, namePositions)`)!

#### Other info (issues closed, discussion etc)
It does actually make writing aliases a bit easier, because an alias can
refer to, say, `matches.target` rather than `matches[4]` once `?<target>`
has been inserted at the start of the relevant capture group. It also
makes extending or modifying an alias easier: a Lua script that uses
`matches.target` has no indexes to juggle, unlike one that uses
`matches[4]`.

This should close Mudlet#7171.

Signed-off-by: Stephen Lyons <[email protected]>
  • Loading branch information
SlySven authored Mar 9, 2024
1 parent 32ba1ed commit 7d1f40d
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 39 deletions.
73 changes: 39 additions & 34 deletions src/TAlias.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ void TAlias::setName(const QString& name)
mpHost->getAliasUnit()->mLookupTable.insert(name, this);
}

bool TAlias::match(const QString& toMatch)
bool TAlias::match(const QString& haystack)
{
bool matchCondition = false;
if (!isActive()) {
if (isFolder()) {
if (shouldBeActive()) {
bool matchCondition = false;
for (auto alias : *mpMyChildrenList) {
if (alias->match(toMatch)) {
if (alias->match(haystack)) {
matchCondition = true;
}
}
Expand All @@ -84,36 +84,38 @@ bool TAlias::match(const QString& toMatch)
return false;
}

bool matchCondition = false;
//bool ret = false;
//bool conditionMet = false;
QSharedPointer<pcre> re = mpRegex;
if (re == nullptr) {
return false; //regex compile error
}

#if defined(Q_OS_WIN32)
// strndup(3) - a safe strdup(3) does not seem to be available on mingw32 with GCC-4.9.2
char* subject = static_cast<char*>(malloc(strlen(toMatch.toUtf8().constData()) + 1));
strcpy(subject, toMatch.toUtf8().constData());
char* haystackC = static_cast<char*>(malloc(strlen(haystack.toUtf8().constData()) + 1));
strcpy(haystackC, haystack.toUtf8().constData());
#else
char* subject = strndup(toMatch.toUtf8().constData(), strlen(toMatch.toUtf8().constData()));
char* haystackC = strndup(haystack.toUtf8().constData(), strlen(haystack.toUtf8().constData()));
#endif
unsigned char* name_table;
int namecount;
int name_entry_size;

int subject_length = strlen(subject);
int rc, i;
// These must be initialised before any goto so the latter does not jump
// over them:
int namecount = 0;
char* tabptr = nullptr;
NameGroupMatches nameGroups;
QMap<QString, QPair<int, int>> namePositions;
std::list<std::string> captureList;
std::list<int> posList;
int ovector[MAX_CAPTURE_GROUPS * 3];
int name_entry_size = 0;
int haystackCLength = strlen(haystackC);
int rc = 0;
int i = 0;
int ovector[MAX_CAPTURE_GROUPS * 3] = {0};

//cout <<" LINE="<<subject<<endl;
if (mRegexCode.isEmpty()) {
goto MUD_ERROR;
}
rc = pcre_exec(re.data(), nullptr, subject, subject_length, 0, 0, ovector, MAX_CAPTURE_GROUPS * 3);

rc = pcre_exec(re.data(), nullptr, haystackC, haystackCLength, 0, 0, ovector, MAX_CAPTURE_GROUPS * 3);

if (rc < 0) {
goto MUD_ERROR;
Expand All @@ -136,7 +138,7 @@ bool TAlias::match(const QString& toMatch)
matchCondition = true; // alias has matched

for (i = 0; i < rc; i++) {
char* substring_start = subject + ovector[2 * i];
char* substring_start = haystackC + ovector[2 * i];
int substring_length = ovector[2 * i + 1] - ovector[2 * i];

std::string match;
Expand All @@ -153,35 +155,37 @@ bool TAlias::match(const QString& toMatch)
TDebug(Qt::darkMagenta, Qt::black) << TDebug::csmContinue << "<" << match.c_str() << ">\n" >> mpHost;
}
}
pcre_fullinfo(re.data(), nullptr, PCRE_INFO_NAMECOUNT, &namecount);

if (namecount <= 0) {
//cout << "no named substrings detected" << endl;
} else {
unsigned char* tabptr;
pcre_fullinfo(re.data(), nullptr, PCRE_INFO_NAMETABLE, &name_table);
pcre_fullinfo(re.data(), nullptr, PCRE_INFO_NAMECOUNT, &namecount);

if (namecount > 0) {
pcre_fullinfo(re.data(), nullptr, PCRE_INFO_NAMETABLE, &tabptr);
pcre_fullinfo(re.data(), nullptr, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);

tabptr = name_table;
for (i = 0; i < namecount; i++) {
//int n = (tabptr[0] << 8) | tabptr[1];
for (i = 0; i < namecount; ++i) {
const int n = (tabptr[0] << 8) | tabptr[1];
auto name = QString::fromUtf8(&tabptr[2]).trimmed();
auto* substring_start = haystackC + ovector[2*n];
auto substring_length = ovector[2*n+1] - ovector[2*n];
auto utf16_pos = haystack.indexOf(QString(substring_start));
auto capture = QString::fromUtf8(substring_start, substring_length);
nameGroups << qMakePair(name, capture);
tabptr += name_entry_size;
namePositions.insert(name, qMakePair(utf16_pos, substring_length));
}
}
//TODO: add named groups separately later as Lua::namedGroups

for (;;) {
int options = 0;
int start_offset = ovector[1];

if (ovector[0] == ovector[1]) {
if (ovector[0] >= subject_length) {
if (ovector[0] >= haystackCLength) {
goto END;
}
options = PCRE_NOTEMPTY | PCRE_ANCHORED;
}

rc = pcre_exec(re.data(), nullptr, subject, subject_length, start_offset, options, ovector, MAX_CAPTURE_GROUPS * 3);
rc = pcre_exec(re.data(), nullptr, haystackC, haystackCLength, start_offset, options, ovector, MAX_CAPTURE_GROUPS * 3);
if (rc == PCRE_ERROR_NOMATCH) {
if (options == 0) {
break;
Expand All @@ -201,7 +205,7 @@ bool TAlias::match(const QString& toMatch)
}

for (i = 0; i < rc; i++) {
char* substring_start = subject + ovector[2 * i];
char* substring_start = haystackC + ovector[2 * i];
int substring_length = ovector[2 * i + 1] - ovector[2 * i];
std::string match;
if (substring_length < 1) {
Expand All @@ -222,19 +226,20 @@ bool TAlias::match(const QString& toMatch)
END : {
TLuaInterpreter* pL = mpHost->getLuaInterpreter();
pL->setCaptureGroups(captureList, posList);
pL->setCaptureNameGroups(nameGroups, namePositions);
// call Lua trigger function with number of matches and the matches themselves as arguments
execute();
pL->clearCaptureGroups();
}

MUD_ERROR:
for (auto childAlias : *mpMyChildrenList) {
if (childAlias->match(toMatch)) {
if (childAlias->match(haystack)) {
matchCondition = true;
}
}

free(subject);
free(haystackC);
return matchCondition;
}

Expand Down
3 changes: 3 additions & 0 deletions src/TAlias.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class Host;

#define MAX_CAPTURE_GROUPS 33

using NameGroupMatches = QVector<QPair<QString, QString>>;

class TAlias : public Tree<TAlias>
{
Q_DECLARE_TR_FUNCTIONS(TAlias) // Needed so we can use tr() even though TAlias is NOT derived from QObject
Expand Down Expand Up @@ -78,6 +80,7 @@ class TAlias : public Tree<TAlias>
QString mFuncName;
bool exportItem = true;
bool mRegisteredAnonymousLuaFunction = false;
QVector<NameGroupMatches> nameCaptures;

private:
bool mNeedsToBeCompiled = true;
Expand Down
10 changes: 5 additions & 5 deletions src/TTrigger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,18 +360,18 @@ void TTrigger::processRegexMatch(const char* haystackC, const QString& haystack,
}
}

int namecount; //NOLINT(cppcoreguidelines-init-variables)
int name_entry_size; //NOLINT(cppcoreguidelines-init-variables)
char* tabptr; //NOLINT(cppcoreguidelines-init-variables)
int namecount = 0;
int name_entry_size = 0;
char* tabptr = nullptr;

pcre_fullinfo(re.data(), nullptr, PCRE_INFO_NAMECOUNT, &namecount);

if (namecount > 0) {
// Based on snippet https://github.com/vmg/pcre/blob/master/pcredemo.c#L216
// Retrieves char table end entry size and extracts name of group and captures from
// Retrieves char table end entry size and extracts name of group and captures from
pcre_fullinfo(re.data(), nullptr, PCRE_INFO_NAMETABLE, &tabptr);
pcre_fullinfo(re.data(), nullptr, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
for (i = 0; i < namecount; i++) {
for (i = 0; i < namecount; ++i) {
const int n = (tabptr[0] << 8) | tabptr[1];
auto name = QString::fromUtf8(&tabptr[2]).trimmed(); //NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic, cppcoreguidelines-pro-bounds-constant-array-index)
auto* substring_start = haystackC + ovector[2*n]; //NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic, cppcoreguidelines-pro-bounds-constant-array-index)
Expand Down

0 comments on commit 7d1f40d

Please sign in to comment.