Skip to content

Commit

Permalink
Merge pull request #185 from ChlodAlejandro/chlod/external-link-tags
Browse files (browse the repository at this point in the history)
Process masked external links in tag wikitext
  • Loading branch information
MusikAnimal authored Aug 22, 2024
2 parents 4b91da1 + a4cb89a commit 35c9c1b
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 8 deletions.
55 changes: 49 additions & 6 deletions src/Model/Record.php
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ public function getTags(): array {
*/
public function getTagLabels(): array {
	// Each label is run through parseWikitext() with $includeExternalLinks = true so
	// that masked external links in a label are processed as part of parsing.
	// A missing or null 'tags_labels' entry yields an empty array.
	return array_map( function ( $tag ) {
		return $this->parseWikitext( $tag, true );
	}, $this->data['tags_labels'] ?? [] );
}

Expand All @@ -202,18 +202,61 @@ public function getTagLabels(): array {
*
* @see https://github.com/x-tools/xtools/blob/4795fb88dd392bb0474219be3ef9a1fc019a228b/src/Model/Edit.php#L336
* @param string $wikitext
* @param bool $includeExternalLinks Whether to include masked external links as part of parsing.
* @return string
*/
public function parseWikitext( string $wikitext ): string {
public function parseWikitext( string $wikitext, bool $includeExternalLinks = false ): string {
$wikitext = htmlspecialchars( html_entity_decode( $wikitext ), ENT_NOQUOTES );
// Hold a list of tokens so that we don't end up replacing the same thing twice.
$tokenList = [];

// This regex is from https://stackoverflow.com/a/6041965/604142
// This should only have one capture group: the whole URL.
// Ensure all other groups are (?:non-capturing).
$urlRegex = '\b((?:[\w-]+://?|www[.])[^\s()<>]+(?:\([\w\d]+\)|(?:[^[:punct:]\s]|/)))';

// Process masked external links, if requested.
// This goes before we process raw links, so that we don't convert both.
if ( $includeExternalLinks ) {
$wikitext = preg_replace_callback(
"%\[$urlRegex ([^]]+)]%s",
static function ( $matches ) use ( &$tokenList, $urlRegex ) {
// Do not convert if label URL match is `1` (is a URL) or
// `false` (failure), for safety
if ( preg_match( "%$urlRegex%s", $matches[2] ) !== 0 ) {
return $matches[0];
}

do {
$id = rand();
} while ( isset( $tokenList[$id] ) );
$token = '<!--copypatrol:token:' . $id . '-->';
$tokenList[$id] = "<a target='_blank' rel='nofollow' href='${matches[1]}'>${matches[2]}</a>";
return $token;
},
$wikitext
);
}

// First link raw URLs. Courtesy of https://stackoverflow.com/a/11641499/604142
$wikitext = preg_replace(
'%\b(([\w-]+://?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/)))%s',
'<a target="_blank" href="$1">$1</a>',
// Link raw URLs.
$wikitext = preg_replace_callback(
"%$urlRegex%s",
static function ( $matches ) use ( &$tokenList ) {
do {
$id = rand();
} while ( isset( $tokenList[$id] ) );
$token = '<!--copypatrol:token:' . $id . '-->';
$tokenList[$id] = "<a target='_blank' rel='nofollow' href='${matches[1]}'>${matches[1]}</a>";
return $token;
},
$wikitext
);

// Replace all tokens from previous two steps.
foreach ( $tokenList as $id => $replacement ) {
$wikitext = str_replace( '<!--copypatrol:token:' . $id . '-->', $replacement, $wikitext );
}

$sectionMatch = null;
$isSection = preg_match_all( "/^\/\* (.*?) \*\//", $wikitext, $sectionMatch );
$pageUrl = $this->getPageUrl();
Expand Down
39 changes: 37 additions & 2 deletions tests/Model/RecordTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -173,15 +173,50 @@ public function testStatusJson(): void {
}

public function testParseWikitext(): void {
	// XSS: markup in the input must come back entity-escaped, while the wikilink
	// that follows it is still turned into an anchor.
	static::assertEquals(
		"&lt;script&gt;alert(\"XSS baby\")&lt;/script&gt; " .
		"<a target='_blank' href='https://en.wikipedia.org/wiki/Test_page'>test page</a>",
		$this->record->parseWikitext( '<script>alert("XSS baby")</script> [[test page]]' )
	);

	// Wikilink
	static::assertEquals(
		"<a target='_blank' href='https://en.wikipedia.org/wiki/MediaWiki'>MediaWiki</a>",
		$this->record->parseWikitext( "[[MediaWiki]]" )
	);

	// Wikilink (starting with `:`): the leading colon is expected to be dropped
	// from both the target and the label.
	static::assertEquals(
		"<a target='_blank' href='https://en.wikipedia.org/wiki/MediaWiki'>MediaWiki</a>",
		$this->record->parseWikitext( "[[:MediaWiki]]" )
	);

	// Raw link: a bare URL becomes an anchor whose label is the URL itself.
	static::assertEquals(
		"<a target='_blank' rel='nofollow' href='https://example.org'>https://example.org</a>",
		$this->record->parseWikitext( "https://example.org" )
	);

	// Masked external link with the flag left at its default: only the URL is
	// linked, and the surrounding brackets and label stay as plain text.
	static::assertEquals(
		"[<a target='_blank' rel='nofollow' href='https://example.org'>https://example.org</a> test]",
		$this->record->parseWikitext( "[https://example.org test]" )
	);

	// == WITH MASKED EXTERNAL LINKS ==

	// Masked external link: the label replaces the URL as the anchor text.
	static::assertEquals(
		"<a target='_blank' rel='nofollow' href='https://example.org'>test</a>",
		$this->record->parseWikitext( "[https://example.org test]", true )
	);

	// Misleading masked external link: when the label is itself a URL the link must
	// NOT be masked; both URLs are expected to be linked individually instead.
	static::assertEquals(
		"[<a target='_blank' rel='nofollow' href='https://evil.example.org'>https://evil.example.org</a> "
		. "<a target='_blank' rel='nofollow' href='https://example.org'>https://example.org</a>]",
		$this->record->parseWikitext( "[https://evil.example.org https://example.org]", true )
	);
}

Expand Down

0 comments on commit 35c9c1b

Please sign in to comment.