Skip to content

Commit

Permalink
fix regex catastrophic backtracking (wikimedia-gadgets#269)
Browse files Browse the repository at this point in the history
* fix regex catastrophic backtracking

Fix wikimedia-gadgets#245

This new code is vulnerable to deleting the wrong heading if someone puts a category in the wrong place (not at the bottom of the article), but I think that's an acceptable tradeoff for now. If it actually affects someone we can make the solution more complex in a future patch.

* only trim sometimes

* add test

* handle disabled categories

* rewrite algorithm. add tests

* compact

* compact

* comment

* comment
  • Loading branch information
NovemLinguae authored Dec 22, 2023
1 parent ed134aa commit 916c622
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 2 deletions.
58 changes: 58 additions & 0 deletions src/modules/core.js
Original file line number Diff line number Diff line change
Expand Up @@ -1570,6 +1570,64 @@
return newCode;
},

/**
* Remove empty section at the end of the draft. Empty sections at the end of drafts
* frequently happen because of how the "Resubmit" button on the "declined" template
* works. The empty section may have categories after it - keep them there.
*
* @param {string} wikicode
*/
removeEmptySectionAtEnd: function ( wikicode ) {
// Hard to write a regex that doesn't catastrophic backtrack while still saving multiple categories and multiple blank lines. So we'll do this the old-fashioned way...

// Divide wikitext into lines
var lines = wikicode.split( '\n' );

// Buffers
var linesToKeep = [];
var i;

// Crawl the list of lines backward (bottom up)
var count = lines.length;
for ( i = count - 1; i >= 0; i-- ) {
var line = lines[ i ];
var isWhitespace = line.match( /^\s*$/ );
var isCategory = line.match( /^\s*\[\[:?Category:/i );
var isHeading = line.match( /^==[^=]+==$/i );

if ( isWhitespace || isCategory ) {
linesToKeep.push( line );
continue;
} else if ( isHeading ) {
break;
}

// If it's something besides the three things above, such as text, then there's no blank headings to delete. Return unaltered wikitext. We're done.
return wikicode;
}

// Delete the lines we checked from the array of lines. We'll be replacing these with new lines in a moment.
lines = lines.slice( 0, i );

// Add the categories and blank lines back
// Need to iterate backward, same as the loop above
count = linesToKeep.length;
for ( var j = count - 1; j >= 0; j-- ) {
var lineToKeep = linesToKeep[ j ];
lines.push( lineToKeep );
}

wikicode = lines.join( '\n' );

// The old algorithm had some quirks related to adding and removing \n. Mimic the old algorithm for now, so that unit tests pass and users don't have to get used to new behavior.
if ( wikicode.match( /\n\n$/ ) ) {
wikicode = wikicode.slice( 0, -1 );
}
wikicode = wikicode.replace( /\n(\n\n\[\[:?Category:)/i, '$1' );

return wikicode;
},

/**
* Returns the relative time that has elapsed between an oldDate and a nowDate
*
Expand Down
2 changes: 1 addition & 1 deletion src/modules/submissions.js
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@

// Remove empty section at the end (caused by "Resubmit" button on "declined" template)
// Section may have categories after it - keep them there
text = text.replace( /\n+==.+?==((?:\[\[:?Category:.+?\]\]|\s+)*)$/, '$1' );
text = AFCH.removeEmptySectionAtEnd( text );

// Assemble a master regexp and remove all now-unneeded comments (commentsToRemove)
commentRegex = new RegExp( '<!-{2,}\\s*(' + commentsToRemove.join( '|' ) + ')\\s*-{2,}>', 'gi' );
Expand Down
8 changes: 7 additions & 1 deletion tests/scaffold.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ mw.user = {
};

mw.loader = {
using: function () { return { then: function ( callback ) { callback(); } }; }
using: function () {
return {
then: function ( callback ) {
callback();
}
};
}
};

var basePageHtml = fs.readFileSync( './tests/test-frame.html' ).toString();
Expand Down
99 changes: 99 additions & 0 deletions tests/test-core.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,102 @@ describe( 'AFCH', function () {
describe( 'AFCH.Page', function () {
// FIXME...
} );

describe( 'AFCH.removeEmptySectionAtEnd', function () {
it( 'no headings', function () {
var wikicode = 'Test';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test' );
} );

it( 'one heading with body text', function () {
var wikicode = 'Test\n\n==Test2==\nMore test text\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\nMore test text\n' );
} );

it( 'two headings with body text', function () {
var wikicode = 'Test\n\n==Test2==\nMore test text\n\n== Test 3 ==\nYour text here.\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\nMore test text\n\n== Test 3 ==\nYour text here.\n' );
} );

it( 'two headings with body text and with categories', function () {
var wikicode = 'Test\n\n==Test2==\nMore test text\n\n== Test 3 ==\nYour text here.\n[[Category:Test]]\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\nMore test text\n\n== Test 3 ==\nYour text here.\n[[Category:Test]]\n' );
} );

it( '1 heading, 1 category, 1 heading, 1 empty heading', function () {
var wikicode = 'Test\n\n==Test2==\nMore test text\n\n[[Category:Test]]\n\n== Test 3 ==\nYour text here.\n\n== Test 4 ==\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\nMore test text\n\n[[Category:Test]]\n\n== Test 3 ==\nYour text here.\n' );
} );

it( '1 heading, 2 categories, 1 heading, 1 empty heading', function () {
var wikicode = 'Test\n\n==Test2==\nMore test text\n\n[[Category:Test]]\n[[Category:Test2]]\n\n== Test 3 ==\nYour text here.\n\n== Test 4 ==\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\nMore test text\n\n[[Category:Test]]\n[[Category:Test2]]\n\n== Test 3 ==\nYour text here.\n' );
} );

it( '1 empty heading, 2 categories, 1 heading, 1 empty heading', function () {
var wikicode = 'Test\n\n==Test2==\n[[Category:Test]]\n[[Category:Test2]]\n\n== Test 3 ==\nYour text here.\n\n== Test 4 ==\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\n[[Category:Test]]\n[[Category:Test2]]\n\n== Test 3 ==\nYour text here.\n' );
} );

it( 'one heading without body text', function () {
var wikicode = 'Test\n\n==Test2==\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n' );
} );

it( 'two headings without body text', function () {
var wikicode = 'Test\n\n==Test2==\n\n== Test 3 ==\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\n' );
} );

it( 'two headings without body text and with one category', function () {
var wikicode = 'Test\n\n==Test2==\n\n== Test 3 ==\n\n[[Category:Test]]\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\n\n[[Category:Test]]\n' );
} );

it( 'disabled category', function () {
var wikicode = 'Test\n\n==Test2==\n\n== Test 3 ==\n\n[[:Category:Test]]\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\n\n[[:Category:Test]]\n' );
} );

it( 'two headings without body text and with two categories #1', function () {
var wikicode = 'Test\n\n==Test2==\n\n== Test 3 ==\n\n[[Category:Test]]\n[[Category:Test2]]\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\n\n[[Category:Test]]\n[[Category:Test2]]\n' );
} );

it( 'two headings without body text and with two categories #2', function () {
var wikicode = 'Test\n\n==Test2==\n\n== Test 3 ==\n\n[[Category:Test]]\n\n[[Category:Test2]]\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\n\n[[Category:Test]]\n\n[[Category:Test2]]\n' );
} );

it( 'two headings without body text and with two categories #3', function () {
var wikicode = 'Test\n\n==Test2==\n\n== Test 3 ==\n\n[[Category:Test]]\n\n [[Category:Test2]]\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n==Test2==\n\n[[Category:Test]]\n\n [[Category:Test2]]\n' );
} );

it( 'don\'t trim if no heading was deleted', function () {
var wikicode = 'Test\n\n';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( 'Test\n\n' );
} );

// Catastrophic backtracking occurs if this test causes the test suite to get stuck for a long time
it( 'should not cause regex catastrophic backtracking', function () {
var wikicode = '{{AFC submission}}\n==A==\n \nB';
var output = AFCH.removeEmptySectionAtEnd( wikicode );
expect( output ).toBe( '{{AFC submission}}\n==A==\n \nB' );
} );
} );

0 comments on commit 916c622

Please sign in to comment.