diff --git a/src/main/java/org/pegdown/Parser.java b/src/main/java/org/pegdown/Parser.java index 90b0ca7..8b41090 100644 --- a/src/main/java/org/pegdown/Parser.java +++ b/src/main/java/org/pegdown/Parser.java @@ -102,6 +102,7 @@ public RootNode parse(char[] source) { //************* BLOCKS **************** + @MemoMismatches public Rule Root() { return NodeSequence( push(new RootNode()), @@ -109,6 +110,7 @@ public Rule Root() { ); } + @MemoMismatches public Rule Block() { return Sequence( ZeroOrMore(BlankLine()), @@ -126,6 +128,7 @@ public Rule Block() { ); } + @MemoMismatches public Rule Para() { return NodeSequence( // The Para Rule only tests for the presence of a following blank line, but does not consume it. @@ -139,6 +142,7 @@ public Rule Para() { // vsch: #184 modified to only include trailing blank lines if there are more blocks for the blockquote following // otherwise don't include the blank lines, they are not part of the block quote + @MemoMismatches public Rule BlockQuote() { StringBuilderVar inner = new StringBuilderVar(); StringBuilderVar optional = new StringBuilderVar(); @@ -165,6 +169,7 @@ public Rule BlockQuote() { ); } + @MemoMismatches public Rule Verbatim() { StringBuilderVar text = new StringBuilderVar(); StringBuilderVar line = new StringBuilderVar(); @@ -184,7 +189,8 @@ public Rule Verbatim() { push(new VerbatimNode(text.getString())) ); } - + + @MemoMismatches public Rule FencedCodeBlock() { StringBuilderVar text = new StringBuilderVar(); Var markerLength = new Var(); @@ -211,7 +217,8 @@ public Rule CodeFence(Var markerLength) { Newline() ); } - + + @MemoMismatches public Rule HorizontalRule() { return NodeSequence( NonindentSpace(), @@ -228,10 +235,12 @@ public Rule HorizontalRule(char c) { //************* HEADINGS **************** + @MemoMismatches public Rule Heading() { return NodeSequence(FirstOf(AtxHeading(), SetextHeading())); } + @MemoMismatches public Rule AtxHeading() { return Sequence( AtxStart(), @@ -246,6 +255,7 @@ public Rule AtxHeading() { ); } + @MemoMismatches public Rule AtxStart() { return Sequence( FirstOf("######", "#####", "####", "###", "##", "#"), @@ -253,6 +263,7 @@ public Rule AtxStart() { ); } + @MemoMismatches public Rule AtxInline() { return Sequence( TestNot(Newline()), @@ -272,6 +283,7 @@ public Rule AtxInline() { // defacto invisible [control] characters). If applicable, grep // all such lines globally using the pattern "^--[-]*[\s\t]*$" // + @MemoMismatches public Rule SetextHeading() { return Sequence( // test for successful setext heading before actually building it to reduce backtracking @@ -281,6 +293,7 @@ public Rule SetextHeading() { } // vsch: #186 add isSetext flag to header node to distinguish header types + @MemoMismatches public Rule SetextHeading1() { return Sequence( SetextInline(), push(new HeaderNode(1, popAsNode(), true)), @@ -290,6 +303,7 @@ public Rule SetextHeading1() { ); } + @MemoMismatches public Rule SetextHeading2() { return Sequence( SetextInline(), push(new HeaderNode(2, popAsNode(), true)), @@ -299,6 +313,7 @@ public Rule SetextHeading2() { ); } + @MemoMismatches public Rule SetextInline() { return Sequence(TestNot(Endline()), Inline()); } @@ -358,7 +373,8 @@ public void collectChildrensText(SuperNode node, AnchorNodeInfo nodeInfo) { } //************** Definition Lists ************ - + + @MemoMismatches public Rule DefinitionList() { return NodeSequence( // test for successful definition list match before actually building it to reduce backtracking @@ -379,7 +395,8 @@ public Rule DefinitionList() { ) ); } - + + @MemoMismatches public Rule DefListTerm() { return NodeSequence( TestNot(Spacechar()), @@ -390,7 +407,8 @@ public Rule DefListTerm() { Newline() ); } - + + @MemoMismatches public Rule DefTermInline() { return Sequence( NotNewline(), @@ -398,7 +416,8 @@ public Rule DefTermInline() { Inline() ); } - + + @MemoMismatches public Rule Definition() { SuperNodeCreator itemNodeCreator = new SuperNodeCreator() { public SuperNode create(Node child) { @@ -407,13 +426,15 @@ public SuperNode create(Node child) { }; return ListItem(DefListBullet(), itemNodeCreator); } - + + @MemoMismatches public Rule DefListBullet() { return Sequence(NonindentSpace(), AnyOf(":~"), OneOrMore(Spacechar())); } //************* LISTS **************** + @MemoMismatches public Rule BulletList() { if (ext(TASKLISTITEMS)) { // #185 GFM style task list items [ ] open task, [x] closed task handlings @@ -451,6 +472,7 @@ public SuperNode create(Node child) { } } + @MemoMismatches public Rule OrderedList() { SuperNodeCreator itemNodeCreator = new SuperNodeCreator() { public SuperNode create(Node child) { @@ -608,6 +630,7 @@ public Rule ListItemIndentedBlocks(StringBuilderVar block) { ); } + @MemoMismatches public Rule TestNotItem() { return TestNot( FirstOf(new ArrayBuilder() @@ -617,7 +640,8 @@ public Rule TestNotItem() { ) ); } - + + @MemoMismatches public Rule TestNotListItem() { return TestNot( FirstOf(new ArrayBuilder() @@ -628,10 +652,12 @@ public Rule TestNotListItem() { ); } + @MemoMismatches public Rule Enumerator() { return Sequence(NonindentSpace(), OneOrMore(Digit()), '.', OneOrMore(Spacechar())); } + @MemoMismatches public Rule Bullet() { return Sequence(TestNot(HorizontalRule()), NonindentSpace(), AnyOf("+*-"), OneOrMore(Spacechar())); } @@ -716,6 +742,7 @@ boolean setListItemIndices() { //************* HTML BLOCK **************** + @MemoMismatches public Rule HtmlBlock() { return NodeSequence( FirstOf(HtmlBlockInTags(), HtmlComment(), HtmlBlockSelfClosing()), @@ -724,6 +751,7 @@ public Rule HtmlBlock() { ); } + @MemoMismatches public Rule HtmlBlockInTags() { StringVar tagName = new StringVar(); return Sequence( @@ -746,6 +774,7 @@ public Rule HtmlTagBlock(StringVar tagName) { ); } + @MemoMismatches public Rule HtmlBlockSelfClosing() { StringVar tagName = new StringVar(); return Sequence('<', Spn1(), DefinedHtmlTagName(tagName), Spn1(), ZeroOrMore(HtmlAttribute()), Optional('/'), @@ -812,6 +841,7 @@ public boolean isHtmlTag(String string) { //************* INLINES **************** + @MemoMismatches public Rule Inlines() { return NodeSequence( InlineOrIntermediateEndline(), push(new SuperNode(popAsNode())), @@ -820,13 +850,14 @@ public Rule Inlines() { ); } + @MemoMismatches public Rule InlineOrIntermediateEndline() { return FirstOf( Sequence(TestNot(Endline()), Inline()), Sequence(Endline(), Test(Inline())) ); } - + @MemoMismatches public Rule Inline() { return Sequence( @@ -835,10 +866,12 @@ public Rule Inline() { ); } + @MemoMismatches public Rule NonAutoLinkInline() { return FirstOf(NonAutoLink(), NonLinkInline()); } + @MemoMismatches public Rule NonLinkInline() { return FirstOf(new ArrayBuilder() .add(plugins.getInlinePluginRules()) @@ -857,14 +890,17 @@ public Rule Endline() { return NodeSequence(FirstOf(LineBreak(), TerminalEndline(), NormalEndline())); } + @MemoMismatches public Rule LineBreak() { return Sequence(" ", NormalEndline(), poke(new SimpleNode(Type.Linebreak))); } + @MemoMismatches public Rule TerminalEndline() { return NodeSequence(Sp(), Newline(), Test(EOI), push(new TextNode("\n"))); } + @MemoMismatches public Rule NormalEndline() { return Sequence( Sp(), Newline(), @@ -898,7 +934,8 @@ public Rule UlOrStarLine() { public Rule CharLine(char c) { return FirstOf(NOrMore(c, 4), Sequence(Spacechar(), OneOrMore(c), Test(Spacechar()))); } - + + @MemoMismatches public Rule StrongOrEmph() { return Sequence( Test(AnyOf("*_")), @@ -906,15 +943,18 @@ public Rule StrongOrEmph() { ); } + @MemoMismatches public Rule Emph() { return NodeSequence( FirstOf( EmphOrStrong("*"), EmphOrStrong("_") ) ); } + @MemoMismatches public Rule Strong() { return NodeSequence( FirstOf( EmphOrStrong("**"), EmphOrStrong("__") ) ); } // vsch: TODO: test for unclosed strikethrough sequence carrying through the isClosed attribute, as soon as I can figure out how + @MemoMismatches public Rule Strike() { // vsch: we need to preserve isClosed() attribute of the StrongEmphSuperNode, otherwise we can have // a strike sequence which is not closed and it makes it difficult to split out the lead-in/termination @@ -951,7 +991,7 @@ public Rule EmphOrStrong(String chars) { Optional(Sequence(EmphOrStrongClose(chars), setClosed())) ); } - + public Rule EmphOrStrongOpen(String chars) { return Sequence( TestNot(CharLine(chars.charAt(0))), @@ -1104,6 +1144,7 @@ protected boolean isLegalEmphOrStrongClosePos(){ //************* LINKS **************** + @MemoMismatches public Rule Image() { return NodeSequence( '!', ImageAlt(), @@ -1123,6 +1164,7 @@ public Rule Link() { ); } + @MemoMismatches public Rule NonAutoLink() { return NodeSequence(Sequence(Label(), FirstOf(ExplicitLink(false), ReferenceLink(false)))); } @@ -1179,6 +1221,7 @@ public Rule LinkSource() { ); } + @MemoMismatches public Rule LinkTitle() { return FirstOf(LinkTitle('\''), LinkTitle('"')); } @@ -1192,6 +1235,7 @@ public Rule LinkTitle(char delimiter) { ); } + @MemoMismatches public Rule AutoLink() { return Sequence( ext(AUTOLINKS) ? Optional('<') : Ch('<'), @@ -1200,6 +1244,7 @@ public Rule AutoLink() { ); } + @MemoMismatches public Rule WikiLink() { return Sequence( "[[", @@ -1209,6 +1254,7 @@ public Rule WikiLink() { ); } + @MemoMismatches public Rule AutoLinkUrl() { return Sequence( Sequence(OneOrMore(Letter()), "://", AutoLinkEnd()), @@ -1216,6 +1262,7 @@ public Rule AutoLinkUrl() { ); } + @MemoMismatches public Rule AutoLinkEmail() { return Sequence( Sequence(OneOrMore(FirstOf(Alphanumeric(), AnyOf("-+_."))), '@', AutoLinkEnd()), @@ -1223,6 +1270,7 @@ public Rule AutoLinkEmail() { ); } + @MemoMismatches public Rule AutoLinkEnd() { return OneOrMore( TestNot(Newline()), @@ -1241,6 +1289,7 @@ public Rule AutoLinkEnd() { //************* REFERENCE **************** // can't treat labels the same as the image alt since the image alt should be able to empty. + @MemoMismatches public Rule ImageAlt(){ return Sequence( '[', @@ -1251,6 +1300,7 @@ public Rule ImageAlt(){ ); } + @MemoMismatches public Rule Label() { return Sequence( '[', @@ -1262,6 +1312,7 @@ public Rule Label() { } // here we exclude the EOL at the end from the node's text range + @MemoMismatches public Rule Reference() { return Sequence( ReferenceNoEOL(), @@ -1269,6 +1320,7 @@ public Rule Reference() { ); } + @MemoMismatches public Rule ReferenceNoEOL() { Var ref = new Var(); return NodeSequence( @@ -1309,6 +1361,7 @@ public Rule RefTitle(char open, char close, Var ref) { //************* CODE **************** + @MemoMismatches public Rule Code() { return NodeSequence( Test('`'), @@ -1344,6 +1397,7 @@ public Rule Ticks(int count) { //************* RAW HTML **************** + @MemoMismatches public Rule InlineHtml() { return NodeSequence( FirstOf(HtmlComment(), HtmlTag()), @@ -1351,15 +1405,18 @@ public Rule InlineHtml() { ); } + @MemoMismatches public Rule HtmlComment() { return Sequence(""), ANY), "-->"); } + @MemoMismatches public Rule HtmlTag() { return Sequence('<', Spn1(), Optional('/'), OneOrMore(Alphanumeric()), Spn1(), ZeroOrMore(HtmlAttribute()), Optional('/'), Spn1(), '>'); } + @MemoMismatches public Rule HtmlAttribute() { return Sequence( OneOrMore(FirstOf(Alphanumeric(), '-', '_')), @@ -1369,6 +1426,7 @@ public Rule HtmlAttribute() { ); } + @MemoMismatches public Rule Quoted() { return FirstOf( Sequence('"', ZeroOrMore(TestNot('"'), ANY), '"'), @@ -1378,6 +1436,7 @@ public Rule Quoted() { //************* LINES **************** + @MemoMismatches public Rule BlankLine() { return Sequence(Sp(), Newline()); } @@ -1391,6 +1450,7 @@ public Rule Line(StringBuilderVar sb) { //************* ENTITIES **************** + @MemoMismatches public Rule Entity() { return NodeSequence( Sequence('&', FirstOf(HexEntity(), DecEntity(), CharEntity()), ';'), @@ -1398,40 +1458,49 @@ public Rule Entity() { ); } + @MemoMismatches public Rule HexEntity() { return Sequence('#', IgnoreCase('x'), OneOrMore(FirstOf(Digit(), CharRange('a', 'f'), CharRange('A', 'F')))); } + @MemoMismatches public Rule DecEntity() { return Sequence('#', OneOrMore(Digit())); } + @MemoMismatches public Rule CharEntity() { return OneOrMore(Alphanumeric()); } //************* BASICS **************** + @MemoMismatches public Rule Str() { return NodeSequence(OneOrMore(NormalChar()), push(new TextNode(match()))); } + @MemoMismatches public Rule Space() { return NodeSequence(OneOrMore(Spacechar()), push(new TextNode(" "))); } + @MemoMismatches public Rule Spn1() { return Sequence(Sp(), Optional(Newline(), Sp())); } + @MemoMismatches public Rule Sp() { return ZeroOrMore(Spacechar()); } + @MemoMismatches public Rule Spacechar() { return AnyOf(" \t"); } + @MemoMismatches public Rule Nonspacechar() { return Sequence(TestNot(Spacechar()), NotNewline(), ANY); } @@ -1441,6 +1510,7 @@ public Rule NormalChar() { return Sequence(TestNot(SpecialChar()), TestNot(Spacechar()), NotNewline(), ANY); } + @MemoMismatches public Rule EscapedChar() { // Previously all "*_`&[]<>!#\\'\".+-(){}:|~" were treated as escapable // only escape special characters as per John Gruber's list: "\\`*_{}[]()#+-.!" (plus <>&: for HTML escapes) @@ -1448,10 +1518,12 @@ public Rule EscapedChar() { return NodeSequence('\\', EscapableChar(), push(new SpecialTextNode(match()))); } + @MemoMismatches public Rule Symbol() { return NodeSequence(SpecialChar(), push(new SpecialTextNode(match()))); } + @MemoMismatches public Rule SpecialChar() { String chars = "*_`&[]<>!#\\"; if (ext(QUOTES)) { @@ -1483,6 +1555,7 @@ public Rule SpecialChar() { } // make these as per john grubber's original list + <>& + selected extensions + @MemoMismatches public Rule EscapableChar() { // John Gruber's list: "\\`*_{}[]()#+-.!" String chars = "\\`*_{}[]()#+-.!&<>"; @@ -1517,7 +1590,7 @@ public Rule EscapableChar() { public Rule NotNewline() { return TestNot(AnyOf("\n\r")); } - + public Rule Newline() { return FirstOf('\n', Sequence('\r', Optional('\n'))); } @@ -1544,6 +1617,7 @@ public Rule Digit() { //************* ABBREVIATIONS **************** + @MemoMismatches public Rule Abbreviation() { Var node = new Var(); return NodeSequence( @@ -1553,6 +1627,7 @@ public Rule Abbreviation() { ); } + @MemoMismatches public Rule AbbreviationText(Var node) { return Sequence( NodeSequence( @@ -1565,6 +1640,7 @@ public Rule AbbreviationText(Var node) { //************* TABLES **************** + @MemoMismatches public Rule Table() { Var node = new Var(); return NodeSequence( @@ -1591,6 +1667,7 @@ public Rule Table() { ); } + @MemoMismatches public Rule TableCaption() { return Sequence( '[', Sp(), @@ -1600,6 +1677,7 @@ public Rule TableCaption() { ); } + @MemoMismatches public Rule CaptionStart() { return NodeSequence( push(new TableCaptionNode()), @@ -1607,6 +1685,7 @@ public Rule CaptionStart() { ); } + @MemoMismatches public Rule CaptionInline() { return Sequence( TestNot(Newline()), @@ -1642,6 +1721,7 @@ public Rule TableColumn(Var tableNode, Var pipeSeen) { ); } + @MemoMismatches public Rule TableRow() { Var leadingPipe = new Var(Boolean.FALSE); return NodeSequence( @@ -1656,6 +1736,7 @@ public Rule TableRow() { // vsch: #183 Exclude the trailing || from TableCellNode node, leading ones are not included, it makes it more intuitive // that the TableCell will include only the text of the cell. + @MemoMismatches public Rule TableCell() { return Sequence( NodeSequence( @@ -1675,6 +1756,7 @@ public Rule TableCell() { //************* SMARTS **************** + @MemoMismatches public Rule Smarts() { return NodeSequence( FirstOf( @@ -1688,6 +1770,7 @@ public Rule Smarts() { //************* QUOTES **************** + @MemoMismatches public Rule SingleQuoted() { return NodeSequence( !Character.isLetter(getContext().getInputBuffer().charAt(getContext().getCurrentIndex() - 1)), @@ -1698,10 +1781,12 @@ public Rule SingleQuoted() { ); } + @MemoMismatches public Rule SingleQuoteEnd() { return Sequence('\'', TestNot(Alphanumeric())); } + @MemoMismatches public Rule DoubleQuoted() { return NodeSequence( '"', @@ -1711,6 +1796,7 @@ public Rule DoubleQuoted() { ); } + @MemoMismatches public Rule DoubleAngleQuoted() { return NodeSequence( "<<", @@ -1728,11 +1814,11 @@ public Rule DoubleAngleQuoted() { } //************* HELPERS **************** - + public Rule NOrMore(char c, int n) { return Sequence(repeat(c, n), ZeroOrMore(c)); } - + public Rule NodeSequence(Object... nodeRules) { return Sequence( push(getContext().getCurrentIndex()), @@ -1740,7 +1826,7 @@ public Rule NodeSequence(Object... nodeRules) { setIndices() ); } - + public boolean setIndices() { AbstractNode node = (AbstractNode) peek(); node.setStartIndex((Integer)pop(1));