From 6c32ab4c04fb6b3e1844d647fe0fc0c5317a67df Mon Sep 17 00:00:00 2001 From: Vladimir Schneider Date: Mon, 5 Oct 2015 03:13:18 -0400 Subject: [PATCH 01/10] fix #200, wiki links with anchor reference update test --- src/main/java/org/pegdown/LinkRenderer.java | 11 +++++++++-- src/test/resources/pegdown/Wikilinks.html | 16 ++++++---------- src/test/resources/pegdown/Wikilinks.md | 2 ++ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/pegdown/LinkRenderer.java b/src/main/java/org/pegdown/LinkRenderer.java index ff41bf9..851bcc7 100644 --- a/src/main/java/org/pegdown/LinkRenderer.java +++ b/src/main/java/org/pegdown/LinkRenderer.java @@ -97,10 +97,17 @@ public Rendering render(WikiLinkNode node) { int pos; if ((pos = text.indexOf("|")) >= 0) { url = text.substring(0, pos); - text = text.substring(pos+1); + text = text.substring(pos + 1); } - url = "./" + URLEncoder.encode(url.replace(' ', '-'), "UTF-8") + ".html"; + // vsch: #200 WikiLinks can have anchor # refs + String suffix = ""; + if ((pos = url.lastIndexOf("#")) >= 0) { + suffix = url.substring(pos); + url = url.substring(0, pos); + } + + url = "./" + URLEncoder.encode(url.replace(' ', '-'), "UTF-8") + ".html" + suffix; return new Rendering(url, text); } catch (UnsupportedEncodingException e) { throw new IllegalStateException(); diff --git a/src/test/resources/pegdown/Wikilinks.html b/src/test/resources/pegdown/Wikilinks.html index 0b53bd9..b8a8539 100644 --- a/src/test/resources/pegdown/Wikilinks.html +++ b/src/test/resources/pegdown/Wikilinks.html @@ -1,14 +1,10 @@

Wikilinks

-

Wikilinks are simple URIs like Autolinks,
-which will be converted by pegdown.

-

Another example provides a separate link text page link text,
-which will be converted by pegdown.

-

Another example with spaces: Special Chars,
-with square brackets: [Square]Brackets.

+

Wikilinks are simple URIs like Autolinks,
which will be converted by pegdown.

+

Another example provides a separate link text page link text,
Let’s not forget anchor refs page name#ref,
Let’s not forget anchor refs provides a separate link text page link title text,
which will be converted by pegdown.

+

Another example with spaces: Special Chars,
with square brackets: [Square]Brackets.

The following links should work just normally:

diff --git a/src/test/resources/pegdown/Wikilinks.md b/src/test/resources/pegdown/Wikilinks.md index 83767d0..6325c8c 100644 --- a/src/test/resources/pegdown/Wikilinks.md +++ b/src/test/resources/pegdown/Wikilinks.md @@ -4,6 +4,8 @@ Wikilinks are simple URIs like [[Autolinks]], which will be converted by pegdown. Another example provides a separate link text [[page name|page link text]], +Let's not forget anchor refs [[page name#ref]], +Let's not forget anchor refs provides a separate link text [[page name#ref|page link title text]], which will be converted by pegdown. Another example with spaces: [[Special Chars]], From 325d2be75aef8a7c827d0b3384e7db6abc47d6e6 Mon Sep 17 00:00:00 2001 From: Vladimir Schneider Date: Thu, 22 Oct 2015 13:17:02 -0400 Subject: [PATCH 02/10] fix code inline taken as fence start --- build.sbt | 2 +- src/main/java/org/pegdown/Extensions.java | 5 +- src/main/java/org/pegdown/LinkRenderer.java | 19 ++- src/main/java/org/pegdown/Parser.java | 109 ++++++++++-------- .../pegdown/GFM_Fenced_Code_Blocks.html | 10 ++ .../pegdown/GFM_Fenced_Code_Blocks.md | 12 +- 6 files changed, 101 insertions(+), 56 deletions(-) diff --git a/build.sbt b/build.sbt index 4a00743..5ebc05e 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ name := "pegdown" -version := "1.6.0" +version := "1.6.3" homepage := Some(new URL("http://pegdown.org")) diff --git a/src/main/java/org/pegdown/Extensions.java b/src/main/java/org/pegdown/Extensions.java index 7644da3..7d58bbc 100644 --- a/src/main/java/org/pegdown/Extensions.java +++ b/src/main/java/org/pegdown/Extensions.java @@ -37,7 +37,7 @@ public interface Extensions { /** * All of the smartypants prettyfications. Equivalent to SMARTS + QUOTES. - * + * * @see Smartypants */ static final int SMARTYPANTS = SMARTS + QUOTES; @@ -71,7 +71,7 @@ public interface Extensions { * @see MultiMarkdown */ static final int TABLES = 0x20; - + /** * PHP Markdown Extra style definition lists. 
* Additionally supports the small extension proposed in the article referenced below. @@ -152,6 +152,7 @@ public interface Extensions { * Anchor link is added as first element inside the header with empty content: `

header a

` */ static final int EXTANCHORLINKS = 0x00400000; + static final int EXTANCHORLINKS_WRAP = 0x00800000; /** * All Optionals other than Suppress and FORCELISTITEMPARA which is a backwards compatibility extension diff --git a/src/main/java/org/pegdown/LinkRenderer.java b/src/main/java/org/pegdown/LinkRenderer.java index 851bcc7..049c2c3 100644 --- a/src/main/java/org/pegdown/LinkRenderer.java +++ b/src/main/java/org/pegdown/LinkRenderer.java @@ -3,13 +3,14 @@ import org.parboiled.common.StringUtils; import org.pegdown.ast.*; -import static org.pegdown.FastEncoder.*; - import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; +import static org.pegdown.FastEncoder.encode; +import static org.pegdown.FastEncoder.obfuscate; + /** * A LinkRenderer is responsible for turning an AST node representing a link into a {@link LinkRenderer.Rendering} * instance, which hold the actual properties of the link as it is going to be rendered. @@ -50,6 +51,20 @@ public Rendering withAttribute(String name, String value) { } public Rendering withAttribute(Attribute attr) { + int iMax = attributes.size(); + + // vsch: a little wasteful, a Map would be better, but we don't have too many attributes and + // this will not break code for those that have implemented their own derived ToHtmlSerializers. 
+ for (int i = 0; i < iMax; i++) { + Attribute attribute = attributes.get(i); + if (attribute.name.equals(attr.name)) { + // vsch: need to handle setting multiple classes, works for values too + // concatenate them with space between values, as for class + attr = new Attribute(attr.name, attribute.value + " " + attr.value); + attributes.remove(i); + break; + } + } attributes.add(attr); return this; } diff --git a/src/main/java/org/pegdown/Parser.java b/src/main/java/org/pegdown/Parser.java index 90b0ca7..577a8ce 100644 --- a/src/main/java/org/pegdown/Parser.java +++ b/src/main/java/org/pegdown/Parser.java @@ -50,7 +50,7 @@ */ @SuppressWarnings( {"InfiniteRecursion"}) public class Parser extends BaseParser implements Extensions { - + protected static final char CROSSED_OUT = '\uffff'; public interface ParseRunnerProvider { @@ -171,7 +171,7 @@ public Rule Verbatim() { return NodeSequence( OneOrMore( ZeroOrMore(BlankLine(), line.append('\n')), - Indent(), push(currentIndex()), + Indent(), push(currentIndex()), OneOrMore( FirstOf( Sequence('\t', line.append(repeat(' ', 4-(currentIndex()-1-(Integer)peek())%4))), @@ -184,11 +184,12 @@ public Rule Verbatim() { push(new VerbatimNode(text.getString())) ); } - + public Rule FencedCodeBlock() { StringBuilderVar text = new StringBuilderVar(); Var markerLength = new Var(); return NodeSequence( + // vsch: test to see if what appears to be a code fence is just inline code CodeFence(markerLength), TestNot(CodeFence(markerLength)), // prevent empty matches ZeroOrMore(BlankLine(), text.append('\n')), @@ -206,12 +207,12 @@ public Rule CodeFence(Var markerLength) { (markerLength.isSet() && matchLength() == markerLength.get()) || (markerLength.isNotSet() && markerLength.set(matchLength())), Sp(), - ZeroOrMore(TestNot(Newline()), ANY), // GFM code type identifier + ZeroOrMore(TestNot(FirstOf(Newline(), '~', '`')), ANY), // GFM code type identifier but exclude fenced code markers push(match()), Newline() ); } - + public Rule 
HorizontalRule() { return NodeSequence( NonindentSpace(), @@ -320,10 +321,18 @@ public boolean wrapInAnchor() { collectChildrensText(node, nodeInfo); String text = nodeInfo.text.toString().trim(); if (text.length() > 0) { - AnchorLinkNode anchor = new AnchorLinkNode(text, ""); - anchor.setStartIndex(nodeInfo.startIndex); - anchor.setEndIndex(nodeInfo.endIndex); - children.add(0, anchor); + if (ext(EXTANCHORLINKS_WRAP)) { + AnchorLinkNode anchor = new AnchorLinkNode(text, text); + anchor.setStartIndex(nodeInfo.startIndex); + anchor.setEndIndex(nodeInfo.endIndex); + children.clear(); + children.add(0, anchor); + } else { + AnchorLinkNode anchor = new AnchorLinkNode(text, ""); + anchor.setStartIndex(nodeInfo.startIndex); + anchor.setEndIndex(nodeInfo.endIndex); + children.add(0, anchor); + } } } } else { @@ -358,7 +367,7 @@ public void collectChildrensText(SuperNode node, AnchorNodeInfo nodeInfo) { } //************** Definition Lists ************ - + public Rule DefinitionList() { return NodeSequence( // test for successful definition list match before actually building it to reduce backtracking @@ -379,7 +388,7 @@ public Rule DefinitionList() { ) ); } - + public Rule DefListTerm() { return NodeSequence( TestNot(Spacechar()), @@ -390,7 +399,7 @@ public Rule DefListTerm() { Newline() ); } - + public Rule DefTermInline() { return Sequence( NotNewline(), @@ -398,7 +407,7 @@ public Rule DefTermInline() { Inline() ); } - + public Rule Definition() { SuperNodeCreator itemNodeCreator = new SuperNodeCreator() { public SuperNode create(Node child) { @@ -407,7 +416,7 @@ public SuperNode create(Node child) { }; return ListItem(DefListBullet(), itemNodeCreator); } - + public Rule DefListBullet() { return Sequence(NonindentSpace(), AnyOf(":~"), OneOrMore(Spacechar())); } @@ -617,7 +626,7 @@ public Rule TestNotItem() { ) ); } - + public Rule TestNotListItem() { return TestNot( FirstOf(new ArrayBuilder() @@ -635,7 +644,7 @@ public Rule Enumerator() { public Rule Bullet() { return 
Sequence(TestNot(HorizontalRule()), NonindentSpace(), AnyOf("+*-"), OneOrMore(Spacechar())); } - + //************* LIST ITEM ACTIONS **************** boolean appendCrossed(StringBuilderVar block) { @@ -694,7 +703,7 @@ boolean wrapFirstItemInPara(SuperNode item) { item.getChildren().set(0, rootNode); return true; } - + SuperNode wrapFirstSubItemInPara(SuperNode item) { Node firstItemFirstChild = item.getChildren().get(0); if (firstItemFirstChild.getChildren().size() == 1) { @@ -826,7 +835,7 @@ public Rule InlineOrIntermediateEndline() { Sequence(Endline(), Test(Inline())) ); } - + @MemoMismatches public Rule Inline() { return Sequence( @@ -898,7 +907,7 @@ public Rule UlOrStarLine() { public Rule CharLine(char c) { return FirstOf(NOrMore(c, 4), Sequence(Spacechar(), OneOrMore(c), Test(Spacechar()))); } - + public Rule StrongOrEmph() { return Sequence( Test(AnyOf("*_")), @@ -951,7 +960,7 @@ public Rule EmphOrStrong(String chars) { Optional(Sequence(EmphOrStrongClose(chars), setClosed())) ); } - + public Rule EmphOrStrongOpen(String chars) { return Sequence( TestNot(CharLine(chars.charAt(0))), @@ -979,10 +988,10 @@ public Rule EmphOrStrongClose(String chars) { ) ); } - + /** * This method checks if the parser can enter an emph or strong sequence - * Emph only allows Strong as direct child, Strong only allows Emph as + * Emph only allows Strong as direct child, Strong only allows Emph as * direct child. */ protected boolean mayEnterEmphOrStrong(String chars){ @@ -990,16 +999,16 @@ protected boolean mayEnterEmphOrStrong(String chars){ return false; } - Object parent = peek(2); + Object parent = peek(2); boolean isStrong = ( chars.length()==2 ); - + if( StrongEmphSuperNode.class.equals( parent.getClass() ) ){ if( ((StrongEmphSuperNode) parent).isStrong() == isStrong ) return false; } return true; } - + /** * This method checks if current position is a legal start position for a * strong or emph sequence by checking the last parsed character(-sequence). 
@@ -1017,16 +1026,16 @@ protected boolean isLegalEmphOrStrongStartPos(){ if(supernode.getChildren().size() < 1 ) return true; - + lastItem = supernode.getChildren().get( supernode.getChildren().size()-1 ); lastClass = lastItem.getClass(); } - + return ( TextNode.class.equals(lastClass) && ( (TextNode) lastItem).getText().endsWith(" ") ) || ( SimpleNode.class.equals(lastClass) ) || ( java.lang.Integer.class.equals(lastClass) ); } - + /** * Mark the current StrongEmphSuperNode as closed sequence */ @@ -1035,9 +1044,9 @@ protected boolean setClosed(){ node.setClosed(true); return true; } - + /** - * This method checks if current parent is a strong parent based on param `chars`. If so, it checks if the + * This method checks if current parent is a strong parent based on param `chars`. If so, it checks if the * latest inline node to be added as child does not end with a closing character of the parent. When this * is true, a next test should check if the closing character(s) of the child should become (part of) the * closing character(s) of the parent. @@ -1047,7 +1056,7 @@ protected boolean isStrongCloseCharStolen( String chars ){ return false; Object childClass = peek().getClass(); - + //checks if last `inline` to be added as child is not a StrongEmphSuperNode //that eats up a closing character for the parent StrongEmphSuperNode if( StrongEmphSuperNode.class.equals( childClass ) ){ @@ -1060,14 +1069,14 @@ protected boolean isStrongCloseCharStolen( String chars ){ return true; } } - + return false; } /** * Steals the last close char by marking a previously closed emph/strong node as unclosed. 
*/ - protected boolean stealBackStrongCloseChar(){ + protected boolean stealBackStrongCloseChar(){ StrongEmphSuperNode child = (StrongEmphSuperNode) peek(); child.setClosed(false); addAsChild(); @@ -1075,7 +1084,7 @@ protected boolean stealBackStrongCloseChar(){ push(new ValidEmphOrStrongCloseNode()); return true; } - + /** * This method checks if the last parsed character or sequence is a valid prefix for a closing char for * an emph or strong sequence. @@ -1084,23 +1093,23 @@ protected boolean isLegalEmphOrStrongClosePos(){ Object lastItem = peek(); if ( StrongEmphSuperNode.class.equals( lastItem.getClass() ) ){ List children = ((StrongEmphSuperNode) lastItem).getChildren(); - + if(children.size() < 1) return true; lastItem = children.get( children.size()-1 ); Class lastClass = lastItem.getClass(); - + if( TextNode.class.equals(lastClass) ) return !((TextNode) lastItem).getText().endsWith(" "); if( SimpleNode.class.equals(lastClass) ) return !((SimpleNode) lastItem).getType().equals(SimpleNode.Type.Linebreak); - + } return true; } - + //************* LINKS **************** @@ -1517,7 +1526,7 @@ public Rule EscapableChar() { public Rule NotNewline() { return TestNot(AnyOf("\n\r")); } - + public Rule Newline() { return FirstOf('\n', Sequence('\r', Optional('\n'))); } @@ -1728,11 +1737,11 @@ public Rule DoubleAngleQuoted() { } //************* HELPERS **************** - + public Rule NOrMore(char c, int n) { return Sequence(repeat(c, n), ZeroOrMore(c)); } - + public Rule NodeSequence(Object... nodeRules) { return Sequence( push(getContext().getCurrentIndex()), @@ -1740,14 +1749,14 @@ public Rule NodeSequence(Object... 
nodeRules) { setIndices() ); } - + public boolean setIndices() { AbstractNode node = (AbstractNode) peek(); node.setStartIndex((Integer)pop(1)); node.setEndIndex(currentIndex()); return true; } - + public boolean addAsChild() { SuperNode parent = (SuperNode) peek(1); List children = parent.getChildren(); @@ -1766,7 +1775,7 @@ public boolean addAsChild() { children.add(child); return true; } - + public Node popAsNode() { return (Node) pop(); } @@ -1778,12 +1787,12 @@ public String popAsString() { public boolean ext(int extension) { return (options & extension) > 0; } - + // called for inner parses for list items and blockquotes public RootNode parseInternal(StringBuilderVar block) { char[] chars = block.getChars(); int[] ixMap = new int[chars.length + 1]; // map of cleaned indices to original indices - + // strip out CROSSED_OUT characters and build index map StringBuilder clean = new StringBuilder(); for (int i = 0; i < chars.length; i++) { @@ -1794,15 +1803,15 @@ public RootNode parseInternal(StringBuilderVar block) { } } ixMap[clean.length()] = chars.length; - + // run inner parse char[] cleaned = new char[clean.length()]; clean.getChars(0, cleaned.length, cleaned, 0); RootNode rootNode = parseInternal(cleaned); - + // correct AST indices with index map fixIndices(rootNode, ixMap); - + return rootNode; } @@ -1824,7 +1833,7 @@ public RootNode parseInternal(char[] source) { } return (RootNode) result.resultValue; } - + ParsingResult parseToParsingResult(char[] source) { parsingStartTimeStamp = System.currentTimeMillis(); return parseRunnerProvider.get(Root()).run(source); diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html index a6e4267..66b05e7 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html @@ -48,3 +48,13 @@ ````` should still be fenced + +

test false opening code fence
+inline code
+inline code 2

+

not code

+
+
+fenced block
+
+
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md index c198d9d..a3ddb5e 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md @@ -47,4 +47,14 @@ public static String messageOrBlank(@PropertyKey(resourceBundle = BUNDLE_NAME) S should still be fenced ````` should still be fenced -```` \ No newline at end of file +```` + +test false opening code fence +```inline code``` +```inline code 2``` + +not code + +``` +fenced block +``` From 45c236e0a3b74662a003da944bddcef0d1e314a4 Mon Sep 17 00:00:00 2001 From: Vladimir Schneider Date: Mon, 26 Oct 2015 20:20:29 -0400 Subject: [PATCH 03/10] fix #203 --- build.sbt | 2 +- src/main/java/org/pegdown/Parser.java | 33 +++++++++++++++++-- .../pegdown/GFM_Fenced_Code_Blocks.ast | 13 +++++++- .../GFM_Fenced_Code_Blocks_reversed_all.html | 8 +++++ ...GFM_Fenced_Code_Blocks_reversed_scala.html | 8 +++++ src/test/resources/pegdown/Tables.html | 28 +++++++++++++++- src/test/resources/pegdown/Tables.md | 9 +++++ .../org/pegdown/AbstractPegDownSpec.scala | 6 ++-- 8 files changed, 100 insertions(+), 7 deletions(-) diff --git a/build.sbt b/build.sbt index 5ebc05e..46033af 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ name := "pegdown" -version := "1.6.3" +version := "1.6.4" homepage := Some(new URL("http://pegdown.org")) diff --git a/src/main/java/org/pegdown/Parser.java b/src/main/java/org/pegdown/Parser.java index 577a8ce..cedd86e 100644 --- a/src/main/java/org/pegdown/Parser.java +++ b/src/main/java/org/pegdown/Parser.java @@ -1588,8 +1588,8 @@ public Rule Table() { TableDivider(node), Optional( NodeSequence( - TableRow(), push(1, new TableBodyNode()) && addAsChild(), - ZeroOrMore(TableRow(), addAsChild()) + TableRowAfterDivider(), push(1, new TableBodyNode()) && addAsChild(), + ZeroOrMore(TableRowAfterDivider(), addAsChild()) ), addAsChild() // add the TableHeaderNode to the TableNode ), @@ 
-1663,6 +1663,18 @@ public Rule TableRow() { ); } + public Rule TableRowAfterDivider() { + Var leadingPipe = new Var(Boolean.FALSE); + return NodeSequence( + push(new TableRowNode()), + Optional('|', leadingPipe.set(Boolean.TRUE)), + OneOrMore(TableCellAfterDivider(), addAsChild()), + leadingPipe.get() || ((Node) peek()).getChildren().size() > 1 || + getContext().getInputBuffer().charAt(matchEnd() - 1) == '|', + Sp(), Newline() + ); + } + // vsch: #183 Exclude the trailing || from TableCellNode node, leading ones are not included, it makes it more intuitive // that the TableCell will include only the text of the cell. public Rule TableCell() { @@ -1682,6 +1694,23 @@ public Rule TableCell() { ); } + // vsch: if a table divider was seen then we can have cells that look like the divider cell, it is not a divider + public Rule TableCellAfterDivider() { + return Sequence( + NodeSequence( + push(new TableCellNode()), + Optional(Sp(), TestNot('|'), NotNewline()), + OneOrMore( + TestNot('|'), TestNot(Sp(), Newline()), Inline(), + addAsChild(), + Optional(Sp(), Test('|'), Test(Newline())) + ) + ), + ZeroOrMore('|'), + ((TableCellNode) peek()).setColSpan(Math.max(1, matchLength())) + ); + } + //************* SMARTS **************** public Rule Smarts() { diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast index c430abf..5027046 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast @@ -1,4 +1,4 @@ -RootNode [0-1235] +RootNode [0-1336] ParaNode [0-20] SuperNode [0-20] TextNode [0-19] 'A fenced code block' @@ -18,3 +18,14 @@ RootNode [0-1235] SuperNode [787-822] TextNode [787-822] 'test opening with more than 3 ticks' VerbatimNode [824-1235] 'public static String message(@PropertyKey(resourceBundle = BUNDLE_NAME) String key, Object... 
params) {\n return CommonBundle.message(BUNDLE, key, params);\n}\n\npublic static String messageOrBlank(@PropertyKey(resourceBundle = BUNDLE_NAME) String key, Object... params) {\n return CommonBundle.messageOrDefault(BUNDLE, key, "", params);\n}\n```\nshould still be fenced\n`````\nshould still be fenced\n' + ParaNode [1236-1303] + SuperNode [1236-1303] + TextNode [1236-1265] 'test false opening code fence' + SimpleNode [1265-1266] Linebreak + CodeNode [1266-1283] 'inline code' + SimpleNode [1283-1284] Linebreak + CodeNode [1284-1303] 'inline code 2' + ParaNode [1305-1313] + SuperNode [1305-1313] + TextNode [1305-1313] 'not code' + VerbatimNode [1315-1336] 'fenced block\n' diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html index a389360..01f3643 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html @@ -57,4 +57,12 @@ +

test false opening code fence inline code +inline code 2

+

not code

+
+
 
+kcolb decnef
+
+
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html index 96f575c..a4cf044 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html @@ -50,3 +50,11 @@ should still be fenced +

test false opening code fence inline code +inline code 2

+

not code

+
+
+fenced block
+
+
diff --git a/src/test/resources/pegdown/Tables.html b/src/test/resources/pegdown/Tables.html index 2548c4e..42afc2f 100644 --- a/src/test/resources/pegdown/Tables.html +++ b/src/test/resources/pegdown/Tables.html @@ -100,4 +100,30 @@

Tables

this is the caption - \ No newline at end of file +

Tables with divider looking table cell:

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Name Firstname Age
Fox Peter 42
—————–—-:
Guy Ritchie ca. 60
diff --git a/src/test/resources/pegdown/Tables.md b/src/test/resources/pegdown/Tables.md index 5a12e4d..8e9caaf 100644 --- a/src/test/resources/pegdown/Tables.md +++ b/src/test/resources/pegdown/Tables.md @@ -38,3 +38,12 @@ Content | **Cell** | Cell | New section | More | Data | And more | | And more | [this is the caption] + +Tables with divider looking table cell: + + Name | Firstname | Age +------|-----------|----: + Fox | Peter | 42 +------|-----------|----: + Guy | Ritchie | ca. 60 + diff --git a/src/test/scala/org/pegdown/AbstractPegDownSpec.scala b/src/test/scala/org/pegdown/AbstractPegDownSpec.scala index 501a9f8..8760cbc 100644 --- a/src/test/scala/org/pegdown/AbstractPegDownSpec.scala +++ b/src/test/scala/org/pegdown/AbstractPegDownSpec.scala @@ -41,7 +41,7 @@ abstract class AbstractPegDownSpec extends Specification { // actualHtml === "" // tidy up html for fair equality test - val tidyHtml = tidy(actualHtml) + var tidyHtml = tidy(actualHtml) normalize(tidyHtml) === normalize(expectedOutput) } @@ -88,6 +88,8 @@ abstract class AbstractPegDownSpec extends Specification { out.toString } - def normalize(string: String) = string.replace("\r\n", "\n").replace("\r", "\n") + // vsch: seems like there is a bug in Tidy, passing in HTML with
\n results in
\n\n, and passing one with
\n\n results in
\n + // didn't look too deep into it but the following for now solves the problem. + def normalize(string: String) = string.replace("\r\n", "\n").replace("\r", "\n").replace("
\n\n", "
\n") } From 3052c39407d56ccb0b42a3280d035ddf7af2f62f Mon Sep 17 00:00:00 2001 From: Vladimir Schneider Date: Mon, 26 Oct 2015 20:56:00 -0400 Subject: [PATCH 04/10] fix #203, #200, #204 --- src/main/java/org/pegdown/Extensions.java | 11 +- src/main/java/org/pegdown/LinkRenderer.java | 19 ++- src/main/java/org/pegdown/Parser.java | 142 +++++++++++------- .../pegdown/GFM_Fenced_Code_Blocks.ast | 13 +- .../pegdown/GFM_Fenced_Code_Blocks.html | 10 ++ .../pegdown/GFM_Fenced_Code_Blocks.md | 12 +- .../GFM_Fenced_Code_Blocks_reversed_all.html | 8 + ...GFM_Fenced_Code_Blocks_reversed_scala.html | 8 + src/test/resources/pegdown/Tables.html | 28 +++- src/test/resources/pegdown/Tables.md | 9 ++ .../org/pegdown/AbstractPegDownSpec.scala | 4 +- 11 files changed, 204 insertions(+), 60 deletions(-) diff --git a/src/main/java/org/pegdown/Extensions.java b/src/main/java/org/pegdown/Extensions.java index 7644da3..d56040c 100644 --- a/src/main/java/org/pegdown/Extensions.java +++ b/src/main/java/org/pegdown/Extensions.java @@ -37,7 +37,7 @@ public interface Extensions { /** * All of the smartypants prettyfications. Equivalent to SMARTS + QUOTES. - * + * * @see Smartypants */ static final int SMARTYPANTS = SMARTS + QUOTES; @@ -71,7 +71,7 @@ public interface Extensions { * @see MultiMarkdown */ static final int TABLES = 0x20; - + /** * PHP Markdown Extra style definition lists. * Additionally supports the small extension proposed in the article referenced below. @@ -153,6 +153,13 @@ public interface Extensions { */ static final int EXTANCHORLINKS = 0x00400000; + /** + * Generate anchor links for headers using complete contents of the header. + * Spaces and non-alphanumerics replaced by `-`, multiple dashes trimmed to one. + * Anchor link is added wrapping the header content as without EXTANCHORLINKS: `

header a

` + */ + static final int EXTANCHORLINKS_WRAP = 0x00800000; + /** * All Optionals other than Suppress and FORCELISTITEMPARA which is a backwards compatibility extension * diff --git a/src/main/java/org/pegdown/LinkRenderer.java b/src/main/java/org/pegdown/LinkRenderer.java index 851bcc7..049c2c3 100644 --- a/src/main/java/org/pegdown/LinkRenderer.java +++ b/src/main/java/org/pegdown/LinkRenderer.java @@ -3,13 +3,14 @@ import org.parboiled.common.StringUtils; import org.pegdown.ast.*; -import static org.pegdown.FastEncoder.*; - import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; +import static org.pegdown.FastEncoder.encode; +import static org.pegdown.FastEncoder.obfuscate; + /** * A LinkRenderer is responsible for turning an AST node representing a link into a {@link LinkRenderer.Rendering} * instance, which hold the actual properties of the link as it is going to be rendered. @@ -50,6 +51,20 @@ public Rendering withAttribute(String name, String value) { } public Rendering withAttribute(Attribute attr) { + int iMax = attributes.size(); + + // vsch: a little wasteful, a Map would be better, but we don't have too many attributes and + // this will not break code for those that have implemented their own derived ToHtmlSerializers. 
+ for (int i = 0; i < iMax; i++) { + Attribute attribute = attributes.get(i); + if (attribute.name.equals(attr.name)) { + // vsch: need to handle setting multiple classes, works for values too + // concatenate them with space between values, as for class + attr = new Attribute(attr.name, attribute.value + " " + attr.value); + attributes.remove(i); + break; + } + } attributes.add(attr); return this; } diff --git a/src/main/java/org/pegdown/Parser.java b/src/main/java/org/pegdown/Parser.java index 90b0ca7..cedd86e 100644 --- a/src/main/java/org/pegdown/Parser.java +++ b/src/main/java/org/pegdown/Parser.java @@ -50,7 +50,7 @@ */ @SuppressWarnings( {"InfiniteRecursion"}) public class Parser extends BaseParser implements Extensions { - + protected static final char CROSSED_OUT = '\uffff'; public interface ParseRunnerProvider { @@ -171,7 +171,7 @@ public Rule Verbatim() { return NodeSequence( OneOrMore( ZeroOrMore(BlankLine(), line.append('\n')), - Indent(), push(currentIndex()), + Indent(), push(currentIndex()), OneOrMore( FirstOf( Sequence('\t', line.append(repeat(' ', 4-(currentIndex()-1-(Integer)peek())%4))), @@ -184,11 +184,12 @@ public Rule Verbatim() { push(new VerbatimNode(text.getString())) ); } - + public Rule FencedCodeBlock() { StringBuilderVar text = new StringBuilderVar(); Var markerLength = new Var(); return NodeSequence( + // vsch: test to see if what appears to be a code fence is just inline code CodeFence(markerLength), TestNot(CodeFence(markerLength)), // prevent empty matches ZeroOrMore(BlankLine(), text.append('\n')), @@ -206,12 +207,12 @@ public Rule CodeFence(Var markerLength) { (markerLength.isSet() && matchLength() == markerLength.get()) || (markerLength.isNotSet() && markerLength.set(matchLength())), Sp(), - ZeroOrMore(TestNot(Newline()), ANY), // GFM code type identifier + ZeroOrMore(TestNot(FirstOf(Newline(), '~', '`')), ANY), // GFM code type identifier but exclude fenced code markers push(match()), Newline() ); } - + public Rule 
HorizontalRule() { return NodeSequence( NonindentSpace(), @@ -320,10 +321,18 @@ public boolean wrapInAnchor() { collectChildrensText(node, nodeInfo); String text = nodeInfo.text.toString().trim(); if (text.length() > 0) { - AnchorLinkNode anchor = new AnchorLinkNode(text, ""); - anchor.setStartIndex(nodeInfo.startIndex); - anchor.setEndIndex(nodeInfo.endIndex); - children.add(0, anchor); + if (ext(EXTANCHORLINKS_WRAP)) { + AnchorLinkNode anchor = new AnchorLinkNode(text, text); + anchor.setStartIndex(nodeInfo.startIndex); + anchor.setEndIndex(nodeInfo.endIndex); + children.clear(); + children.add(0, anchor); + } else { + AnchorLinkNode anchor = new AnchorLinkNode(text, ""); + anchor.setStartIndex(nodeInfo.startIndex); + anchor.setEndIndex(nodeInfo.endIndex); + children.add(0, anchor); + } } } } else { @@ -358,7 +367,7 @@ public void collectChildrensText(SuperNode node, AnchorNodeInfo nodeInfo) { } //************** Definition Lists ************ - + public Rule DefinitionList() { return NodeSequence( // test for successful definition list match before actually building it to reduce backtracking @@ -379,7 +388,7 @@ public Rule DefinitionList() { ) ); } - + public Rule DefListTerm() { return NodeSequence( TestNot(Spacechar()), @@ -390,7 +399,7 @@ public Rule DefListTerm() { Newline() ); } - + public Rule DefTermInline() { return Sequence( NotNewline(), @@ -398,7 +407,7 @@ public Rule DefTermInline() { Inline() ); } - + public Rule Definition() { SuperNodeCreator itemNodeCreator = new SuperNodeCreator() { public SuperNode create(Node child) { @@ -407,7 +416,7 @@ public SuperNode create(Node child) { }; return ListItem(DefListBullet(), itemNodeCreator); } - + public Rule DefListBullet() { return Sequence(NonindentSpace(), AnyOf(":~"), OneOrMore(Spacechar())); } @@ -617,7 +626,7 @@ public Rule TestNotItem() { ) ); } - + public Rule TestNotListItem() { return TestNot( FirstOf(new ArrayBuilder() @@ -635,7 +644,7 @@ public Rule Enumerator() { public Rule Bullet() { return 
Sequence(TestNot(HorizontalRule()), NonindentSpace(), AnyOf("+*-"), OneOrMore(Spacechar())); } - + //************* LIST ITEM ACTIONS **************** boolean appendCrossed(StringBuilderVar block) { @@ -694,7 +703,7 @@ boolean wrapFirstItemInPara(SuperNode item) { item.getChildren().set(0, rootNode); return true; } - + SuperNode wrapFirstSubItemInPara(SuperNode item) { Node firstItemFirstChild = item.getChildren().get(0); if (firstItemFirstChild.getChildren().size() == 1) { @@ -826,7 +835,7 @@ public Rule InlineOrIntermediateEndline() { Sequence(Endline(), Test(Inline())) ); } - + @MemoMismatches public Rule Inline() { return Sequence( @@ -898,7 +907,7 @@ public Rule UlOrStarLine() { public Rule CharLine(char c) { return FirstOf(NOrMore(c, 4), Sequence(Spacechar(), OneOrMore(c), Test(Spacechar()))); } - + public Rule StrongOrEmph() { return Sequence( Test(AnyOf("*_")), @@ -951,7 +960,7 @@ public Rule EmphOrStrong(String chars) { Optional(Sequence(EmphOrStrongClose(chars), setClosed())) ); } - + public Rule EmphOrStrongOpen(String chars) { return Sequence( TestNot(CharLine(chars.charAt(0))), @@ -979,10 +988,10 @@ public Rule EmphOrStrongClose(String chars) { ) ); } - + /** * This method checks if the parser can enter an emph or strong sequence - * Emph only allows Strong as direct child, Strong only allows Emph as + * Emph only allows Strong as direct child, Strong only allows Emph as * direct child. */ protected boolean mayEnterEmphOrStrong(String chars){ @@ -990,16 +999,16 @@ protected boolean mayEnterEmphOrStrong(String chars){ return false; } - Object parent = peek(2); + Object parent = peek(2); boolean isStrong = ( chars.length()==2 ); - + if( StrongEmphSuperNode.class.equals( parent.getClass() ) ){ if( ((StrongEmphSuperNode) parent).isStrong() == isStrong ) return false; } return true; } - + /** * This method checks if current position is a legal start position for a * strong or emph sequence by checking the last parsed character(-sequence). 
@@ -1017,16 +1026,16 @@ protected boolean isLegalEmphOrStrongStartPos(){ if(supernode.getChildren().size() < 1 ) return true; - + lastItem = supernode.getChildren().get( supernode.getChildren().size()-1 ); lastClass = lastItem.getClass(); } - + return ( TextNode.class.equals(lastClass) && ( (TextNode) lastItem).getText().endsWith(" ") ) || ( SimpleNode.class.equals(lastClass) ) || ( java.lang.Integer.class.equals(lastClass) ); } - + /** * Mark the current StrongEmphSuperNode as closed sequence */ @@ -1035,9 +1044,9 @@ protected boolean setClosed(){ node.setClosed(true); return true; } - + /** - * This method checks if current parent is a strong parent based on param `chars`. If so, it checks if the + * This method checks if current parent is a strong parent based on param `chars`. If so, it checks if the * latest inline node to be added as child does not end with a closing character of the parent. When this * is true, a next test should check if the closing character(s) of the child should become (part of) the * closing character(s) of the parent. @@ -1047,7 +1056,7 @@ protected boolean isStrongCloseCharStolen( String chars ){ return false; Object childClass = peek().getClass(); - + //checks if last `inline` to be added as child is not a StrongEmphSuperNode //that eats up a closing character for the parent StrongEmphSuperNode if( StrongEmphSuperNode.class.equals( childClass ) ){ @@ -1060,14 +1069,14 @@ protected boolean isStrongCloseCharStolen( String chars ){ return true; } } - + return false; } /** * Steals the last close char by marking a previously closed emph/strong node as unclosed. 
*/ - protected boolean stealBackStrongCloseChar(){ + protected boolean stealBackStrongCloseChar(){ StrongEmphSuperNode child = (StrongEmphSuperNode) peek(); child.setClosed(false); addAsChild(); @@ -1075,7 +1084,7 @@ protected boolean stealBackStrongCloseChar(){ push(new ValidEmphOrStrongCloseNode()); return true; } - + /** * This method checks if the last parsed character or sequence is a valid prefix for a closing char for * an emph or strong sequence. @@ -1084,23 +1093,23 @@ protected boolean isLegalEmphOrStrongClosePos(){ Object lastItem = peek(); if ( StrongEmphSuperNode.class.equals( lastItem.getClass() ) ){ List children = ((StrongEmphSuperNode) lastItem).getChildren(); - + if(children.size() < 1) return true; lastItem = children.get( children.size()-1 ); Class lastClass = lastItem.getClass(); - + if( TextNode.class.equals(lastClass) ) return !((TextNode) lastItem).getText().endsWith(" "); if( SimpleNode.class.equals(lastClass) ) return !((SimpleNode) lastItem).getType().equals(SimpleNode.Type.Linebreak); - + } return true; } - + //************* LINKS **************** @@ -1517,7 +1526,7 @@ public Rule EscapableChar() { public Rule NotNewline() { return TestNot(AnyOf("\n\r")); } - + public Rule Newline() { return FirstOf('\n', Sequence('\r', Optional('\n'))); } @@ -1579,8 +1588,8 @@ public Rule Table() { TableDivider(node), Optional( NodeSequence( - TableRow(), push(1, new TableBodyNode()) && addAsChild(), - ZeroOrMore(TableRow(), addAsChild()) + TableRowAfterDivider(), push(1, new TableBodyNode()) && addAsChild(), + ZeroOrMore(TableRowAfterDivider(), addAsChild()) ), addAsChild() // add the TableHeaderNode to the TableNode ), @@ -1654,6 +1663,18 @@ public Rule TableRow() { ); } + public Rule TableRowAfterDivider() { + Var leadingPipe = new Var(Boolean.FALSE); + return NodeSequence( + push(new TableRowNode()), + Optional('|', leadingPipe.set(Boolean.TRUE)), + OneOrMore(TableCellAfterDivider(), addAsChild()), + leadingPipe.get() || ((Node) 
peek()).getChildren().size() > 1 || + getContext().getInputBuffer().charAt(matchEnd() - 1) == '|', + Sp(), Newline() + ); + } + // vsch: #183 Exclude the trailing || from TableCellNode node, leading ones are not included, it makes it more intuitive // that the TableCell will include only the text of the cell. public Rule TableCell() { @@ -1673,6 +1694,23 @@ public Rule TableCell() { ); } + // vsch: if a table divider was seen then we can have cells that look like the divider cell, it is not a divider + public Rule TableCellAfterDivider() { + return Sequence( + NodeSequence( + push(new TableCellNode()), + Optional(Sp(), TestNot('|'), NotNewline()), + OneOrMore( + TestNot('|'), TestNot(Sp(), Newline()), Inline(), + addAsChild(), + Optional(Sp(), Test('|'), Test(Newline())) + ) + ), + ZeroOrMore('|'), + ((TableCellNode) peek()).setColSpan(Math.max(1, matchLength())) + ); + } + //************* SMARTS **************** public Rule Smarts() { @@ -1728,11 +1766,11 @@ public Rule DoubleAngleQuoted() { } //************* HELPERS **************** - + public Rule NOrMore(char c, int n) { return Sequence(repeat(c, n), ZeroOrMore(c)); } - + public Rule NodeSequence(Object... nodeRules) { return Sequence( push(getContext().getCurrentIndex()), @@ -1740,14 +1778,14 @@ public Rule NodeSequence(Object... 
nodeRules) { setIndices() ); } - + public boolean setIndices() { AbstractNode node = (AbstractNode) peek(); node.setStartIndex((Integer)pop(1)); node.setEndIndex(currentIndex()); return true; } - + public boolean addAsChild() { SuperNode parent = (SuperNode) peek(1); List children = parent.getChildren(); @@ -1766,7 +1804,7 @@ public boolean addAsChild() { children.add(child); return true; } - + public Node popAsNode() { return (Node) pop(); } @@ -1778,12 +1816,12 @@ public String popAsString() { public boolean ext(int extension) { return (options & extension) > 0; } - + // called for inner parses for list items and blockquotes public RootNode parseInternal(StringBuilderVar block) { char[] chars = block.getChars(); int[] ixMap = new int[chars.length + 1]; // map of cleaned indices to original indices - + // strip out CROSSED_OUT characters and build index map StringBuilder clean = new StringBuilder(); for (int i = 0; i < chars.length; i++) { @@ -1794,15 +1832,15 @@ public RootNode parseInternal(StringBuilderVar block) { } } ixMap[clean.length()] = chars.length; - + // run inner parse char[] cleaned = new char[clean.length()]; clean.getChars(0, cleaned.length, cleaned, 0); RootNode rootNode = parseInternal(cleaned); - + // correct AST indices with index map fixIndices(rootNode, ixMap); - + return rootNode; } @@ -1824,7 +1862,7 @@ public RootNode parseInternal(char[] source) { } return (RootNode) result.resultValue; } - + ParsingResult parseToParsingResult(char[] source) { parsingStartTimeStamp = System.currentTimeMillis(); return parseRunnerProvider.get(Root()).run(source); diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast index c430abf..5027046 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.ast @@ -1,4 +1,4 @@ -RootNode [0-1235] +RootNode [0-1336] ParaNode [0-20] SuperNode [0-20] TextNode [0-19] 'A fenced code block' @@ 
-18,3 +18,14 @@ RootNode [0-1235] SuperNode [787-822] TextNode [787-822] 'test opening with more than 3 ticks' VerbatimNode [824-1235] 'public static String message(@PropertyKey(resourceBundle = BUNDLE_NAME) String key, Object... params) {\n return CommonBundle.message(BUNDLE, key, params);\n}\n\npublic static String messageOrBlank(@PropertyKey(resourceBundle = BUNDLE_NAME) String key, Object... params) {\n return CommonBundle.messageOrDefault(BUNDLE, key, "", params);\n}\n```\nshould still be fenced\n`````\nshould still be fenced\n' + ParaNode [1236-1303] + SuperNode [1236-1303] + TextNode [1236-1265] 'test false opening code fence' + SimpleNode [1265-1266] Linebreak + CodeNode [1266-1283] 'inline code' + SimpleNode [1283-1284] Linebreak + CodeNode [1284-1303] 'inline code 2' + ParaNode [1305-1313] + SuperNode [1305-1313] + TextNode [1305-1313] 'not code' + VerbatimNode [1315-1336] 'fenced block\n' diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html index a6e4267..66b05e7 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.html @@ -48,3 +48,13 @@ ````` should still be fenced + +

test false opening code fence
+inline code
+inline code 2

+

not code

+
+
+fenced block
+
+
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md index c198d9d..a3ddb5e 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks.md @@ -47,4 +47,14 @@ public static String messageOrBlank(@PropertyKey(resourceBundle = BUNDLE_NAME) S should still be fenced ````` should still be fenced -```` \ No newline at end of file +```` + +test false opening code fence +```inline code``` +```inline code 2``` + +not code + +``` +fenced block +``` diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html index a389360..01f3643 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_all.html @@ -57,4 +57,12 @@ +

test false opening code fence inline code +inline code 2

+

not code

+
+
 
+kcolb decnef
+
+
diff --git a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html index 96f575c..a4cf044 100644 --- a/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html +++ b/src/test/resources/pegdown/GFM_Fenced_Code_Blocks_reversed_scala.html @@ -50,3 +50,11 @@ should still be fenced +

test false opening code fence inline code +inline code 2

+

not code

+
+
+fenced block
+
+
diff --git a/src/test/resources/pegdown/Tables.html b/src/test/resources/pegdown/Tables.html index 2548c4e..42afc2f 100644 --- a/src/test/resources/pegdown/Tables.html +++ b/src/test/resources/pegdown/Tables.html @@ -100,4 +100,30 @@

Tables

this is the caption - \ No newline at end of file +

Tables with divider looking table cell:

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Name Firstname Age
Fox Peter 42
—————–—-:
Guy Ritchie ca. 60
diff --git a/src/test/resources/pegdown/Tables.md b/src/test/resources/pegdown/Tables.md index 5a12e4d..8e9caaf 100644 --- a/src/test/resources/pegdown/Tables.md +++ b/src/test/resources/pegdown/Tables.md @@ -38,3 +38,12 @@ Content | **Cell** | Cell | New section | More | Data | And more | | And more | [this is the caption] + +Tables with divider looking table cell: + + Name | Firstname | Age +------|-----------|----: + Fox | Peter | 42 +------|-----------|----: + Guy | Ritchie | ca. 60 + diff --git a/src/test/scala/org/pegdown/AbstractPegDownSpec.scala b/src/test/scala/org/pegdown/AbstractPegDownSpec.scala index 501a9f8..b557744 100644 --- a/src/test/scala/org/pegdown/AbstractPegDownSpec.scala +++ b/src/test/scala/org/pegdown/AbstractPegDownSpec.scala @@ -88,6 +88,8 @@ abstract class AbstractPegDownSpec extends Specification { out.toString } - def normalize(string: String) = string.replace("\r\n", "\n").replace("\r", "\n") + // vsch: seems like there is a bug in Tidy, passing in HTML with
\n results in
\n\n, and passing one with
\n\n results in
\n + // didn't look too deep into it but the following for now solves the problem. + def normalize(string: String) = string.replace("\r\n", "\n").replace("\r", "\n").replace("
\n\n", "
\n") } From edb92be5f65116f9a458af0b4ad2ae5464150d5a Mon Sep 17 00:00:00 2001 From: Vladimir Schneider Date: Mon, 26 Oct 2015 21:03:02 -0400 Subject: [PATCH 05/10] fix extensions double def --- src/main/java/org/pegdown/Extensions.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/pegdown/Extensions.java b/src/main/java/org/pegdown/Extensions.java index f1d3fba..0cce0bf 100644 --- a/src/main/java/org/pegdown/Extensions.java +++ b/src/main/java/org/pegdown/Extensions.java @@ -152,12 +152,10 @@ public interface Extensions { * Anchor link is added as first element inside the header with empty content: `

header a

` */ static final int EXTANCHORLINKS = 0x00400000; - static final int EXTANCHORLINKS_WRAP = 0x00800000; /** - * Generate anchor links for headers using complete contents of the header. - * Spaces and non-alphanumerics replaced by `-`, multiple dashes trimmed to one. - * Anchor link is added wrapping the header content as without EXTANCHORLINKS: `

header a

` + * EXTANCHORLINKS should wrap header content instead of creating an empty anchor. + * Anchor link wraps the header content: `

header a

` */ static final int EXTANCHORLINKS_WRAP = 0x00800000; From 94fa76a736099dbf81fcbda59d03844ee6b2fd3e Mon Sep 17 00:00:00 2001 From: Vladimir Schneider Date: Mon, 26 Oct 2015 21:04:31 -0400 Subject: [PATCH 06/10] update readme --- README.markdown | 1 + 1 file changed, 1 insertion(+) diff --git a/README.markdown b/README.markdown index 393ba6c..f433df2 100644 --- a/README.markdown +++ b/README.markdown @@ -32,6 +32,7 @@ Currently _pegdown_ supports the following extensions over standard Markdown: * EXTANCHORLINKS: Generate anchor links for headers using complete contents of the header. * Spaces and non-alphanumerics replaced by `-`, multiple dashes trimmed to one. * Anchor link is added as first element inside the header with empty content: `

header

` +* EXTANCHORLINKS_WRAP: used in conjunction with above to create an anchor that wraps header content: `

header

` Note: _pegdown_ differs from the original Markdown in that it ignores in-word emphasis as in From b5b0314cfd864f1086abf7a1dc4ef56644ed9e62 Mon Sep 17 00:00:00 2001 From: Vladimir Schneider Date: Wed, 28 Oct 2015 00:37:14 -0400 Subject: [PATCH 07/10] add php markdown extras style footnotes --- README.markdown | 48 +++- build.sbt | 2 +- src/main/java/org/pegdown/Extensions.java | 10 +- src/main/java/org/pegdown/Parser.java | 222 ++++++++++-------- .../java/org/pegdown/ToHtmlSerializer.java | 87 +++++-- .../java/org/pegdown/ast/FootnoteNode.java | 48 ++++ .../java/org/pegdown/ast/FootnoteRefNode.java | 37 +++ src/main/java/org/pegdown/ast/RootNode.java | 9 + src/main/java/org/pegdown/ast/Visitor.java | 2 + .../OptionalExtensions/footnotes-ext.ast | 21 ++ .../OptionalExtensions/footnotes-ext.html | 10 + .../OptionalExtensions/footnotes-no-ext.ast | 63 +++++ .../OptionalExtensions/footnotes-no-ext.html | 10 + .../resources/OptionalExtensions/footnotes.md | 16 ++ .../org/pegdown/OptionalExtensionsSpec.scala | 20 ++ 15 files changed, 486 insertions(+), 119 deletions(-) create mode 100644 src/main/java/org/pegdown/ast/FootnoteNode.java create mode 100644 src/main/java/org/pegdown/ast/FootnoteRefNode.java create mode 100644 src/test/resources/OptionalExtensions/footnotes-ext.ast create mode 100644 src/test/resources/OptionalExtensions/footnotes-ext.html create mode 100644 src/test/resources/OptionalExtensions/footnotes-no-ext.ast create mode 100644 src/test/resources/OptionalExtensions/footnotes-no-ext.html create mode 100644 src/test/resources/OptionalExtensions/footnotes.md diff --git a/README.markdown b/README.markdown index f433df2..c644d1a 100644 --- a/README.markdown +++ b/README.markdown @@ -33,7 +33,53 @@ Currently _pegdown_ supports the following extensions over standard Markdown: * Spaces and non-alphanumerics replaced by `-`, multiple dashes trimmed to one. * Anchor link is added as first element inside the header with empty content: `

header

` * EXTANCHORLINKS_WRAP: used in conjunction with above to create an anchor that wraps header content: `

header

` - +* FOOTNOTES: Support PHP Markdown Extra style footnotes: `[^n]` for a footnote reference and `[^n]: Footnote text` for the footnote definition, where `n` is one or more digits. Footnotes will be put at the bottom of the page, sequentially numbered in order of appearance of the footnote reference. Footnotes that are not referenced will NOT be included in the HTML output. + + ```markdown + This paragraph has a footnote[^1] and another footnote[^2]. + + This one has more but out of sequence[^4] and [^8]. + + [^2]: Footnote 2 with a bit more text + and another continuation line + + [^1]: Footnote 1 + + [^3]: Unused footnote, it will not be added to the end of the page. + + [^4]: Out of sequence footnote + + [^8]: Have one that is used. + ``` + + will generate: + + ```html +

This paragraph has a footnote1 and another footnote2.

+

This one has more but out of sequence3 and 4.

+
+
    +
  1. Footnote 1

  2. +
  3. Footnote 2 with a bit more text and another continuation line

  4. +
  5. Out of sequence footnote

  6. +
  7. Have one that is used.

  8. +
+
+ ``` + to look like this: + +

This paragraph has a footnote1 and another footnote2.

+

This one has more but out of sequence3 and 4.

+
+
    +
  1. Footnote 1

  2. +
  3. Footnote 2 with a bit more text and another continuation line

  4. +
  5. Out of sequence footnote

  6. +
  7. Have one that is used.

  8. +
+
+ + Note: _pegdown_ differs from the original Markdown in that it ignores in-word emphasis as in > my_cool_file.txt diff --git a/build.sbt b/build.sbt index 46033af..fb86542 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ name := "pegdown" -version := "1.6.4" +version := "1.6.5" homepage := Some(new URL("http://pegdown.org")) diff --git a/src/main/java/org/pegdown/Extensions.java b/src/main/java/org/pegdown/Extensions.java index 0cce0bf..4a373bc 100644 --- a/src/main/java/org/pegdown/Extensions.java +++ b/src/main/java/org/pegdown/Extensions.java @@ -159,11 +159,17 @@ public interface Extensions { */ static final int EXTANCHORLINKS_WRAP = 0x00800000; + /** + * Enables footnote processing [^1]: Text Paragraph with continuations + * and footnote reference [^1] + */ + static final int FOOTNOTES = 0x01000000; + /** * All Optionals other than Suppress and FORCELISTITEMPARA which is a backwards compatibility extension * */ - static final int ALL_OPTIONALS = (ATXHEADERSPACE | RELAXEDHRULES | TASKLISTITEMS | EXTANCHORLINKS); - static final int ALL_WITH_OPTIONALS = ALL | (ATXHEADERSPACE | RELAXEDHRULES | TASKLISTITEMS); + static final int ALL_OPTIONALS = (ATXHEADERSPACE | RELAXEDHRULES | TASKLISTITEMS | EXTANCHORLINKS | FOOTNOTES); + static final int ALL_WITH_OPTIONALS = ALL | (ATXHEADERSPACE | RELAXEDHRULES | TASKLISTITEMS | FOOTNOTES); } diff --git a/src/main/java/org/pegdown/Parser.java b/src/main/java/org/pegdown/Parser.java index cedd86e..d0dc6b0 100644 --- a/src/main/java/org/pegdown/Parser.java +++ b/src/main/java/org/pegdown/Parser.java @@ -21,7 +21,9 @@ import org.parboiled.BaseParser; import org.parboiled.Context; import org.parboiled.Rule; -import org.parboiled.annotations.*; +import org.parboiled.annotations.Cached; +import org.parboiled.annotations.DontSkipActionsInPredicates; +import org.parboiled.annotations.MemoMismatches; import org.parboiled.common.ArrayBuilder; import org.parboiled.common.ImmutableList; import 
org.parboiled.parserunners.ParseRunner; @@ -31,24 +33,19 @@ import org.parboiled.support.StringVar; import org.parboiled.support.Var; import org.pegdown.ast.*; -import org.pegdown.ast.Node; import org.pegdown.ast.SimpleNode.Type; import org.pegdown.plugins.PegDownPlugins; -import java.util.Set; -import java.util.HashSet; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; +import java.util.*; -import static org.parboiled.errors.ErrorUtils.printParseErrors; import static org.parboiled.common.StringUtils.repeat; +import static org.parboiled.errors.ErrorUtils.printParseErrors; /** * Parboiled parser for the standard and extended markdown syntax. * Builds an Abstract Syntax Tree (AST) of {@link Node} objects. */ -@SuppressWarnings( {"InfiniteRecursion"}) +@SuppressWarnings({ "InfiniteRecursion" }) public class Parser extends BaseParser implements Extensions { protected static final char CROSSED_OUT = '\uffff'; @@ -69,6 +66,7 @@ public ParseRunner get(Rule rule) { protected final ParseRunnerProvider parseRunnerProvider; protected final PegDownPlugins plugins; final List abbreviations = new ArrayList(); + final List footnotes = new ArrayList(); final List references = new ArrayList(); long parsingStartTimeStamp = 0L; @@ -93,10 +91,12 @@ public RootNode parse(char[] source) { RootNode root = parseInternal(source); root.setAbbreviations(ImmutableList.copyOf(abbreviations)); root.setReferences(ImmutableList.copyOf(references)); + root.setFootnotes(ImmutableList.copyOf(footnotes)); return root; } finally { abbreviations.clear(); references.clear(); + footnotes.clear(); } } @@ -115,6 +115,7 @@ public Rule Block() { FirstOf(new ArrayBuilder() .add(plugins.getBlockPluginRules()) .add(BlockQuote(), Verbatim()) + .addNonNulls(ext(FOOTNOTES) ? Footnote() : null) .addNonNulls(ext(ABBREVIATIONS) ? Abbreviation() : null) .add(Reference(), HorizontalRule(), Heading(), OrderedList(), BulletList(), HtmlBlock()) .addNonNulls(ext(TABLES) ? 
Table() : null) @@ -133,7 +134,30 @@ public Rule Para() { // the input had no EOL's at the end at all, even then only one of these will be included NonindentSpace(), Inlines(), push(new ParaNode(popAsNode())), Test(BlankLine()) + ); + } + + //************* FOOTNOTES ******************** + public Rule Footnote() { + Var node = new Var(); + return Sequence( + NodeSequence( + NonindentSpace(), + Sequence( + "[^", OneOrMore(Digit()), ']', Sp(), ':', Sp() + ), + push(node.setAndGet(new FootnoteNode(match()))), + Inlines(), + node.get().setFootnote(popAsNode()) + ), + footnotes.add(node.get()) + ); + } + + public Rule FootnoteRef() { + return NodeSequence( + Sequence("[^", OneOrMore(Digit()), ']'), push(new FootnoteRefNode(match())) ); } @@ -150,7 +174,6 @@ public Rule BlockQuote() { TestNot(BlankLine()), Line(inner) ), -// ZeroOrMore(BlankLine()), inner.append(match()) Optional(Sequence(OneOrMore(BlankLine()), optional.append(match()), Test('>')), inner.append(optional.getString()) && optional.clearContents()) ), @@ -174,7 +197,7 @@ public Rule Verbatim() { Indent(), push(currentIndex()), OneOrMore( FirstOf( - Sequence('\t', line.append(repeat(' ', 4-(currentIndex()-1-(Integer)peek())%4))), + Sequence('\t', line.append(repeat(' ', 4 - (currentIndex() - 1 - (Integer) peek()) % 4))), Sequence(NotNewline(), ANY, line.append(matchedChar())) ) ), @@ -383,7 +406,7 @@ public Rule DefinitionList() { push(new SuperNode()), OneOrMore(DefListTerm(), addAsChild()), OneOrMore(Definition(), addAsChild()), - ((SuperNode)peek(1)).getChildren().addAll(popAsNode().getChildren()), + ((SuperNode) peek(1)).getChildren().addAll(popAsNode().getChildren()), Optional(BlankLine()) ) ); @@ -500,7 +523,7 @@ public Rule ListItem(Rule itemStart, SuperNodeCreator itemNodeCreator) { fixFirstItem((SuperNode) peek(1)) && push(itemNodeCreator.create(parseListBlock(block.appended('\n')))), ZeroOrMore( -// debugMsg("have a " + (tight.get() ? 
"tight" : "loose") + " list body at " + getContext().getCurrentIndex(), block.getString()), + //debugMsg("have a " + (tight.get() ? "tight" : "loose") + " list body at " + getContext().getCurrentIndex(), block.getString()), push(getContext().getCurrentIndex()), // it is not safe to gobble up the leading blank line if it is followed by another list item // it must be left for it to determine its own looseness. Much safer to just test for it but not consume it. @@ -539,9 +562,9 @@ public Rule TaskListItem(Rule itemStart, SuperNodeTaskItemCreator itemNodeCreato return Sequence( push(getContext().getCurrentIndex()), FirstOf(CrossedOut(BlankLine(), block), tight.set(true)), - CrossedOut(itemStart, block), Optional(CrossedOut(Sequence(FirstOf(Sequence("[ ]", taskType.set(1)), Sequence(FirstOf("[x]","[X]"), taskType.set(2))), OneOrMore(Spacechar())), taskListMarker)), + CrossedOut(itemStart, block), Optional(CrossedOut(Sequence(FirstOf(Sequence("[ ]", taskType.set(1)), Sequence(FirstOf("[x]", "[X]"), taskType.set(2))), OneOrMore(Spacechar())), taskListMarker)), block.append(taskListMarker.getString()), Line(block), -// debugMsg("have a " + taskType.get() + " task list body", block.getString()), + //debugMsg("have a " + taskType.get() + " task list body", block.getString()), ZeroOrMore( Optional(CrossedOut(Indent(), temp)), TestNotItem(), @@ -553,7 +576,7 @@ public Rule TaskListItem(Rule itemStart, SuperNodeTaskItemCreator itemNodeCreato fixFirstItem((SuperNode) peek(1)) && push(itemNodeCreator.create(parseListBlock(block.appended('\n')), taskType.get(), taskListMarker.getString())) && taskListMarker.clearContents(), ZeroOrMore( -// debugMsg("have a " + (tight.get() ? "tight" : "loose") + " list body at " + getContext().getCurrentIndex(), block.getString()), + //debugMsg("have a " + (tight.get() ? 
"tight" : "loose") + " list body at " + getContext().getCurrentIndex(), block.getString()), push(getContext().getCurrentIndex()), // it is not safe to gobble up the leading blank line if it is followed by another list item // it must be left for it to determine its own looseness. Much safer to just test for it but not consume it. @@ -630,9 +653,9 @@ public Rule TestNotItem() { public Rule TestNotListItem() { return TestNot( FirstOf(new ArrayBuilder() - .add(Bullet(), Enumerator()) - .addNonNulls(ext(DEFINITIONS) ? DefListBullet() : null) - .get() + .add(Bullet(), Enumerator()) + .addNonNulls(ext(DEFINITIONS) ? DefListBullet() : null) + .get() ) ); } @@ -709,7 +732,7 @@ SuperNode wrapFirstSubItemInPara(SuperNode item) { if (firstItemFirstChild.getChildren().size() == 1) { Node firstGrandChild = firstItemFirstChild.getChildren().get(0); if (firstGrandChild instanceof ListItemNode) { - wrapFirstItemInPara((SuperNode)firstGrandChild); + wrapFirstItemInPara((SuperNode) firstGrandChild); } } return item; @@ -784,39 +807,39 @@ public boolean isHtmlTag(String string) { } protected static final Set HTML_TAGS = new HashSet(Arrays.asList( - // https://developer.mozilla.org/en/docs/Web/HTML/Element - // Basic elements - "html", - // Document metadata - "base", "head", "link", "meta", "style", "title", - // Content sectioning - "address", "article", "aside", "body", "footer", "header", "h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "nav", "section", - // Text bontent - "dd", "div", "dl", "dt", "figcaption", "figure", "hr", "li", "main", "ol", "p", "pre", "ul", - // Inline text semantics - // "abbr" not included, breaks some tests - "a", "b", "bdi", "bdo", "br", "cite", "code", "data", "dfn", "em", "i", "kbd", "mark", "q", "rp", "rt", - "rtc", "ruby", "s", "samp", "small", "span", "strong", "sub", "sup", "time", "u", "var", "wbr", - // Image & multimedia - "area", "audio", "img", "map", "track", "video", - // Embedded content - "embed", "iframe", "object", "param", "source", - // 
Scripting - "canvas", "noscript", "script", - // Edits - "del", "ins", - // Table content - "caption", "col", "colgroup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", - // Forms - "button", "datalist", "fieldset", "form", "input", "keygen", "label", "legend", "meter", - "optgroup", "option", "output", "progress", "select", "textarea", - // Interactive elements - "details", "dialog", "menu", "menuitem", "summary", - // Web Components - "content", "decorator", "element", "shadow", "template", - // Obsolete and deprecated elements - "acronym", "applet", "basefont", "big", "blink", "center", "dir", "frame", "frameset", - "isindex", "listing", "noembed", "plaintext", "spacer", "strike", "tt", "xmp" + // https://developer.mozilla.org/en/docs/Web/HTML/Element + // Basic elements + "html", + // Document metadata + "base", "head", "link", "meta", "style", "title", + // Content sectioning + "address", "article", "aside", "body", "footer", "header", "h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "nav", "section", + // Text bontent + "dd", "div", "dl", "dt", "figcaption", "figure", "hr", "li", "main", "ol", "p", "pre", "ul", + // Inline text semantics + // "abbr" not included, breaks some tests + "a", "b", "bdi", "bdo", "br", "cite", "code", "data", "dfn", "em", "i", "kbd", "mark", "q", "rp", "rt", + "rtc", "ruby", "s", "samp", "small", "span", "strong", "sub", "sup", "time", "u", "var", "wbr", + // Image & multimedia + "area", "audio", "img", "map", "track", "video", + // Embedded content + "embed", "iframe", "object", "param", "source", + // Scripting + "canvas", "noscript", "script", + // Edits + "del", "ins", + // Table content + "caption", "col", "colgroup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", + // Forms + "button", "datalist", "fieldset", "form", "input", "keygen", "label", "legend", "meter", + "optgroup", "option", "output", "progress", "select", "textarea", + // Interactive elements + "details", "dialog", "menu", "menuitem", "summary", + // 
Web Components + "content", "decorator", "element", "shadow", "template", + // Obsolete and deprecated elements + "acronym", "applet", "basefont", "big", "blink", "center", "dir", "frame", "frameset", + "isindex", "listing", "noembed", "plaintext", "spacer", "strike", "tt", "xmp" )); //************* INLINES **************** @@ -853,9 +876,10 @@ public Rule NonLinkInline() { .add(plugins.getInlinePluginRules()) .add(Str(), Endline(), UlOrStarLine(), Space(), StrongOrEmph(), Image(), Code(), InlineHtml(), Entity(), EscapedChar()) - .addNonNulls(ext(QUOTES) ? new Rule[]{SingleQuoted(), DoubleQuoted(), DoubleAngleQuoted()} : null) - .addNonNulls(ext(SMARTS) ? new Rule[]{Smarts()} : null) - .addNonNulls(ext(STRIKETHROUGH) ? new Rule[]{Strike()} : null) + .addNonNulls(ext(QUOTES) ? new Rule[] { SingleQuoted(), DoubleQuoted(), DoubleAngleQuoted() } : null) + .addNonNulls(ext(SMARTS) ? new Rule[] { Smarts() } : null) + .addNonNulls(ext(STRIKETHROUGH) ? new Rule[] { Strike() } : null) + .addNonNulls(ext(FOOTNOTES) ? new Rule[] { FootnoteRef() } : null) .add(Symbol()) .get() ); @@ -916,11 +940,11 @@ public Rule StrongOrEmph() { } public Rule Emph() { - return NodeSequence( FirstOf( EmphOrStrong("*"), EmphOrStrong("_") ) ); + return NodeSequence(FirstOf(EmphOrStrong("*"), EmphOrStrong("_"))); } public Rule Strong() { - return NodeSequence( FirstOf( EmphOrStrong("**"), EmphOrStrong("__") ) ); + return NodeSequence(FirstOf(EmphOrStrong("**"), EmphOrStrong("__"))); } // vsch: TODO: test for unclosed strikethrough sequence carrying through the isClosed attribute, as soon as I can figure out how @@ -994,16 +1018,16 @@ public Rule EmphOrStrongClose(String chars) { * Emph only allows Strong as direct child, Strong only allows Emph as * direct child. 
*/ - protected boolean mayEnterEmphOrStrong(String chars){ - if( !isLegalEmphOrStrongStartPos() ){ + protected boolean mayEnterEmphOrStrong(String chars) { + if (!isLegalEmphOrStrongStartPos()) { return false; } Object parent = peek(2); - boolean isStrong = ( chars.length()==2 ); + boolean isStrong = (chars.length() == 2); - if( StrongEmphSuperNode.class.equals( parent.getClass() ) ){ - if( ((StrongEmphSuperNode) parent).isStrong() == isStrong ) + if (StrongEmphSuperNode.class.equals(parent.getClass())) { + if (((StrongEmphSuperNode) parent).isStrong() == isStrong) return false; } return true; @@ -1013,33 +1037,33 @@ protected boolean mayEnterEmphOrStrong(String chars){ * This method checks if current position is a legal start position for a * strong or emph sequence by checking the last parsed character(-sequence). */ - protected boolean isLegalEmphOrStrongStartPos(){ - if( currentIndex() == 0 ) + protected boolean isLegalEmphOrStrongStartPos() { + if (currentIndex() == 0) return true; Object lastItem = peek(1); Class lastClass = lastItem.getClass(); SuperNode supernode; - while( SuperNode.class.isAssignableFrom(lastClass) ) { + while (SuperNode.class.isAssignableFrom(lastClass)) { supernode = (SuperNode) lastItem; - if(supernode.getChildren().size() < 1 ) + if (supernode.getChildren().size() < 1) return true; - lastItem = supernode.getChildren().get( supernode.getChildren().size()-1 ); + lastItem = supernode.getChildren().get(supernode.getChildren().size() - 1); lastClass = lastItem.getClass(); } - return ( TextNode.class.equals(lastClass) && ( (TextNode) lastItem).getText().endsWith(" ") ) - || ( SimpleNode.class.equals(lastClass) ) - || ( java.lang.Integer.class.equals(lastClass) ); + return (TextNode.class.equals(lastClass) && ((TextNode) lastItem).getText().endsWith(" ")) + || (SimpleNode.class.equals(lastClass)) + || (java.lang.Integer.class.equals(lastClass)); } /** * Mark the current StrongEmphSuperNode as closed sequence */ - protected boolean 
setClosed(){ + protected boolean setClosed() { StrongEmphSuperNode node = (StrongEmphSuperNode) peek(); node.setClosed(true); return true; @@ -1051,20 +1075,20 @@ protected boolean setClosed(){ * is true, a next test should check if the closing character(s) of the child should become (part of) the * closing character(s) of the parent. */ - protected boolean isStrongCloseCharStolen( String chars ){ - if(chars.length() < 2 ) + protected boolean isStrongCloseCharStolen(String chars) { + if (chars.length() < 2) return false; Object childClass = peek().getClass(); //checks if last `inline` to be added as child is not a StrongEmphSuperNode //that eats up a closing character for the parent StrongEmphSuperNode - if( StrongEmphSuperNode.class.equals( childClass ) ){ + if (StrongEmphSuperNode.class.equals(childClass)) { StrongEmphSuperNode child = (StrongEmphSuperNode) peek(); if (!child.isClosed()) return false; - if( child.getChars().endsWith( chars.substring(0, 1) ) ){ + if (child.getChars().endsWith(chars.substring(0, 1))) { //The nested child ends with closing char for the parent, allow stealing it back return true; } @@ -1076,7 +1100,7 @@ protected boolean isStrongCloseCharStolen( String chars ){ /** * Steals the last close char by marking a previously closed emph/strong node as unclosed. */ - protected boolean stealBackStrongCloseChar(){ + protected boolean stealBackStrongCloseChar() { StrongEmphSuperNode child = (StrongEmphSuperNode) peek(); child.setClosed(false); addAsChild(); @@ -1089,28 +1113,26 @@ protected boolean stealBackStrongCloseChar(){ * This method checks if the last parsed character or sequence is a valid prefix for a closing char for * an emph or strong sequence. 
*/ - protected boolean isLegalEmphOrStrongClosePos(){ + protected boolean isLegalEmphOrStrongClosePos() { Object lastItem = peek(); - if ( StrongEmphSuperNode.class.equals( lastItem.getClass() ) ){ + if (StrongEmphSuperNode.class.equals(lastItem.getClass())) { List children = ((StrongEmphSuperNode) lastItem).getChildren(); - if(children.size() < 1) + if (children.size() < 1) return true; - lastItem = children.get( children.size()-1 ); + lastItem = children.get(children.size() - 1); Class lastClass = lastItem.getClass(); - if( TextNode.class.equals(lastClass) ) + if (TextNode.class.equals(lastClass)) return !((TextNode) lastItem).getText().endsWith(" "); - if( SimpleNode.class.equals(lastClass) ) + if (SimpleNode.class.equals(lastClass)) return !((SimpleNode) lastItem).getType().equals(SimpleNode.Type.Linebreak); - } return true; } - //************* LINKS **************** public Rule Image() { @@ -1124,7 +1146,7 @@ public Rule Image() { public Rule Link() { return NodeSequence( FirstOf(new ArrayBuilder() - .addNonNulls(ext(WIKILINKS) ? new Rule[]{WikiLink()} : null) + .addNonNulls(ext(WIKILINKS) ? new Rule[] { WikiLink() } : null) .add(Sequence(Label(), FirstOf(ExplicitLink(false), ReferenceLink(false)))) .add(AutoLink()) .get() @@ -1163,8 +1185,8 @@ public Rule ReferenceLink(boolean image) { Sequence(push(null), push(null)) // implicit referencelink without trailing [] ), push(image ? 
- new RefImageNode((SuperNode)popAsNode(), popAsString(), popAsNode()) : - new RefLinkNode((SuperNode)popAsNode(), popAsString(), popAsNode()) + new RefImageNode((SuperNode) popAsNode(), popAsString(), popAsNode()) : + new RefLinkNode((SuperNode) popAsNode(), popAsString(), popAsNode()) ) ); } @@ -1211,10 +1233,10 @@ public Rule AutoLink() { public Rule WikiLink() { return Sequence( - "[[", - OneOrMore(TestNot(Sequence(']',']')), ANY), // might have to restrict from ANY - push(new WikiLinkNode(match())), - "]]" + "[[", + OneOrMore(TestNot(Sequence(']', ']')), ANY), // might have to restrict from ANY + push(new WikiLinkNode(match())), + "]]" ); } @@ -1250,7 +1272,7 @@ public Rule AutoLinkEnd() { //************* REFERENCE **************** // can't treat labels the same as the image alt since the image alt should be able to empty. - public Rule ImageAlt(){ + public Rule ImageAlt() { return Sequence( '[', checkForParsingTimeout(), @@ -1263,6 +1285,7 @@ public Rule ImageAlt(){ public Rule Label() { return Sequence( '[', + (ext(FOOTNOTES) ? TestNot('^', OneOrMore(Digit()), ']') : EMPTY), checkForParsingTimeout(), push(new SuperNode()), OneOrMore(TestNot(']'), NonAutoLinkInline(), addAsChild()), @@ -1475,6 +1498,9 @@ public Rule SpecialChar() { if (ext(DEFINITIONS)) { chars += ":"; } + if (ext(FOOTNOTES)) { + chars += "^"; + } if (ext(TABLES)) { chars += "|"; } @@ -1507,6 +1533,9 @@ public Rule EscapableChar() { if (ext(DEFINITIONS)) { chars += ":"; } + if (ext(FOOTNOTES)) { + chars += "^"; + } if (ext(TABLES)) { chars += "|"; } @@ -1602,7 +1631,9 @@ public Rule Table() { public Rule TableCaption() { return Sequence( - '[', Sp(), + '[', + (ext(FOOTNOTES) ? TestNot('^', OneOrMore(Digit()), ']') : EMPTY), + Sp(), CaptionStart(), Optional(Sp(), Optional(']'), Sp()), Newline() @@ -1781,7 +1812,7 @@ public Rule NodeSequence(Object... 
nodeRules) { public boolean setIndices() { AbstractNode node = (AbstractNode) peek(); - node.setStartIndex((Integer)pop(1)); + node.setStartIndex((Integer) pop(1)); node.setEndIndex(currentIndex()); return true; } @@ -1881,5 +1912,4 @@ protected interface SuperNodeCreator { protected interface SuperNodeTaskItemCreator extends SuperNodeCreator { SuperNode create(Node child, int taskType, String taskListMarker); } - } diff --git a/src/main/java/org/pegdown/ToHtmlSerializer.java b/src/main/java/org/pegdown/ToHtmlSerializer.java index 0f42ff8..9a8e8b3 100644 --- a/src/main/java/org/pegdown/ToHtmlSerializer.java +++ b/src/main/java/org/pegdown/ToHtmlSerializer.java @@ -22,11 +22,7 @@ import org.pegdown.ast.*; import org.pegdown.plugins.ToHtmlSerializerPlugin; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; +import java.util.*; import static org.parboiled.common.Preconditions.checkArgNotNull; @@ -41,8 +37,10 @@ public class ToHtmlSerializer implements Visitor { protected TableNode currentTableNode; protected int currentTableColumn; protected boolean inTableHeader; + protected int rootNodeRecursion = 0; protected Map verbatimSerializers; + protected Map referencedFootnotes = new HashMap(); public ToHtmlSerializer(LinkRenderer linkRenderer) { this(linkRenderer, Collections.emptyList()); @@ -72,21 +70,72 @@ public String toHtml(RootNode astRoot) { } public void visit(RootNode node) { - for (ReferenceNode refNode : node.getReferences()) { - visitChildren(refNode); - references.put(normalize(printer.getString()), refNode); - printer.clear(); + rootNodeRecursion++; + try { + for (ReferenceNode refNode : node.getReferences()) { + visitChildren(refNode); + references.put(normalize(printer.getString()), refNode); + printer.clear(); + } + + for (AbbreviationNode abbrNode : node.getAbbreviations()) { + visitChildren(abbrNode); + String abbr = printer.getString(); + printer.clear(); + 
abbrNode.getExpansion().accept(this); + String expansion = printer.getString(); + abbreviations.put(abbr, expansion); + printer.clear(); + } + + visitChildren(node); + + if (rootNodeRecursion == 1 && referencedFootnotes.size() > 0) { + Map footnotes = new HashMap(); + + for (FootnoteNode footnoteNode : node.getFootnotes()) { + footnotes.put(referencedFootnotes.get(footnoteNode.getNumber()), footnoteNode); + } + + printer.print("
\n"); + printer.print("
\n"); + printer.print("
    \n"); + + for (int i = 0; i < referencedFootnotes.size(); i++) { + int num = i + 1; + if (!footnotes.containsKey(num)) { + // empty footnote + printer.print("
  1. \n"); + } else { + printer.print("
  2. "); + visitChildren((SuperNode) footnotes.get(num).getFootnote()); + printer.print("

    "); + printer.print("
  3. \n"); + } + } + + printer.print("
\n"); + printer.print("
\n"); + } + } finally { + rootNodeRecursion--; } - for (AbbreviationNode abbrNode : node.getAbbreviations()) { - visitChildren(abbrNode); - String abbr = printer.getString(); - printer.clear(); - abbrNode.getExpansion().accept(this); - String expansion = printer.getString(); - abbreviations.put(abbr, expansion); - printer.clear(); + } + + public void visit(FootnoteNode node) { + // this one we don't output for HTML, it is done at the bottom of the page + } + + public void visit(FootnoteRefNode node) { + int footnote = node.getNumber(); + int num = footnote; + if (!referencedFootnotes.containsKey(footnote)) { + num = referencedFootnotes.size() + 1; + referencedFootnotes.put(footnote, num); + } else { + num = referencedFootnotes.get(footnote); } - visitChildren(node); + printer.print("" + num + ""); } public void visit(AbbreviationNode node) { @@ -450,7 +499,7 @@ protected void printImageTag(LinkRenderer.Rendering rendering) { printer.print(" references = ImmutableList.of(); private List abbreviations = ImmutableList.of(); + private List footnotes = ImmutableList.of(); + + public List getFootnotes() { + return footnotes; + } + + public void setFootnotes(List footnotes) { + this.footnotes = footnotes; + } public List getReferences() { return references; diff --git a/src/main/java/org/pegdown/ast/Visitor.java b/src/main/java/org/pegdown/ast/Visitor.java index 7cc4269..e941bad 100644 --- a/src/main/java/org/pegdown/ast/Visitor.java +++ b/src/main/java/org/pegdown/ast/Visitor.java @@ -28,6 +28,8 @@ public interface Visitor { void visit(DefinitionListNode node); void visit(DefinitionNode node); void visit(DefinitionTermNode node); + void visit(FootnoteNode node); + void visit(FootnoteRefNode node); void visit(ExpImageNode node); void visit(ExpLinkNode node); void visit(HeaderNode node); diff --git a/src/test/resources/OptionalExtensions/footnotes-ext.ast b/src/test/resources/OptionalExtensions/footnotes-ext.ast new file mode 100644 index 0000000..abef8c2 --- /dev/null +++ 
b/src/test/resources/OptionalExtensions/footnotes-ext.ast @@ -0,0 +1,21 @@ +RootNode [0-340] + ParaNode [0-59] + SuperNode [0-59] + TextNode [0-29] 'This paragraph has a footnote' + FootnoteRefNode [29-33] '[^1]' + TextNode [33-54] ' and another footnote' + FootnoteRefNode [54-58] '[^2]' + SpecialTextNode [58-59] '.' + ParaNode [61-113] + SuperNode [61-113] + TextNode [61-98] 'This one has more but out of sequence' + FootnoteRefNode [98-102] '[^4]' + TextNode [102-107] ' and ' + FootnoteRefNode [107-111] '[^8]' + SpecialTextNode [111-112] '.' + TextNode [112-113] ' ' + FootnoteNode [116-187] '[^2]: ' + FootnoteNode [193-209] '[^1]: ' + FootnoteNode [211-278] '[^3]: ' + FootnoteNode [280-310] '[^4]: ' + FootnoteNode [312-340] '[^8]: ' diff --git a/src/test/resources/OptionalExtensions/footnotes-ext.html b/src/test/resources/OptionalExtensions/footnotes-ext.html new file mode 100644 index 0000000..6993ecb --- /dev/null +++ b/src/test/resources/OptionalExtensions/footnotes-ext.html @@ -0,0 +1,10 @@ +

This paragraph has a footnote1 and another footnote2.

+

This one has more but out of sequence3 and 4.

+
+
    +
  1. Footnote 1

  2. +
  3. Footnote 2 with a bit more text
    and another continuation line

  4. +
  5. Out of sequence footnote

  6. +
  7. Have one that is used.

  8. +
+
diff --git a/src/test/resources/OptionalExtensions/footnotes-no-ext.ast b/src/test/resources/OptionalExtensions/footnotes-no-ext.ast new file mode 100644 index 0000000..35a7639 --- /dev/null +++ b/src/test/resources/OptionalExtensions/footnotes-no-ext.ast @@ -0,0 +1,63 @@ +RootNode [0-340] + ParaNode [0-59] + SuperNode [0-59] + TextNode [0-29] 'This paragraph has a footnote' + RefLinkNode [29-33] + SuperNode [0-0] + TextNode [30-32] '^1' + TextNode [33-54] ' and another footnote' + RefLinkNode [54-58] + SuperNode [0-0] + TextNode [55-57] '^2' + SpecialTextNode [58-59] '.' + ParaNode [61-113] + SuperNode [61-113] + TextNode [61-98] 'This one has more but out of sequence' + RefLinkNode [98-102] + SuperNode [0-0] + TextNode [99-101] '^4' + TextNode [102-107] ' and ' + RefLinkNode [107-111] + SuperNode [0-0] + TextNode [108-110] '^8' + SpecialTextNode [111-112] '.' + TextNode [112-113] ' ' + ParaNode [116-187] + SuperNode [116-187] + RefLinkNode [116-120] + SuperNode [0-0] + TextNode [117-119] '^2' + SpecialTextNode [120-121] ':' + TextNode [121-153] ' Footnote 2 with a bit more text' + SimpleNode [153-154] Linebreak + TextNode [154-187] ' and another continuation line' + ParaNode [193-209] + SuperNode [193-209] + RefLinkNode [193-197] + SuperNode [0-0] + TextNode [194-196] '^1' + SpecialTextNode [197-198] ':' + TextNode [198-209] ' Footnote 1' + ParaNode [211-278] + SuperNode [211-278] + RefLinkNode [211-215] + SuperNode [0-0] + TextNode [212-214] '^3' + SpecialTextNode [215-216] ':' + TextNode [216-277] ' Unused footnote, it will not be added to the end of the page' + SpecialTextNode [277-278] '.' 
+ ParaNode [280-310] + SuperNode [280-310] + RefLinkNode [280-284] + SuperNode [0-0] + TextNode [281-283] '^4' + SpecialTextNode [284-285] ':' + TextNode [285-310] ' Out of sequence footnote' + ParaNode [312-340] + SuperNode [312-340] + RefLinkNode [312-316] + SuperNode [0-0] + TextNode [313-315] '^8' + SpecialTextNode [316-317] ':' + TextNode [317-339] ' Have one that is used' + SpecialTextNode [339-340] '.' diff --git a/src/test/resources/OptionalExtensions/footnotes-no-ext.html b/src/test/resources/OptionalExtensions/footnotes-no-ext.html new file mode 100644 index 0000000..455e7fc --- /dev/null +++ b/src/test/resources/OptionalExtensions/footnotes-no-ext.html @@ -0,0 +1,10 @@ +

This paragraph has a footnote[^1] and another footnote[^2].

+

This one has more but out of sequence[^4] and [^8].

+

[^2]: Footnote 2 with a bit more text
+ and another continuation line

+

[^1]: Footnote 1

+

[^3]: Unused footnote, it will not be added to the end of the +page.

+

[^4]: Out of sequence footnote

+

[^8]: Have one that is used.

+ diff --git a/src/test/resources/OptionalExtensions/footnotes.md b/src/test/resources/OptionalExtensions/footnotes.md new file mode 100644 index 0000000..da185c5 --- /dev/null +++ b/src/test/resources/OptionalExtensions/footnotes.md @@ -0,0 +1,16 @@ +This paragraph has a footnote[^1] and another footnote[^2]. + +This one has more but out of sequence[^4] and [^8]. + + +[^2]: Footnote 2 with a bit more text + and another continuation line + +[^1]: Footnote 1 + +[^3]: Unused footnote, it will not be added to the end of the page. + +[^4]: Out of sequence footnote + +[^8]: Have one that is used. + diff --git a/src/test/scala/org/pegdown/OptionalExtensionsSpec.scala b/src/test/scala/org/pegdown/OptionalExtensionsSpec.scala index 7f3c33e..74344d6 100644 --- a/src/test/scala/org/pegdown/OptionalExtensionsSpec.scala +++ b/src/test/scala/org/pegdown/OptionalExtensionsSpec.scala @@ -80,6 +80,26 @@ class OptionalExtensionsSpec extends AbstractPegDownSpec { implicit val processor = new PegDownProcessor(ALL | EXTANCHORLINKS) testASTAlt("OptionalExtensions/extanchors", "-ext") } + + "footnotes-no-ext" in { + implicit val processor = new PegDownProcessor(ALL) + testAlt("OptionalExtensions/footnotes", "-no-ext") + } + + "footnotes-ext AST" in { + implicit val processor = new PegDownProcessor(ALL) + testASTAlt("OptionalExtensions/footnotes", "-no-ext") + } + + "footnotes-ext" in { + implicit val processor = new PegDownProcessor(ALL | FOOTNOTES) + testAlt("OptionalExtensions/footnotes", "-ext") + } + + "footnotes-ext AST" in { + implicit val processor = new PegDownProcessor(ALL | FOOTNOTES) + testASTAlt("OptionalExtensions/footnotes", "-ext") + } } } } From 7531d266633075c2dac24274f78c2a1cb0868e4b Mon Sep 17 00:00:00 2001 From: Vladimir Schneider Date: Wed, 28 Oct 2015 00:37:29 -0400 Subject: [PATCH 08/10] minor readme edit --- README.markdown | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.markdown b/README.markdown index c644d1a..ef5c770 100644 --- 
a/README.markdown +++ b/README.markdown @@ -38,7 +38,7 @@ Currently _pegdown_ supports the following extensions over standard Markdown: ```markdown This paragraph has a footnote[^1] and another footnote[^2]. - This one has more but out of sequence[^4] and [^8]. + This one has more but out of sequence[^4] and[^8]. [^2]: Footnote 2 with a bit more text and another continuation line @@ -56,7 +56,7 @@ Currently _pegdown_ supports the following extensions over standard Markdown: ```html

This paragraph has a footnote1 and another footnote2.

-

This one has more but out of sequence3 and 4.

+

This one has more but out of sequence3 and4.


  1. Footnote 1

  2. @@ -69,7 +69,7 @@ Currently _pegdown_ supports the following extensions over standard Markdown: to look like this:

    This paragraph has a footnote1 and another footnote2.

    -

    This one has more but out of sequence3 and 4.

    +

    This one has more but out of sequence3 and4.


    1. Footnote 1

    2. From 7667758b64933c93507ce4a3cb4a347879c6992a Mon Sep 17 00:00:00 2001 From: Vladimir Schneider Date: Fri, 30 Oct 2015 12:10:32 -0400 Subject: [PATCH 09/10] change footnote labels to be alphanumeric, dash, dot and underscore instead of just digits --- README.markdown | 10 +- src/main/java/org/pegdown/Parser.java | 16 ++- .../java/org/pegdown/ToHtmlSerializer.java | 10 +- .../java/org/pegdown/ast/FootnoteNode.java | 8 +- .../java/org/pegdown/ast/FootnoteRefNode.java | 8 +- .../OptionalExtensions/footnotes-ext.ast | 37 ++--- .../OptionalExtensions/footnotes-no-ext.ast | 128 ++++++++++-------- .../OptionalExtensions/footnotes-no-ext.html | 14 +- .../resources/OptionalExtensions/footnotes.md | 11 +- .../org/pegdown/OptionalExtensionsSpec.scala | 2 +- 10 files changed, 134 insertions(+), 110 deletions(-) diff --git a/README.markdown b/README.markdown index ef5c770..d3cab93 100644 --- a/README.markdown +++ b/README.markdown @@ -33,14 +33,14 @@ Currently _pegdown_ supports the following extensions over standard Markdown: * Spaces and non-alphanumerics replaced by `-`, multiple dashes trimmed to one. * Anchor link is added as first element inside the header with empty content: `

      header

      ` * EXTANCHORLINKS_WRAP: used in conjunction with above to create an anchor that wraps header content: `

      header

      ` -* FOOTNOTES: Support PHP Markdown Extra style footnotes: `[^n] for footnote reference` and `[^n]: Footnote text` for footnotes. Where `n` is one or more digits. Footnotes will be put at the bottom of the page, sequentially numbered in order of appearance of the footnote reference. Footnotes that are not referenced will NOT be included in the HTML output. +* FOOTNOTES: Support PHP Markdown Extra style footnotes: `[^n] for footnote reference` and `[^n]: Footnote text` for footnotes. Where `n` is one or more digit, letter, `-`, `_` or `.`. Footnotes will be put at the bottom of the page, sequentially numbered in order of appearance of the footnote reference. Footnotes that are not referenced will NOT be included in the HTML output. ```markdown - This paragraph has a footnote[^1] and another footnote[^2]. + This paragraph has a footnote[^1] and another footnote[^two]. - This one has more but out of sequence[^4] and[^8]. + This one has more but out of sequence[^4] and[^eight]. - [^2]: Footnote 2 with a bit more text + [^two]: Footnote 2 with a bit more text and another continuation line [^1]: Footnote 1 @@ -49,7 +49,7 @@ Currently _pegdown_ supports the following extensions over standard Markdown: [^4]: Out of sequence footnote - [^8]: Have one that is used. + [^eight]: Have one that is used. 
``` will generate: diff --git a/src/main/java/org/pegdown/Parser.java b/src/main/java/org/pegdown/Parser.java index d0dc6b0..5a58ec5 100644 --- a/src/main/java/org/pegdown/Parser.java +++ b/src/main/java/org/pegdown/Parser.java @@ -145,7 +145,7 @@ public Rule Footnote() { NodeSequence( NonindentSpace(), Sequence( - "[^", OneOrMore(Digit()), ']', Sp(), ':', Sp() + FootnoteLabel(), Sp(), ':', Sp() ), push(node.setAndGet(new FootnoteNode(match()))), Inlines(), @@ -157,10 +157,14 @@ public Rule Footnote() { public Rule FootnoteRef() { return NodeSequence( - Sequence("[^", OneOrMore(Digit()), ']'), push(new FootnoteRefNode(match())) + FootnoteLabel(), push(new FootnoteRefNode(match())) ); } + public Rule FootnoteLabel() { + return Sequence("[^", OneOrMore(AlphanumericDashUnderDot()), ']'); + } + // vsch: #184 modified to only include trailing blank lines if there are more blocks for the blockquote following // otherwise don't include the blank lines, they are not part of the block quote public Rule BlockQuote() { @@ -1285,7 +1289,7 @@ public Rule ImageAlt() { public Rule Label() { return Sequence( '[', - (ext(FOOTNOTES) ? TestNot('^', OneOrMore(Digit()), ']') : EMPTY), + (ext(FOOTNOTES) ? TestNot('^') : EMPTY), checkForParsingTimeout(), push(new SuperNode()), OneOrMore(TestNot(']'), NonAutoLinkInline(), addAsChild()), @@ -1572,6 +1576,10 @@ public Rule Alphanumeric() { return FirstOf(Letter(), Digit()); } + public Rule AlphanumericDashUnderDot() { + return FirstOf(Letter(), Digit(), '-', '_', '.'); + } + public Rule Letter() { return FirstOf(CharRange('a', 'z'), CharRange('A', 'Z')); } @@ -1632,7 +1640,7 @@ public Rule Table() { public Rule TableCaption() { return Sequence( '[', - (ext(FOOTNOTES) ? TestNot('^', OneOrMore(Digit()), ']') : EMPTY), + (ext(FOOTNOTES) ? 
TestNot('^') : EMPTY), Sp(), CaptionStart(), Optional(Sp(), Optional(']'), Sp()), diff --git a/src/main/java/org/pegdown/ToHtmlSerializer.java b/src/main/java/org/pegdown/ToHtmlSerializer.java index 9a8e8b3..2322c28 100644 --- a/src/main/java/org/pegdown/ToHtmlSerializer.java +++ b/src/main/java/org/pegdown/ToHtmlSerializer.java @@ -40,7 +40,7 @@ public class ToHtmlSerializer implements Visitor { protected int rootNodeRecursion = 0; protected Map verbatimSerializers; - protected Map referencedFootnotes = new HashMap(); + protected Map referencedFootnotes = new HashMap(); public ToHtmlSerializer(LinkRenderer linkRenderer) { this(linkRenderer, Collections.emptyList()); @@ -94,7 +94,7 @@ public void visit(RootNode node) { Map footnotes = new HashMap(); for (FootnoteNode footnoteNode : node.getFootnotes()) { - footnotes.put(referencedFootnotes.get(footnoteNode.getNumber()), footnoteNode); + footnotes.put(referencedFootnotes.get(footnoteNode.getLabel()), footnoteNode); } printer.print("
      \n"); @@ -127,10 +127,10 @@ public void visit(FootnoteNode node) { } public void visit(FootnoteRefNode node) { - int footnote = node.getNumber(); - int num = footnote; + String footnote = node.getLabel(); + int num = referencedFootnotes.size() + 1; + if (!referencedFootnotes.containsKey(footnote)) { - num = referencedFootnotes.size() + 1; referencedFootnotes.put(footnote, num); } else { num = referencedFootnotes.get(footnote); diff --git a/src/main/java/org/pegdown/ast/FootnoteNode.java b/src/main/java/org/pegdown/ast/FootnoteNode.java index ef74915..6f65c29 100644 --- a/src/main/java/org/pegdown/ast/FootnoteNode.java +++ b/src/main/java/org/pegdown/ast/FootnoteNode.java @@ -19,16 +19,16 @@ package org.pegdown.ast; public class FootnoteNode extends TextNode { - private int number; + private String label; private Node footnote; public FootnoteNode(String text) { super(text); - this.number = Integer.parseInt(text.substring(2, text.indexOf(']'))); + this.label = text.substring(2, text.indexOf(']')); } - public int getNumber() { - return number; + public String getLabel() { + return label; } public Node getFootnote() { diff --git a/src/main/java/org/pegdown/ast/FootnoteRefNode.java b/src/main/java/org/pegdown/ast/FootnoteRefNode.java index e061270..d839505 100644 --- a/src/main/java/org/pegdown/ast/FootnoteRefNode.java +++ b/src/main/java/org/pegdown/ast/FootnoteRefNode.java @@ -19,15 +19,15 @@ package org.pegdown.ast; public class FootnoteRefNode extends TextNode { - private int number; + private String label; public FootnoteRefNode(String text) { super(text); - this.number = Integer.parseInt(text.substring(2, text.indexOf(']'))); + this.label = text.substring(2, text.indexOf(']')); } - public int getNumber() { - return number; + public String getLabel() { + return label; } @Override diff --git a/src/test/resources/OptionalExtensions/footnotes-ext.ast b/src/test/resources/OptionalExtensions/footnotes-ext.ast index abef8c2..0b4f1d8 100644 --- 
a/src/test/resources/OptionalExtensions/footnotes-ext.ast +++ b/src/test/resources/OptionalExtensions/footnotes-ext.ast @@ -1,21 +1,22 @@ -RootNode [0-340] - ParaNode [0-59] - SuperNode [0-59] +RootNode [0-403] + ParaNode [0-61] + SuperNode [0-61] TextNode [0-29] 'This paragraph has a footnote' FootnoteRefNode [29-33] '[^1]' TextNode [33-54] ' and another footnote' - FootnoteRefNode [54-58] '[^2]' - SpecialTextNode [58-59] '.' - ParaNode [61-113] - SuperNode [61-113] - TextNode [61-98] 'This one has more but out of sequence' - FootnoteRefNode [98-102] '[^4]' - TextNode [102-107] ' and ' - FootnoteRefNode [107-111] '[^8]' - SpecialTextNode [111-112] '.' - TextNode [112-113] ' ' - FootnoteNode [116-187] '[^2]: ' - FootnoteNode [193-209] '[^1]: ' - FootnoteNode [211-278] '[^3]: ' - FootnoteNode [280-310] '[^4]: ' - FootnoteNode [312-340] '[^8]: ' + FootnoteRefNode [54-60] '[^two]' + SpecialTextNode [60-61] '.' + ParaNode [63-119] + SuperNode [63-119] + TextNode [63-100] 'This one has more but out of sequence' + FootnoteRefNode [100-104] '[^4]' + TextNode [104-109] ' and ' + FootnoteRefNode [109-117] '[^eight]' + SpecialTextNode [117-118] '.' 
+ TextNode [118-119] ' ' + FootnoteNode [122-195] '[^two]: ' + FootnoteNode [201-217] '[^1]: ' + FootnoteNode [219-292] '[^un-used]: ' + FootnoteNode [294-324] '[^4]: ' + FootnoteNode [326-358] '[^eight]: ' + FootnoteNode [360-403] '[^another_unused]: ' diff --git a/src/test/resources/OptionalExtensions/footnotes-no-ext.ast b/src/test/resources/OptionalExtensions/footnotes-no-ext.ast index 35a7639..ceaee5a 100644 --- a/src/test/resources/OptionalExtensions/footnotes-no-ext.ast +++ b/src/test/resources/OptionalExtensions/footnotes-no-ext.ast @@ -1,63 +1,75 @@ -RootNode [0-340] - ParaNode [0-59] - SuperNode [0-59] +RootNode [0-403] + ParaNode [0-61] + SuperNode [0-61] TextNode [0-29] 'This paragraph has a footnote' RefLinkNode [29-33] SuperNode [0-0] TextNode [30-32] '^1' TextNode [33-54] ' and another footnote' - RefLinkNode [54-58] - SuperNode [0-0] - TextNode [55-57] '^2' - SpecialTextNode [58-59] '.' - ParaNode [61-113] - SuperNode [61-113] - TextNode [61-98] 'This one has more but out of sequence' - RefLinkNode [98-102] - SuperNode [0-0] - TextNode [99-101] '^4' - TextNode [102-107] ' and ' - RefLinkNode [107-111] - SuperNode [0-0] - TextNode [108-110] '^8' - SpecialTextNode [111-112] '.' - TextNode [112-113] ' ' - ParaNode [116-187] - SuperNode [116-187] - RefLinkNode [116-120] - SuperNode [0-0] - TextNode [117-119] '^2' - SpecialTextNode [120-121] ':' - TextNode [121-153] ' Footnote 2 with a bit more text' - SimpleNode [153-154] Linebreak - TextNode [154-187] ' and another continuation line' - ParaNode [193-209] - SuperNode [193-209] - RefLinkNode [193-197] - SuperNode [0-0] - TextNode [194-196] '^1' - SpecialTextNode [197-198] ':' - TextNode [198-209] ' Footnote 1' - ParaNode [211-278] - SuperNode [211-278] - RefLinkNode [211-215] - SuperNode [0-0] - TextNode [212-214] '^3' - SpecialTextNode [215-216] ':' - TextNode [216-277] ' Unused footnote, it will not be added to the end of the page' - SpecialTextNode [277-278] '.' 
- ParaNode [280-310] - SuperNode [280-310] - RefLinkNode [280-284] - SuperNode [0-0] - TextNode [281-283] '^4' - SpecialTextNode [284-285] ':' - TextNode [285-310] ' Out of sequence footnote' - ParaNode [312-340] - SuperNode [312-340] - RefLinkNode [312-316] - SuperNode [0-0] - TextNode [313-315] '^8' - SpecialTextNode [316-317] ':' - TextNode [317-339] ' Have one that is used' - SpecialTextNode [339-340] '.' + RefLinkNode [54-60] + SuperNode [0-0] + TextNode [55-59] '^two' + SpecialTextNode [60-61] '.' + ParaNode [63-119] + SuperNode [63-119] + TextNode [63-100] 'This one has more but out of sequence' + RefLinkNode [100-104] + SuperNode [0-0] + TextNode [101-103] '^4' + TextNode [104-109] ' and ' + RefLinkNode [109-117] + SuperNode [0-0] + TextNode [110-116] '^eight' + SpecialTextNode [117-118] '.' + TextNode [118-119] ' ' + ParaNode [122-195] + SuperNode [122-195] + RefLinkNode [122-128] + SuperNode [0-0] + TextNode [123-127] '^two' + SpecialTextNode [128-129] ':' + TextNode [129-161] ' Footnote 2 with a bit more text' + SimpleNode [161-162] Linebreak + TextNode [162-195] ' and another continuation line' + ParaNode [201-217] + SuperNode [201-217] + RefLinkNode [201-205] + SuperNode [0-0] + TextNode [202-204] '^1' + SpecialTextNode [205-206] ':' + TextNode [206-217] ' Footnote 1' + ParaNode [219-292] + SuperNode [219-292] + RefLinkNode [219-229] + SuperNode [0-0] + TextNode [220-223] '^un' + SpecialTextNode [223-224] '-' + TextNode [224-228] 'used' + SpecialTextNode [229-230] ':' + TextNode [230-291] ' Unused footnote, it will not be added to the end of the page' + SpecialTextNode [291-292] '.' 
+ ParaNode [294-324] + SuperNode [294-324] + RefLinkNode [294-298] + SuperNode [0-0] + TextNode [295-297] '^4' + SpecialTextNode [298-299] ':' + TextNode [299-324] ' Out of sequence footnote' + ParaNode [326-358] + SuperNode [326-358] + RefLinkNode [326-334] + SuperNode [0-0] + TextNode [327-333] '^eight' + SpecialTextNode [334-335] ':' + TextNode [335-357] ' Have one that is used' + SpecialTextNode [357-358] '.' + ParaNode [360-403] + SuperNode [360-403] + RefLinkNode [360-377] + SuperNode [0-0] + TextNode [361-369] '^another' + SpecialTextNode [369-370] '_' + TextNode [370-376] 'unused' + SpecialTextNode [377-378] ':' + TextNode [378-402] ' This will not be output' + SpecialTextNode [402-403] '.' diff --git a/src/test/resources/OptionalExtensions/footnotes-no-ext.html b/src/test/resources/OptionalExtensions/footnotes-no-ext.html index 455e7fc..3992286 100644 --- a/src/test/resources/OptionalExtensions/footnotes-no-ext.html +++ b/src/test/resources/OptionalExtensions/footnotes-no-ext.html @@ -1,10 +1,12 @@ -

      This paragraph has a footnote[^1] and another footnote[^2].

      -

      This one has more but out of sequence[^4] and [^8].

      -

      [^2]: Footnote 2 with a bit more text
      +

      This paragraph has a footnote[^1] and another +footnote[^two].

      +

      This one has more but out of sequence[^4] and [^eight].

      +

      [^two]: Footnote 2 with a bit more text
      and another continuation line

      [^1]: Footnote 1

      -

      [^3]: Unused footnote, it will not be added to the end of the -page.

      +

      [^un-used]: Unused footnote, it will not be added to the end of +the page.

      [^4]: Out of sequence footnote

      -

      [^8]: Have one that is used.

      +

      [^eight]: Have one that is used.

      +

      [^another_unused]: This will not be output.

      diff --git a/src/test/resources/OptionalExtensions/footnotes.md b/src/test/resources/OptionalExtensions/footnotes.md index da185c5..1da34f1 100644 --- a/src/test/resources/OptionalExtensions/footnotes.md +++ b/src/test/resources/OptionalExtensions/footnotes.md @@ -1,16 +1,17 @@ -This paragraph has a footnote[^1] and another footnote[^2]. +This paragraph has a footnote[^1] and another footnote[^two]. -This one has more but out of sequence[^4] and [^8]. +This one has more but out of sequence[^4] and [^eight]. -[^2]: Footnote 2 with a bit more text +[^two]: Footnote 2 with a bit more text and another continuation line [^1]: Footnote 1 -[^3]: Unused footnote, it will not be added to the end of the page. +[^un-used]: Unused footnote, it will not be added to the end of the page. [^4]: Out of sequence footnote -[^8]: Have one that is used. +[^eight]: Have one that is used. +[^another_unused]: This will not be output. diff --git a/src/test/scala/org/pegdown/OptionalExtensionsSpec.scala b/src/test/scala/org/pegdown/OptionalExtensionsSpec.scala index 74344d6..017d577 100644 --- a/src/test/scala/org/pegdown/OptionalExtensionsSpec.scala +++ b/src/test/scala/org/pegdown/OptionalExtensionsSpec.scala @@ -86,7 +86,7 @@ class OptionalExtensionsSpec extends AbstractPegDownSpec { testAlt("OptionalExtensions/footnotes", "-no-ext") } - "footnotes-ext AST" in { + "footnotes-no-ext AST" in { implicit val processor = new PegDownProcessor(ALL) testASTAlt("OptionalExtensions/footnotes", "-no-ext") } From 193d87c831eaae0f364dc4565dbb0651ed284fca Mon Sep 17 00:00:00 2001 From: Vladimir Schneider Date: Fri, 30 Oct 2015 12:15:04 -0400 Subject: [PATCH 10/10] update readme MultiMarkdown style footnotes, not PHP Extra --- README.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.markdown b/README.markdown index d3cab93..ab7031d 100644 --- a/README.markdown +++ b/README.markdown @@ -33,7 +33,7 @@ Currently _pegdown_ supports the following extensions over 
standard Markdown: * Spaces and non-alphanumerics replaced by `-`, multiple dashes trimmed to one. * Anchor link is added as first element inside the header with empty content: `

      header

      ` * EXTANCHORLINKS_WRAP: used in conjunction with above to create an anchor that wraps header content: `

      header

      ` -* FOOTNOTES: Support PHP Markdown Extra style footnotes: `[^n] for footnote reference` and `[^n]: Footnote text` for footnotes. Where `n` is one or more digit, letter, `-`, `_` or `.`. Footnotes will be put at the bottom of the page, sequentially numbered in order of appearance of the footnote reference. Footnotes that are not referenced will NOT be included in the HTML output. +* FOOTNOTES: Support MultiMarkdown style footnotes: `[^n] for footnote reference` and `[^n]: Footnote text` for footnotes. Where `n` is one or more digit, letter, `-`, `_` or `.`. Footnotes will be put at the bottom of the page, sequentially numbered in order of appearance of the footnote reference. Footnotes that are not referenced will NOT be included in the HTML output. ```markdown This paragraph has a footnote[^1] and another footnote[^two].