Skip to content

Commit

Permalink
Implement DOMNode.getTextContent() according to API (#1704)
Browse files Browse the repository at this point in the history
Fix #1695

Signed-off-by: Christoph Läubrich <[email protected]>
  • Loading branch information
laeubi authored Nov 16, 2024
1 parent 5421ff5 commit 3c30e71
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 5 deletions.
2 changes: 1 addition & 1 deletion org.eclipse.lemminx/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@
<dependency>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
<!-- A dependency may declare only one version element; 1.4.01 is the one kept. -->
<!-- NOTE(review): presumably 1.4.01 is required because it provides the DOM Level 3 API (e.g. Node.getTextContent) - confirm. -->
<version>1.4.01</version>
</dependency>
<dependency>
<groupId>com.kotcrab.remark</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -827,7 +827,39 @@ public DOMElement getOrphanEndElement(int offset, String tagName, boolean anyOrp
*/
@Override
public String getTextContent() throws DOMException {
	// The result depends on the type of this node:
	// - text-like nodes return their own node value,
	// - document, document type and notation nodes return null,
	// - every other node returns the concatenated text content of its children
	//   (comments and processing instructions excluded), or "" without children.
	switch (getNodeType()) {
	case Node.TEXT_NODE:
	case Node.CDATA_SECTION_NODE:
	case Node.COMMENT_NODE:
	case Node.PROCESSING_INSTRUCTION_NODE:
		// Text-like nodes simply return their node value.
		return getNodeValue();
	case Node.DOCUMENT_NODE:
	case Node.DOCUMENT_TYPE_NODE:
	case Node.NOTATION_NODE:
		// These special node types have to return null.
		return null;
	default:
		if (this.children == null || this.children.size() == 0) {
			// Empty string if the node has no children.
			return "";
		}
		// Concatenation of the text content of every child node.
		final StringBuilder builder = new StringBuilder();
		for (DOMNode child : this.children) {
			final short childType = child.getNodeType();
			if (childType == Node.COMMENT_NODE || childType == Node.PROCESSING_INSTRUCTION_NODE) {
				// Comments and processing instructions never contribute to the
				// text content of their parent.
				continue;
			}
			// Recurse: a child may itself return null (see the cases above),
			// which must not end up as the literal "null" in the result.
			final String text = child.getTextContent();
			if (text != null) {
				builder.append(text);
			}
		}
		return builder.toString();
	}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,19 @@
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.function.Function;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.eclipse.lemminx.dom.DOMDocumentType.DocumentTypeKind;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Node;

/**
* XML parser tests.
Expand All @@ -45,6 +53,66 @@ public void testNestedElement() {
assertDocument("<html><body></body></html>", html);
}

/**
 * Text that sits in a nested descendant must be reported as the text content
 * of the root element.
 */
@Test
public void testGetTextContentWithSimpleContent() throws Exception {
	final String xml = "<a><b><c>Hello</c></b></a>";
	final String expectedText = "Hello";
	assertTextContent(xml, expectedText, Document::getDocumentElement);
}

/**
 * Text interleaved with nested elements (mixed content) must be concatenated
 * in document order.
 */
@Test
public void testGetTextContentWithMixedContent() throws Exception {
	final String xml = "<a>H<b>e<c>ll</c></b>o</a>";
	final String expectedText = "Hello";
	assertTextContent(xml, expectedText, Document::getDocumentElement);
}

/**
 * Text spread over several sibling elements must be concatenated in document
 * order.
 */
@Test
public void testGetTextContentWithComplexContent() throws Exception {
	final String xml = "<a><b>H</b><c>e</c><b>ll</b><x>o</x></a>";
	final String expectedText = "Hello";
	assertTextContent(xml, expectedText, Document::getDocumentElement);
}

/**
 * An element that directly contains character data must report exactly that
 * data as its text content.
 */
@Test
public void testGetTextContentWithCharContent() throws Exception {
	final String xml = "<text>Hello</text>";
	final String expectedText = "Hello";
	assertTextContent(xml, expectedText, Document::getDocumentElement);
}

/**
 * The content of a CDATA section must be part of the text content of its
 * ancestors.
 */
@Test
public void testGetTextContentWithCDATAContent() throws Exception {
	final String xml = "<a><b><c><![CDATA[Hello]]></c></b></a>";
	final String expectedText = "Hello";
	assertTextContent(xml, expectedText, Document::getDocumentElement);
}

/**
 * Comments must not contribute to the text content of an ancestor element.
 */
@Test
public void testGetTextContentWithComment() throws Exception {
	final String xml = "<a><b><c>Hello</c><!-- comments must not be included --></b></a>";
	final String expectedText = "Hello";
	assertTextContent(xml, expectedText, Document::getDocumentElement);
}

/**
 * The document node itself must report {@code null} as its text content.
 */
@Test
public void testGetTextIsNullForDocument() throws Exception {
	final String xml = "<a>Hello</a>";
	// Inspect the document node itself rather than one of its descendants.
	final Function<Document, Node> documentItself = document -> document;
	assertTextContent(xml, null, documentItself);
}

/**
 * Processing instructions must not contribute to the text content of an
 * ancestor element.
 */
@Test
public void testGetTextContentWithPI() throws Exception {
	final String xml = "<a><b><c>Hello</c><?PI must not be included ?></b></a>";
	final String expectedText = "Hello";
	assertTextContent(xml, expectedText, Document::getDocumentElement);
}

/**
 * Parses the given XML twice, first with the {@code DOMParser} and then with
 * the parser obtained from {@link DocumentBuilderFactory}, and checks for both
 * resulting documents that the selected node reports the expected text content.
 *
 * @param xml           the XML content to parse
 * @param expected      the expected text content, may be {@code null}
 * @param nodeExtractor selects the node to inspect from the parsed document
 * @throws Exception if one of the parsers fails
 */
private void assertTextContent(String xml, String expected, Function<Document, Node> nodeExtractor)
		throws Exception {
	// First pass: the document produced by DOMParser.
	Document parsedByDomParser = DOMParser.getInstance().parse(xml, "uri", null);
	assertTextContent(parsedByDomParser, expected, nodeExtractor);

	// Second pass: the same content parsed through DocumentBuilderFactory, so
	// both implementations are held to the same expectation.
	DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
	DocumentBuilder documentBuilder = factory.newDocumentBuilder();
	byte[] utf8Content = xml.getBytes(StandardCharsets.UTF_8);
	Document parsedByBuilder = documentBuilder.parse(new ByteArrayInputStream(utf8Content));
	assertTextContent(parsedByBuilder, expected, nodeExtractor);
}

/**
 * Checks that the node selected from the given document reports the expected
 * text content.
 *
 * @param document      the document to inspect
 * @param expected      the expected text content, may be {@code null}
 * @param nodeExtractor selects the node to inspect from the document
 */
private void assertTextContent(Document document, String expected, Function<Document, Node> nodeExtractor) {
	Node selectedNode = nodeExtractor.apply(document);
	String actual = selectedNode.getTextContent();
	boolean textExpected = expected != null;
	if (textExpected) {
		// Report an unexpected null explicitly before comparing the values.
		assertNotNull(actual);
	}
	assertEquals(expected, actual);
}



@Test
public void testNestedElements() {
DOMNode head = createElement("head", 6, 12, 19, true);
Expand All @@ -56,6 +124,7 @@ public void testNestedElements() {
assertDocument("<html><head></head><body></body></html>", html);
}


@Test
public void testNestedNestedElements() {
DOMNode c = createElement("c", 6, 9, 13, true);
Expand Down Expand Up @@ -95,7 +164,7 @@ public void testEmptyTagT() {

@Test
public void singleEndTag() {
DOMElement meta = (DOMElement) createElement("meta", 0, 0, 7, false);
DOMElement meta = createElement("meta", 0, 0, 7, false);
assertDocument("</meta>", meta);
assertFalse(meta.hasStartTag());
assertTrue(meta.hasEndTag());
Expand All @@ -104,8 +173,8 @@ public void singleEndTag() {

@Test
public void insideEndTag() {
DOMElement meta = (DOMElement) createElement("meta", 6, 6, 13, false);
DOMElement html = (DOMElement) createElement("html", 0, 13, 20, true);
DOMElement meta = createElement("meta", 6, 6, 13, false);
DOMElement html = createElement("html", 0, 13, 20, true);
html.addChild(meta);

assertDocument("<html></meta></html>", html);
Expand Down

0 comments on commit 3c30e71

Please sign in to comment.