From 4ad42a4a0203c7391f2cf3bb72c2c5f6e4203ae4 Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Fri, 25 Sep 2020 20:10:29 +0800 Subject: [PATCH 1/3] fix https://github.com/zhegexiaohuozi/JsoupXpath/issues/44 --- .../seimicrawler/xpath/core/node/Text.java | 9 ++++++++ .../seimicrawler/xpath/JXDocumentTest.java | 23 ++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/seimicrawler/xpath/core/node/Text.java b/src/main/java/org/seimicrawler/xpath/core/node/Text.java index e4bb7de..b3fb471 100644 --- a/src/main/java/org/seimicrawler/xpath/core/node/Text.java +++ b/src/main/java/org/seimicrawler/xpath/core/node/Text.java @@ -12,6 +12,8 @@ import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import java.lang.reflect.Field; +import java.lang.reflect.Method; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -59,6 +61,13 @@ public void head(Node node, int depth) { } Element data = new Element(Constants.DEF_TEXT_TAG_NAME); data.text(textNode.getWholeText()); + try { + Method parent = Node.class.getDeclaredMethod("setParentNode",Node.class); + parent.setAccessible(true); + parent.invoke(data,textNode.parent()); + } catch (Exception e) { + //ignore + } CommonUtil.setSameTagIndexInSiblings(data,index); res.add(data); } diff --git a/src/test/java/org/seimicrawler/xpath/JXDocumentTest.java b/src/test/java/org/seimicrawler/xpath/JXDocumentTest.java index 8fe5a23..2270f0d 100644 --- a/src/test/java/org/seimicrawler/xpath/JXDocumentTest.java +++ b/src/test/java/org/seimicrawler/xpath/JXDocumentTest.java @@ -222,8 +222,11 @@ public void testA(){ } } + /** + * fix https://github.com/zhegexiaohuozi/JsoupXpath/issues/52 + */ @Test - public void FixTextBehaviorTest(){ + public void fixTextBehaviorTest(){ String html = "

分类:动漫地区:日本年份:2010

"; JXDocument jxDocument = JXDocument.create(html); List jxNodes = jxDocument.selN("//text()[3]"); @@ -235,4 +238,22 @@ public void FixTextBehaviorTest(){ logger.info("all = {}",allText); } + /** + * fix https://github.com/zhegexiaohuozi/JsoupXpath/issues/44 + */ + @Test + public void fixTextElNoParentTest(){ + String test="
a
need
not need
c
"; + JXDocument j = JXDocument.create(test); + List l = j.selN("//div[@class='a']//text()[not(ancestor::div[@class='e'])]"); + Set finalRes = new HashSet<>(); + for (JXNode i : l){ + logger.info("{}",i.toString()); + finalRes.add(i.asString()); + } + Assert.assertFalse(finalRes.contains("not need")); + Assert.assertTrue(finalRes.contains("need")); + Assert.assertEquals(4, finalRes.size()); + } + } From 8757bce5939202272224648f8250753e7f56b76e Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Fri, 25 Sep 2020 20:11:25 +0800 Subject: [PATCH 2/3] fix https://github.com/zhegexiaohuozi/JsoupXpath/issues/44 --- README.md | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index afac6ce..5779d13 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ maven依赖,全版本请参见[release信息](https://github.com/zhegexiaohuozi/ cn.wanghaomiao JsoupXpath - 2.4.1 + 2.4.2 ``` diff --git a/pom.xml b/pom.xml index 9654d41..f6161ea 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ cn.wanghaomiao JsoupXpath - 2.4.1 + 2.4.2 4.0.0 jar JsoupXpath From 39b1aa794e700d15a700556aa2bc66cb9e3182fe Mon Sep 17 00:00:00 2001 From: xiaohuo Date: Sun, 27 Sep 2020 10:19:34 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E8=A1=A5=E5=85=85assert?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/test/java/org/seimicrawler/xpath/JXDocumentTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/java/org/seimicrawler/xpath/JXDocumentTest.java b/src/test/java/org/seimicrawler/xpath/JXDocumentTest.java index 2270f0d..0bf5624 100644 --- a/src/test/java/org/seimicrawler/xpath/JXDocumentTest.java +++ b/src/test/java/org/seimicrawler/xpath/JXDocumentTest.java @@ -235,6 +235,7 @@ public void fixTextBehaviorTest(){ Assert.assertEquals("2010", actual); List nodes = jxDocument.selN("//text()"); String allText = StringUtils.join(nodes,""); + Assert.assertEquals("分类:动漫地区:日本年份:2010",allText); logger.info("all = {}",allText); }