Skip to content

Commit

Permalink
minor change
Browse files Browse the repository at this point in the history
  • Loading branch information
Long Pham committed May 24, 2018
1 parent ef2d7fd commit f624514
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 63 deletions.
101 changes: 51 additions & 50 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public static void main (String[] args) {
FileInputStream fin = null;
ObjectInputStream ois = null;
try {
fin = new FileInputStream("serialized/2.ser");
fin = new FileInputStream("serialized/0.ser");
ois = new ObjectInputStream(fin);
doc = (ESAnnotatedHTMLDocument) ois.readObject();
} catch (Exception e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.remote.RemoteWebElement;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -59,7 +61,14 @@ public static void main(String[] args) {
WebDriver driver = new ChromeDriver(options);

String baseUrl = "http://www.forwarddatalab.org/kevinchang";

baseUrl = "https://charm.cs.illinois.edu/";
// launch browser and direct it to the Base URL
driver.get(baseUrl);

// Wait up to 10 seconds for the page's <body> to contain some text.
// This has to run after driver.get(): before that call the browser has not
// been pointed at baseUrl, so there is no page content to wait for.
WebDriverWait wait = new WebDriverWait(driver, 10);
wait.until(new ExpectedCondition<Boolean>() {
    @Override
    public Boolean apply(WebDriver webDriver) {
        // Query through the driver handed to the condition rather than the captured outer variable.
        return webDriver.findElement(By.xpath("/html/body")).getText().length() != 0;
    }
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.stream.Stream;

import org.apache.commons.cli.*;
import org.openqa.selenium.support.ui.WebDriverWait;

public class HTMLDocumentIngestionManager {

Expand Down Expand Up @@ -296,6 +297,11 @@ public static void main(String[] args) {
System.out.println("Finish creating document for annotation " + (time-start)/1000 + " seconds");
start = time;

// A null document means no document could be built for this URL, so skip it.
// The document must not be dereferenced here: the previous message called
// document.getURL() on the null reference, which always threw NullPointerException.
if (document == null) {
    System.err.println("This URL cannot be rendered by Selenium");
    continue;
}

if (document.get(CoreAnnotations.TokensAnnotation.class).size() <= 1) {
System.err.println("This URL is probably not a web page " + document.getURL());
continue;
Expand Down Expand Up @@ -400,18 +406,23 @@ private static ESAnnotatedHTMLDocument getHTMLDocumentForAnnotation(String url,
driver.get(url);
String pageTitle = driver.getTitle();
List<CoreLabel> allTokens = new ArrayList<>();
RemoteWebElement e = (RemoteWebElement) driver.findElement(By.xpath("/html/body"));
travelDOMTreeWithSelenium(e,null,allTokens, driver);
ESAnnotatedHTMLDocument document = new ESAnnotatedHTMLDocument(allTokens);
try{
RemoteWebElement e = (RemoteWebElement) driver.findElement(By.xpath("/html/body"));
travelDOMTreeWithSelenium(e,null,allTokens, driver);
ESAnnotatedHTMLDocument document = new ESAnnotatedHTMLDocument(allTokens);
// List<List<CoreLabel>> allTokens = new ArrayList<>();
// travelDOMTreeWithSelenium2((RemoteWebElement)driver.findElement(By.xpath("/html/body")),null,allTokens, driver);
// ESAnnotatedHTMLDocument document = new ESAnnotatedHTMLDocument();
// document.loadFromTokens(allTokens);
document.setURL(url);
document.setTitle(pageTitle);
document.setHeight(e.getSize().height);
document.setWidth(e.getSize().width);
return document;
document.setURL(url);
document.setTitle(pageTitle);
document.setHeight(e.getSize().height);
document.setWidth(e.getSize().width);
return document;
} catch (Exception ex) {
ex.printStackTrace();
}
return null;
}

private static void printAnnotatedDocument(ESAnnotatedHTMLDocument document) {
Expand Down
3 changes: 3 additions & 0 deletions test_urls copy.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
0 https://cs.illinois.edu/about-us/awards/faculty-awards/chairs-and-professorships/founder-professor-engineering
1 http://www.forwarddatalab.org/kevinchang
2 https://relate.cs.illinois.edu/course/zuics101fa16/f/lectures/lec05.ipynb
4 changes: 1 addition & 3 deletions test_urls.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
0 http://www.forwarddatalab.org/kevinchang
1 http://www.forwarddatalab.org/research
2 https://relate.cs.illinois.edu/course/zuics101fa16/f/lectures/lec05.ipynb
0 http://charm.cs.illinois.edu/research/episim

0 comments on commit f624514

Please sign in to comment.