Skip to content

Commit

Permalink
minor change
Browse files: browse the repository at this point in the history
  • Loading branch information
Long Pham committed May 24, 2018
1 parent 770a7e6 commit 6e4a555
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 54 deletions.
87 changes: 41 additions & 46 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@ public static void main (String[] args) {
FileInputStream fin = null;
ObjectInputStream ois = null;
try {
fin = new FileInputStream("serialized/kevin.ser");
fin = new FileInputStream("serialized/2.ser");
ois = new ObjectInputStream(fin);
doc = (ESAnnotatedHTMLDocument) ois.readObject();
} catch (Exception e) {
e.printStackTrace();
}
if (doc != null) {
System.out.println(doc.getTitle());
System.out.println(doc.get(CoreAnnotations.TokensAnnotation.class).size());
for (CoreLabel token : doc.get(CoreAnnotations.TokensAnnotation.class)) {
System.out.println(token.word() + " " + token.ner() + " " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,6 @@ public static void main(String[] args) {
long time = System.currentTimeMillis();
long start = time;

WebDriver driver = createChromeDriver();
// System.out.println(getAllTextWithLayout(driver,baseUrl));
time = System.currentTimeMillis();
System.out.println("Finish loading web driver " + (time-start)/1000 + " seconds");
start = time;

AnnotatorFactory.getInstance().getAnnotationPipeline();
time = System.currentTimeMillis();
System.out.println("Finish loading the default annotation pipeline " + (time-start)/1000 + " seconds");
Expand All @@ -291,11 +285,22 @@ public static void main(String[] args) {

System.out.println(i + "\t" + baseUrl);

WebDriver driver = createChromeDriver();
// System.out.println(getAllTextWithLayout(driver,baseUrl));
time = System.currentTimeMillis();
System.out.println("Finish loading web driver " + (time-start)/1000 + " seconds");
start = time;

ESAnnotatedHTMLDocument document = getHTMLDocumentForAnnotation(baseUrl, driver);
time = System.currentTimeMillis();
System.out.println("Finish creating document for annotation " + (time-start)/1000 + " seconds");
start = time;

if (document.get(CoreAnnotations.TokensAnnotation.class).size() <= 1) {
System.err.println("This URL is probably not a web page " + document.getURL());
continue;
}

AnnotatorFactory.getInstance().getAnnotationPipeline().annotate(document);
time = System.currentTimeMillis();
System.out.println("Finish annotation " + (time-start)/1000 + " seconds");
Expand Down Expand Up @@ -324,9 +329,10 @@ public static void main(String[] args) {
printAnnotatedDocument(document);
PipelineHelper.printAnnotatedDocument(document, fields);
}

driver.close();
}

driver.close();

// time = System.currentTimeMillis();
// start = time;
Expand Down
1 change: 1 addition & 0 deletions test_urls.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
0 http://www.forwarddatalab.org/kevinchang
1 http://www.forwarddatalab.org/research
2 https://relate.cs.illinois.edu/course/zuics101fa16/f/lectures/lec05.ipynb

0 comments on commit 6e4a555

Please sign in to comment.