Skip to content

Commit

Permalink
max reference length set
Browse files (browse the repository at this point in the history)
  • Loading branch information
Dominika Tkaczyk committed Mar 19, 2015
1 parent df3a950 commit ebed054
Showing 1 changed file with 6 additions and 2 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -45,6 +45,8 @@ public class KMeansBibReferenceExtractor implements BibReferenceExtractor {
/**
 * Limit related to the number of reference lines. Its use is not visible in this
 * excerpt; presumably it caps how many lines are processed -- TODO confirm against
 * the full class.
 */
public static final int MAX_REF_LINES_COUNT = 10000;

/**
 * Limit related to the number of references. Its use is not visible in this
 * excerpt; presumably it caps how many references are handled -- TODO confirm
 * against the full class.
 */
public static final int MAX_REFS_COUNT = 1000;

/**
 * Exclusive upper bound on the character length of an extracted reference:
 * extractBibReferences only adds a candidate string to the result when its
 * length() is strictly less than this value, so longer candidates are dropped.
 */
public static final int MAX_REF_LENGTH = 1500;

private static final FeatureVectorBuilder<BxLine, BxDocumentBibReferences> VECTOR_BUILDER =
new FeatureVectorBuilder<BxLine, BxDocumentBibReferences>();
Expand Down Expand Up @@ -116,7 +118,8 @@ public String[] extractBibReferences(BxDocument document) throws AnalysisExcepti
String actRef = "";
for (int i = 0; i < lines.size(); i++) {
if (clusters[firstInstanceClusterNum].contains(instances.get(i))) {
if (!actRef.isEmpty() && actRef.matches(".*[0-9].*") && actRef.matches(".*[a-zA-Z].*")) {
if (!actRef.isEmpty() && actRef.matches(".*[0-9].*") && actRef.matches(".*[a-zA-Z].*")
&& actRef.length() < MAX_REF_LENGTH) {
references.add(actRef);
}
actRef = lines.get(i);
Expand All @@ -130,7 +133,8 @@ public String[] extractBibReferences(BxDocument document) throws AnalysisExcepti
actRef += lines.get(i);
}
}
if (!actRef.isEmpty() && actRef.matches(".*[0-9].*") && actRef.matches(".*[a-zA-Z].*")) {
if (!actRef.isEmpty() && actRef.matches(".*[0-9].*") && actRef.matches(".*[a-zA-Z].*")
&& actRef.length() < MAX_REF_LENGTH) {
references.add(actRef);
}

Expand Down

0 comments on commit ebed054

Please sign in to comment.