Skip to content

Commit

Permalink
Add a much more detailed explanation of what went wrong when check_constituents barfs.
Browse files Browse the repository at this point in the history
Apparently that is happening with the Icelandic treebank... #1356
  • Loading branch information
AngledLuffa authored and Jemoka committed Jul 16, 2024
1 parent 6e95b58 commit c91e3ad
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
2 changes: 2 additions & 0 deletions stanza/models/constituency/parse_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ def get_unique_constituent_labels(trees):
"""
Walks over all of the trees and gets all of the unique constituent names from the trees
"""
if isinstance(trees, Tree):
trees = [trees]
constituents = Tree.get_constituent_counts(trees)
return sorted(set(constituents.keys()))

Expand Down
10 changes: 9 additions & 1 deletion stanza/models/constituency/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,15 @@ def check_constituents(train_constituents, trees, treebank_name):
constituents = parse_tree.Tree.get_unique_constituent_labels(trees)
for con in constituents:
if con not in train_constituents:
raise RuntimeError("Found constituent label {} in the {} set which don't exist in the train set".format(con, treebank_name))
first_error = None
num_errors = 0
for tree_idx, tree in enumerate(trees):
constituents = parse_tree.Tree.get_unique_constituent_labels(tree)
if con in constituents:
num_errors += 1
if first_error is None:
first_error = tree_idx
raise RuntimeError("Found constituent label {} in the {} set which don't exist in the train set. This constituent label occured in {} trees, with the first tree index at {} counting from 1\nThe error tree (which may have POS tags changed from the retagger and may be missing functional tags or empty nodes) is:\n{:P}".format(con, treebank_name, num_errors, (first_error+1), trees[first_error]))

def check_root_labels(root_labels, other_trees, treebank_name):
"""
Expand Down

0 comments on commit c91e3ad

Please sign in to comment.