-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor(tests/semantic_tree): split into different files
- Loading branch information
Showing
5 changed files
with
340 additions
and
166 deletions.
There are no files selected for viewing
74 changes: 74 additions & 0 deletions
74
tests/unit/semantic_tree/nesting_rules/test_always_nest_as_child_rule.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import bs4 | ||
|
||
from sec_parser import AbstractSemanticElement, TreeBuilder | ||
from sec_parser.processing_engine.html_tag import HtmlTag | ||
from sec_parser.semantic_elements.abstract_semantic_element import AbstractLevelElement | ||
from sec_parser.semantic_tree.nesting_rules import ( | ||
AbstractNestingRule, | ||
AlwaysNestAsChildRule, | ||
AlwaysNestAsParentRule, | ||
NestSameTypeDependingOnLevelRule, | ||
) | ||
from sec_parser.semantic_tree.semantic_tree import SemanticTree | ||
from sec_parser.semantic_tree.tree_node import TreeNode | ||
|
||
|
||
def html_tag(tag_name: str, text: str) -> HtmlTag:
    """Return an ``HtmlTag`` wrapping a new ``bs4.Tag`` named *tag_name* whose string is *text*."""
    soup_tag = bs4.Tag(name=tag_name)
    soup_tag.string = text
    wrapped = HtmlTag(soup_tag)
    return wrapped
|
||
|
||
class BaseElement(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class ParentElement(AbstractSemanticElement):
    """Bare element type that the test below expects to end up holding a child node."""
|
||
|
||
class ChildElement(AbstractSemanticElement):
    """Bare element type handed to ``AlwaysNestAsChildRule`` in the test below."""
|
||
|
||
class IgnoredParent(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class IgnoredChild(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class LeveledElement(AbstractLevelElement):
    """Bare ``AbstractLevelElement`` subclass; adds no behavior of its own."""
|
||
|
||
|
||
def test_always_nest_as_child():
    """Verify the tree built with ``AlwaysNestAsChildRule(ChildElement)``.

    Input order is child, parent, child, parent, child. The expected tree has
    three roots: the leading ``ChildElement`` and the two ``ParentElement``
    instances, each of which holds exactly one ``ChildElement`` child.
    """
    # Arrange
    mock_elements = [
        ChildElement(html_tag("tag7", "text7")),
        ParentElement(html_tag("tag6", "text6")),
        ChildElement(html_tag("tag8", "text8")),
        ParentElement(html_tag("tag17", "text17")),
        ChildElement(html_tag("tag18", "text18")),
    ]

    def get_rules() -> list[AbstractNestingRule]:
        return [AlwaysNestAsChildRule(ChildElement)]

    tree_builder = TreeBuilder(get_rules)

    # Act
    tree = tree_builder.build(mock_elements)

    # Assert
    assert len(tree.root_nodes) == 3
    assert isinstance(tree.root_nodes[0].semantic_element, ChildElement)
    assert isinstance(tree.root_nodes[1].semantic_element, ParentElement)
    assert isinstance(tree.root_nodes[2].semantic_element, ParentElement)
    assert len(tree.root_nodes[1].children) == 1
    assert isinstance(tree.root_nodes[1].children[0].semantic_element, ChildElement)
    assert len(tree.root_nodes[2].children) == 1
    # Check the second parent's own child; the original re-checked root_nodes[1].
    assert isinstance(tree.root_nodes[2].children[0].semantic_element, ChildElement)
|
72 changes: 72 additions & 0 deletions
72
tests/unit/semantic_tree/nesting_rules/test_always_nest_as_parent_rule.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import bs4 | ||
|
||
from sec_parser import AbstractSemanticElement, TreeBuilder | ||
from sec_parser.processing_engine.html_tag import HtmlTag | ||
from sec_parser.semantic_elements.abstract_semantic_element import AbstractLevelElement | ||
from sec_parser.semantic_tree.nesting_rules import ( | ||
AbstractNestingRule, | ||
AlwaysNestAsChildRule, | ||
AlwaysNestAsParentRule, | ||
NestSameTypeDependingOnLevelRule, | ||
) | ||
from sec_parser.semantic_tree.semantic_tree import SemanticTree | ||
from sec_parser.semantic_tree.tree_node import TreeNode | ||
|
||
|
||
def html_tag(tag_name: str, text: str) -> HtmlTag:
    """Return an ``HtmlTag`` wrapping a new ``bs4.Tag`` named *tag_name* whose string is *text*."""
    soup_tag = bs4.Tag(name=tag_name)
    soup_tag.string = text
    wrapped = HtmlTag(soup_tag)
    return wrapped
|
||
|
||
class BaseElement(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class ParentElement(AbstractSemanticElement):
    """Bare element type handed to ``AlwaysNestAsParentRule`` in the test below."""
|
||
|
||
class ChildElement(AbstractSemanticElement):
    """Bare element type that the test below expects to be nested under a parent."""
|
||
|
||
class IgnoredParent(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class IgnoredChild(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class LeveledElement(AbstractLevelElement):
    """Bare ``AbstractLevelElement`` subclass; adds no behavior of its own."""
|
||
|
||
def test_always_nest_as_parent():
    """Verify the tree built with ``AlwaysNestAsParentRule(ParentElement)``.

    Input order is child, parent, child, parent, child. The expected tree has
    three roots: the leading ``ChildElement`` and the two ``ParentElement``
    instances, each of which holds exactly one ``ChildElement`` child.
    """
    # Arrange
    mock_elements = [
        ChildElement(html_tag("tag7", "text7")),
        ParentElement(html_tag("tag6", "text6")),
        ChildElement(html_tag("tag8", "text8")),
        ParentElement(html_tag("tag17", "text17")),
        ChildElement(html_tag("tag18", "text18")),
    ]

    def get_rules() -> list[AbstractNestingRule]:
        return [AlwaysNestAsParentRule(ParentElement)]

    tree_builder = TreeBuilder(get_rules)

    # Act
    tree = tree_builder.build(mock_elements)

    # Assert
    assert len(tree.root_nodes) == 3
    assert isinstance(tree.root_nodes[0].semantic_element, ChildElement)
    assert isinstance(tree.root_nodes[1].semantic_element, ParentElement)
    assert isinstance(tree.root_nodes[2].semantic_element, ParentElement)
    assert len(tree.root_nodes[1].children) == 1
    assert isinstance(tree.root_nodes[1].children[0].semantic_element, ChildElement)
    assert len(tree.root_nodes[2].children) == 1
    # Check the second parent's own child; the original re-checked root_nodes[1].
    assert isinstance(tree.root_nodes[2].children[0].semantic_element, ChildElement)
69 changes: 69 additions & 0 deletions
69
tests/unit/semantic_tree/nesting_rules/test_nest_same_type_depending_on_level_rule.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import bs4 | ||
|
||
from sec_parser import AbstractSemanticElement, TreeBuilder | ||
from sec_parser.processing_engine.html_tag import HtmlTag | ||
from sec_parser.semantic_elements.abstract_semantic_element import AbstractLevelElement | ||
from sec_parser.semantic_tree.nesting_rules import ( | ||
AbstractNestingRule, | ||
AlwaysNestAsChildRule, | ||
AlwaysNestAsParentRule, | ||
NestSameTypeDependingOnLevelRule, | ||
) | ||
from sec_parser.semantic_tree.semantic_tree import SemanticTree | ||
from sec_parser.semantic_tree.tree_node import TreeNode | ||
|
||
|
||
def html_tag(tag_name: str, text: str) -> HtmlTag:
    """Return an ``HtmlTag`` wrapping a new ``bs4.Tag`` named *tag_name* whose string is *text*."""
    soup_tag = bs4.Tag(name=tag_name)
    soup_tag.string = text
    wrapped = HtmlTag(soup_tag)
    return wrapped
|
||
|
||
class BaseElement(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class ParentElement(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class ChildElement(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class IgnoredParent(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class IgnoredChild(AbstractSemanticElement):
    """Bare ``AbstractSemanticElement`` subclass; adds no behavior of its own."""
|
||
|
||
class LeveledElement(AbstractLevelElement):
    """Bare ``AbstractLevelElement`` subclass, constructed with a ``level`` in the test below."""
|
||
|
||
def test_nesting_of_leveled_elements():
    """Verify the tree built with ``NestSameTypeDependingOnLevelRule``.

    One level-1 element followed by two level-2 elements is expected to give
    a single level-1 root with both level-2 elements as its children.
    """
    # Arrange
    leveled_inputs = [
        LeveledElement(html_tag("tag1", "text1"), level=1),
        LeveledElement(html_tag("tag2", "text2"), level=2),
        LeveledElement(html_tag("tag3", "text3"), level=2),
    ]

    def get_rules() -> list[AbstractNestingRule]:
        return [NestSameTypeDependingOnLevelRule()]

    builder = TreeBuilder(get_rules)

    # Act
    tree = builder.build(leveled_inputs)

    # Assert
    assert len(tree.root_nodes) == 1
    root = tree.root_nodes[0]
    assert isinstance(root.semantic_element, LeveledElement)
    assert root.semantic_element.level == 1
    assert len(root.children) == 2
    for node in root.children:
        assert isinstance(node.semantic_element, LeveledElement)
        assert node.semantic_element.level == 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.