Skip to content

Commit

Permalink
update test data to cover multiple heading levels
Browse files Browse the repository at this point in the history
Signed-off-by: Panos Vagenas <[email protected]>
  • Loading branch information
vagenas committed Nov 1, 2024
1 parent fde06f7 commit c0e2b64
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 1 deletion.
17 changes: 16 additions & 1 deletion test/data/chunker/0_inp_dl_doc.json
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,7 @@
},
"children": [],
"label": "section_header",
"level": 2,
"level": 1,
"prov": [
{
"page_no": 1,
Expand Down Expand Up @@ -705,6 +705,7 @@
},
"children": [],
"label": "section_header",
"level": 1,
"prov": [
{
"page_no": 1,
Expand Down Expand Up @@ -1017,6 +1018,7 @@
},
"children": [],
"label": "section_header",
"level": 1,
"prov": [
{
"page_no": 2,
Expand Down Expand Up @@ -1147,6 +1149,7 @@
},
"children": [],
"label": "section_header",
"level": 1,
"prov": [
{
"page_no": 2,
Expand Down Expand Up @@ -1199,6 +1202,7 @@
},
"children": [],
"label": "section_header",
"level": 2,
"prov": [
{
"page_no": 2,
Expand Down Expand Up @@ -1381,6 +1385,7 @@
},
"children": [],
"label": "section_header",
"level": 2,
"prov": [
{
"page_no": 3,
Expand Down Expand Up @@ -1433,6 +1438,7 @@
},
"children": [],
"label": "section_header",
"level": 3,
"prov": [
{
"page_no": 3,
Expand Down Expand Up @@ -1511,6 +1517,7 @@
},
"children": [],
"label": "section_header",
"level": 3,
"prov": [
{
"page_no": 3,
Expand Down Expand Up @@ -1615,6 +1622,7 @@
},
"children": [],
"label": "section_header",
"level": 3,
"prov": [
{
"page_no": 4,
Expand Down Expand Up @@ -1693,6 +1701,7 @@
},
"children": [],
"label": "section_header",
"level": 2,
"prov": [
{
"page_no": 4,
Expand Down Expand Up @@ -1745,6 +1754,7 @@
},
"children": [],
"label": "section_header",
"level": 2,
"prov": [
{
"page_no": 4,
Expand Down Expand Up @@ -1823,6 +1833,7 @@
},
"children": [],
"label": "section_header",
"level": 1,
"prov": [
{
"page_no": 4,
Expand Down Expand Up @@ -2005,6 +2016,7 @@
},
"children": [],
"label": "section_header",
"level": 1,
"prov": [
{
"page_no": 5,
Expand Down Expand Up @@ -2057,6 +2069,7 @@
},
"children": [],
"label": "section_header",
"level": 1,
"prov": [
{
"page_no": 5,
Expand Down Expand Up @@ -2135,6 +2148,7 @@
},
"children": [],
"label": "section_header",
"level": 1,
"prov": [
{
"page_no": 5,
Expand Down Expand Up @@ -2655,6 +2669,7 @@
},
"children": [],
"label": "section_header",
"level": 1,
"prov": [
{
"page_no": 7,
Expand Down
24 changes: 24 additions & 0 deletions test/data/chunker/0_out_chunks.json
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,7 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.1 PDF backends"
],
"origin": {
Expand Down Expand Up @@ -860,6 +861,7 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.1 PDF backends"
],
"origin": {
Expand Down Expand Up @@ -902,6 +904,7 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.1 PDF backends"
],
"origin": {
Expand Down Expand Up @@ -944,6 +947,7 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.1 PDF backends"
],
"origin": {
Expand Down Expand Up @@ -986,6 +990,7 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.1 PDF backends"
],
"origin": {
Expand Down Expand Up @@ -1028,6 +1033,7 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.1 PDF backends"
],
"origin": {
Expand Down Expand Up @@ -1070,6 +1076,7 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.2 AI models"
],
"origin": {
Expand Down Expand Up @@ -1112,6 +1119,8 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.2 AI models",
"Layout Analysis Model"
],
"origin": {
Expand Down Expand Up @@ -1154,6 +1163,8 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.2 AI models",
"Layout Analysis Model"
],
"origin": {
Expand Down Expand Up @@ -1196,6 +1207,8 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.2 AI models",
"Table Structure Recognition"
],
"origin": {
Expand Down Expand Up @@ -1238,6 +1251,8 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.2 AI models",
"Table Structure Recognition"
],
"origin": {
Expand Down Expand Up @@ -1280,6 +1295,8 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.2 AI models",
"Table Structure Recognition"
],
"origin": {
Expand Down Expand Up @@ -1322,6 +1339,8 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.2 AI models",
"OCR"
],
"origin": {
Expand Down Expand Up @@ -1364,6 +1383,8 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.2 AI models",
"OCR"
],
"origin": {
Expand Down Expand Up @@ -1406,6 +1427,7 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.3 Assembly"
],
"origin": {
Expand Down Expand Up @@ -1448,6 +1470,7 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.4 Extensibility"
],
"origin": {
Expand Down Expand Up @@ -1490,6 +1513,7 @@
],
"headings": [
"Docling Technical Report",
"3 Processing pipeline",
"3.4 Extensibility"
],
"origin": {
Expand Down
Loading

0 comments on commit c0e2b64

Please sign in to comment.