From 1dabf17b2ccad0173c36da0d05ca296c99ca4006 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Sat, 26 Oct 2024 10:27:40 +0200 Subject: [PATCH] fix: fix non-string table cell handling in chunker Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- docling_core/transforms/chunker/hierarchical_chunker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docling_core/transforms/chunker/hierarchical_chunker.py b/docling_core/transforms/chunker/hierarchical_chunker.py index 524783b..fe558dc 100644 --- a/docling_core/transforms/chunker/hierarchical_chunker.py +++ b/docling_core/transforms/chunker/hierarchical_chunker.py @@ -129,8 +129,8 @@ def _triplet_serialize(cls, table_df: DataFrame) -> str: table_df.index = table_df.index + 1 table_df = table_df.sort_index() - rows = [item.strip() for item in table_df.iloc[:, 0].to_list()] - cols = [item.strip() for item in table_df.iloc[0, :].to_list()] + rows = [str(item).strip() for item in table_df.iloc[:, 0].to_list()] + cols = [str(item).strip() for item in table_df.iloc[0, :].to_list()] nrows = table_df.shape[0] ncols = table_df.shape[1]